qpel encoding

4mv+b frames encoding finally fixed
chroma ME
5 comparison functions for ME
b frame encoding speedup
wmv2 codec (unfinished)
user specified diamond size for EPZS

Originally committed as revision 1365 to svn://svn.ffmpeg.org/ffmpeg/trunk
pull/126/head
Michael Niedermayer 22 years ago
parent ac97734133
commit 1457ab5233
  1. 4
      libavcodec/allcodecs.c
  2. 39
      libavcodec/avcodec.h
  3. 308
      libavcodec/dsputil.c
  4. 35
      libavcodec/dsputil.h
  5. 175
      libavcodec/h263.c
  6. 67
      libavcodec/h263dec.c
  7. 161
      libavcodec/i386/dsputil_mmx.c
  8. 19
      libavcodec/i386/motion_est_mmx.c
  9. 1149
      libavcodec/motion_est.c
  10. 737
      libavcodec/motion_est_template.c
  11. 2
      libavcodec/mpeg12.c
  12. 188
      libavcodec/mpegvideo.c
  13. 60
      libavcodec/mpegvideo.h
  14. 55
      libavcodec/msmpeg4.c
  15. 241
      libavcodec/msmpeg4data.h
  16. 90
      libavcodec/simple_idct.c
  17. 3
      libavcodec/simple_idct.h
  18. 850
      libavcodec/wmv2.c

@ -53,7 +53,7 @@ void avcodec_register_all(void)
register_avcodec(&msmpeg4v2_encoder);
register_avcodec(&msmpeg4v3_encoder);
register_avcodec(&wmv1_encoder);
// register_avcodec(&wmv2_encoder);
register_avcodec(&wmv2_encoder);
register_avcodec(&huffyuv_encoder);
#endif /* CONFIG_ENCODERS */
register_avcodec(&rawvideo_codec);
@ -66,7 +66,7 @@ void avcodec_register_all(void)
register_avcodec(&msmpeg4v2_decoder);
register_avcodec(&msmpeg4v3_decoder);
register_avcodec(&wmv1_decoder);
// register_avcodec(&wmv2_decoder);
register_avcodec(&wmv2_decoder);
register_avcodec(&mpeg_decoder);
register_avcodec(&h263i_decoder);
register_avcodec(&rv10_decoder);

@ -5,8 +5,8 @@
#define LIBAVCODEC_VERSION_INT 0x000406
#define LIBAVCODEC_VERSION "0.4.6"
#define LIBAVCODEC_BUILD 4646
#define LIBAVCODEC_BUILD_STR "4646"
#define LIBAVCODEC_BUILD 4647
#define LIBAVCODEC_BUILD_STR "4647"
enum CodecID {
CODEC_ID_NONE,
@ -850,6 +850,41 @@ typedef struct AVCodecContext {
* decoding: unused
*/
int mb_qmax;
/**
* motion estimation compare function
* encoding: set by user.
* decoding: unused
*/
int me_cmp;
/**
* subpixel motion estimation compare function
* encoding: set by user.
* decoding: unused
*/
int me_sub_cmp;
/**
* macroblock compare function (not supported yet)
* encoding: set by user.
* decoding: unused
*/
int mb_cmp;
#define FF_CMP_SAD 0
#define FF_CMP_SSE 1
#define FF_CMP_SATD 2
#define FF_CMP_DCT 3
#define FF_CMP_PSNR 4
#define FF_CMP_BIT 5
#define FF_CMP_RD 6
#define FF_CMP_ZERO 7
#define FF_CMP_CHROMA 256
/**
* ME diamond size
* encoding: set by user.
* decoding: unused
*/
int dia_size;
} AVCodecContext;
typedef struct AVCodec {

@ -20,6 +20,7 @@
*/
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
int ff_bit_exact=0;
@ -144,7 +145,28 @@ static int pix_norm1_c(UINT8 * pix, int line_size)
}
static int pix_norm_c(UINT8 * pix1, UINT8 * pix2, int line_size)
/**
 * Sum of squared differences between two 8x8 pixel blocks.
 *
 * @param v         unused context pointer
 * @param pix1      top-left pixel of the first block
 * @param pix2      top-left pixel of the second block
 * @param line_size distance in bytes between two rows (used for both blocks)
 * @return accumulated squareTbl entries for all 64 pixel differences
 */
static int sse8_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size)
{
    /* offset by 256 so that negative differences index directly;
       presumably sq[d] == d*d -- confirm where squareTbl is initialised */
    UINT32 *square = squareTbl + 256;
    int total = 0;
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            total += square[pix1[col] - pix2[col]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
static int sse16_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size)
{
int s, i, j;
UINT32 *sq = squareTbl + 256;
@ -1141,7 +1163,103 @@ QPEL_MC(0, avg_ , _ , op_avg)
#undef op_put
#undef op_put_no_rnd
static int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
/**
 * Horizontal 4-tap (-1, 9, 9, -1)/16 filter over rows of 8 pixels.
 *
 * Each output pixel x is built from src[x-1] .. src[x+2], so one pixel to the
 * left and two pixels to the right of the 8 pixel row are read.
 *
 * @param dst       output, 8 pixels per row
 * @param src       input, pointing at the first pixel of the first row
 * @param dstStride distance between output rows
 * @param srcStride distance between input rows
 * @param h         number of rows to filter
 */
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
    uint8_t *clip = cropTbl + MAX_NEG_CROP; /* lookup used to bring the filter result back into pixel range */
    int row, x;

    for(row=0; row<h; row++){
        for(x=0; x<8; x++)
            dst[x]= clip[(9*(src[x] + src[x+1]) - (src[x-1] + src[x+2]) + 8)>>4];
        dst+=dstStride;
        src+=srcStride;
    }
}
/**
 * Vertical 4-tap (-1, 9, 9, -1)/16 filter producing 8 output rows per column.
 *
 * For every column the 11 input samples of rows -1 .. 9 are fetched first and
 * only then are the 8 outputs written.
 *
 * @param dst       output, 8 rows are written per column
 * @param src       input, pointing at row 0 of the first column
 * @param dstStride distance between output rows
 * @param srcStride distance between input rows
 * @param w         number of columns to filter
 */
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
    uint8_t *clip = cropTbl + MAX_NEG_CROP;
    int col, k;

    for(col=0; col<w; col++){
        int tap[11]; /* tap[k] is the input sample of row k-1 */

        for(k=0; k<11; k++)
            tap[k]= src[(k-1)*srcStride];

        for(k=0; k<8; k++)
            dst[k*dstStride]= clip[(9*(tap[k+1] + tap[k+2]) - (tap[k] + tap[k+3]) + 8)>>4];

        src++;
        dst++;
    }
}
/* mspel position 00: no filtering, hands the block to put_pixels8_c with a height of 8. */
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
    const int rows = 8;

    put_pixels8_c(dst, src, stride, rows);
}
/*
 * mspel position 10: horizontally filters the block into an 8x8 scratch buffer
 * and combines it with the unfiltered source through put_pixels8_l2
 * (presumably an average of its two inputs -- confirm at its definition).
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t filtered[64]; /* 8 rows of 8 pixels */

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src, filtered, stride, stride, 8, 8);
}
/* mspel position 20: the horizontally filtered block is written straight to dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    const int rows = 8;

    wmv2_mspel8_h_lowpass(dst, src, stride, stride, rows);
}
/*
 * mspel position 30: like position 10, but the horizontally filtered block is
 * combined with the source shifted one pixel to the right (src+1).
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t filtered[64]; /* 8 rows of 8 pixels */

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2(dst, src+1, filtered, stride, stride, 8, 8);
}
/* mspel position 02: the vertically filtered block (8 columns) is written straight to dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    const int columns = 8;

    wmv2_mspel8_v_lowpass(dst, src, stride, stride, columns);
}
/*
 * mspel position 12: combines the vertically filtered source with the
 * horizontally-then-vertically filtered source through put_pixels8_l2.
 *
 * The horizontal pass starts one row above the block and covers 11 rows, which
 * is exactly what the vertical pass over row_filtered+8 reads (rows -1 .. 9).
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t row_filtered[88];  /* 11 rows of 8 pixels */
    uint8_t col_filtered[64];
    uint8_t both_filtered[64];

    wmv2_mspel8_h_lowpass(row_filtered, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(col_filtered, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(both_filtered, row_filtered+8, 8, 8, 8);
    put_pixels8_l2(dst, col_filtered, both_filtered, stride, 8, 8, 8);
}
/*
 * mspel position 32: like position 12, but the purely vertical pass runs on
 * the source shifted one pixel to the right (src+1).
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t row_filtered[88];  /* 11 rows of 8 pixels */
    uint8_t col_filtered[64];
    uint8_t both_filtered[64];

    wmv2_mspel8_h_lowpass(row_filtered, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(col_filtered, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(both_filtered, row_filtered+8, 8, 8, 8);
    put_pixels8_l2(dst, col_filtered, both_filtered, stride, 8, 8, 8);
}
/*
 * mspel position 22: horizontal pass over 11 rows (starting one row above the
 * block) into a scratch buffer, then vertical pass from that buffer into dst.
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t row_filtered[88]; /* 11 rows of 8 pixels */

    wmv2_mspel8_h_lowpass(row_filtered, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, row_filtered+8, stride, 8, 8);
}
static inline int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
int s, i;
@ -1257,7 +1375,7 @@ static int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
return s;
}
static int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
static inline int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
int s, i;
@ -1341,6 +1459,14 @@ static int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
return s;
}
/* Adapter around pix_abs16x16_c that accepts (and ignores) a leading context pointer. */
static int sad16x16_c(void *s, uint8_t *a, uint8_t *b, int stride){
    const int sad = pix_abs16x16_c(a, b, stride);

    return sad;
}
/* Adapter around pix_abs8x8_c that accepts (and ignores) a leading context pointer. */
static int sad8x8_c(void *s, uint8_t *a, uint8_t *b, int stride){
    const int sad = pix_abs8x8_c(a, b, stride);

    return sad;
}
void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last)
{
int i;
@ -1399,6 +1525,156 @@ static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
dst[i+0] = src1[i+0]-src2[i+0];
}
/*
 * Butterfly helpers for the Hadamard transforms below.
 *
 * BUTTERFLY2 and BUTTERFLY1 expand to a single statement (do{}while(0)), so
 * they must be followed by ';' and are safe inside an unbraced if/else.
 * Arguments are parenthesised; as before, i1/i2 are evaluated twice and x/y
 * are each read once and written once.
 */

/* o1 = i1 + i2, o2 = i1 - i2 */
#define BUTTERFLY2(o1,o2,i1,i2) \
do{\
    (o1)= (i1)+(i2);\
    (o2)= (i1)-(i2);\
}while(0)

/* in place: x = x + y, y = x - y (both computed from the old values) */
#define BUTTERFLY1(x,y) \
do{\
    /* suffixed names so that caller variables called a/b are not captured */\
    int bf_a_= (x);\
    int bf_b_= (y);\
    (x)= bf_a_+bf_b_;\
    (y)= bf_a_-bf_b_;\
}while(0)

/* |x + y| + |x - y|, i.e. the last butterfly stage fused with the absolute sum */
#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
/**
 * Sum of absolute values of the 8x8 Hadamard transform of (src - dst).
 *
 * @param s      unused context pointer
 * @param dst    first 8x8 block
 * @param src    second 8x8 block
 * @param stride distance between rows, shared by both blocks
 * @return sum of the absolute transformed differences
 */
static int hadamard8_diff_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride){
    int temp[64];
    int sum=0;
    int i, k;

    /* horizontal pass: 8-point transform of every row of differences */
    for(i=0; i<8; i++){
        int *row= temp + 8*i;
        const uint8_t *a= src + stride*i;
        const uint8_t *b= dst + stride*i;

        //FIXME try pointer walks
        for(k=0; k<8; k+=2){
            BUTTERFLY2(row[k], row[k+1], a[k]-b[k], a[k+1]-b[k+1]);
        }
        for(k=0; k<8; k+=4){
            BUTTERFLY1(row[k  ], row[k+2]);
            BUTTERFLY1(row[k+1], row[k+3]);
        }
        for(k=0; k<4; k++){
            BUTTERFLY1(row[k], row[k+4]);
        }
    }

    /* vertical pass: the final butterfly stage is fused with the absolute sum */
    for(i=0; i<8; i++){
        int *col= temp + i;

        for(k=0; k<8; k+=2){
            BUTTERFLY1(col[8*k], col[8*(k+1)]);
        }
        for(k=0; k<8; k+=4){
            BUTTERFLY1(col[8*k    ], col[8*(k+2)]);
            BUTTERFLY1(col[8*(k+1)], col[8*(k+3)]);
        }
        for(k=0; k<4; k++)
            sum += BUTTERFLYA(col[8*k], col[8*(k+4)]);
    }
#if 0
static int maxi=0;
if(sum>maxi){
    maxi=sum;
    printf("MAX:%d\n", maxi);
}
#endif
    return sum;
}
/**
 * Sum of absolute values of the 8x8 Hadamard transform of (src - mean).
 *
 * @param src    8x8 block
 * @param stride distance between rows
 * @param mean   value subtracted from every pixel before the transform
 * @return sum of the absolute transformed values
 */
static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
    int temp[64];
    int sum=0;
    int i, k;

    //FIXME OOOPS ignore 0 term instead of mean mess
    /* horizontal pass: 8-point transform of every mean-removed row */
    for(i=0; i<8; i++){
        int *row= temp + 8*i;
        const uint8_t *a= src + stride*i;

        //FIXME try pointer walks
        for(k=0; k<8; k+=2){
            BUTTERFLY2(row[k], row[k+1], a[k]-mean, a[k+1]-mean);
        }
        for(k=0; k<8; k+=4){
            BUTTERFLY1(row[k  ], row[k+2]);
            BUTTERFLY1(row[k+1], row[k+3]);
        }
        for(k=0; k<4; k++){
            BUTTERFLY1(row[k], row[k+4]);
        }
    }

    /* vertical pass: the final butterfly stage is fused with the absolute sum */
    for(i=0; i<8; i++){
        int *col= temp + i;

        for(k=0; k<8; k+=2){
            BUTTERFLY1(col[8*k], col[8*(k+1)]);
        }
        for(k=0; k<8; k+=4){
            BUTTERFLY1(col[8*k    ], col[8*(k+2)]);
            BUTTERFLY1(col[8*(k+1)], col[8*(k+3)]);
        }
        for(k=0; k<4; k++)
            sum += BUTTERFLYA(col[8*k], col[8*(k+4)]);
    }
    return sum;
}
/**
 * Sum of absolute DCT coefficients of the difference of two 8x8 blocks.
 *
 * @param c      context, used as MpegEncContext* for dsp.diff_pixels and fdct
 * @param src1   first block, passed as first source to diff_pixels
 * @param src2   second block, passed as second source to diff_pixels
 * @param stride distance between rows, shared by both blocks
 * @return sum over all 64 coefficients of ABS(coefficient)
 */
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
    MpegEncContext * const ctx= (MpegEncContext *)c;
    DCTELEM coef[64];
    int total=0;
    int k=0;

    ctx->dsp.diff_pixels(coef, src1, src2, stride);
    ctx->fdct(coef);

    while(k<64){
        total+= ABS(coef[k]);
        k++;
    }
    return total;
}
void simple_idct(INT16 *block); //FIXME

/**
 * Squared error that quantization introduces into the difference of two 8x8 blocks.
 *
 * The difference block is quantized, dequantized and passed through
 * simple_idct(); the result is compared against a copy of the untouched
 * difference. Note that s->mb_intra is set to 0 as a side effect.
 *
 * @param c      context, used as MpegEncContext*
 * @param src1   first block, passed as first source to diff_pixels
 * @param src2   second block, passed as second source to diff_pixels
 * @param stride distance between rows, shared by both blocks
 * @return sum of squared differences between processed and original values
 */
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
    MpegEncContext * const ctx= (MpegEncContext *)c;
    DCTELEM work[64], reference[64];
    int overflow; /* filled in by dct_quantize, not used afterwards */
    int total=0;
    int k;

    ctx->mb_intra=0;

    ctx->dsp.diff_pixels(work, src1, src2, stride);
    memcpy(reference, work, sizeof(reference));

    ctx->dct_quantize(ctx, work, 0/*FIXME*/, ctx->qscale, &overflow);
    ctx->dct_unquantize(ctx, work, 0, ctx->qscale);
    simple_idct(work); //FIXME

    for(k=0; k<64; k++){
        const int err= work[k]-reference[k];
        total+= err*err;
    }
    return total;
}
/* Generate static 16x16 variants of the 8x8 comparison functions; each
   WARPER88_1616(name8, name16) line defines a function called name16. */
WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c)
WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c)
WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c)
void dsputil_init(DSPContext* c, unsigned mask)
{
static int init_done = 0;
@ -1429,7 +1705,8 @@ void dsputil_init(DSPContext* c, unsigned mask)
c->clear_blocks = clear_blocks_c;
c->pix_sum = pix_sum_c;
c->pix_norm1 = pix_norm1_c;
c->pix_norm = pix_norm_c;
c->sse[0]= sse16_c;
c->sse[1]= sse8_c;
/* TODO [0] 16 [1] 8 */
c->pix_abs16x16 = pix_abs16x16_c;
@ -1489,6 +1766,28 @@ void dsputil_init(DSPContext* c, unsigned mask)
/* dspfunc(avg_no_rnd_qpel, 1, 8); */
#undef dspfunc
c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
c->hadamard8_diff[0]= hadamard8_diff16_c;
c->hadamard8_diff[1]= hadamard8_diff_c;
c->hadamard8_abs = hadamard8_abs_c;
c->dct_sad[0]= dct_sad16x16_c;
c->dct_sad[1]= dct_sad8x8_c;
c->sad[0]= sad16x16_c;
c->sad[1]= sad8x8_c;
c->quant_psnr[0]= quant_psnr16x16_c;
c->quant_psnr[1]= quant_psnr8x8_c;
c->add_bytes= add_bytes_c;
c->diff_bytes= diff_bytes_c;
@ -1516,7 +1815,6 @@ void dsputil_init(DSPContext* c, unsigned mask)
#ifdef HAVE_MMI
dsputil_init_mmi(c, mask);
#endif
}
/* remove any non bit exact operation (testing purpose) */

@ -79,13 +79,10 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
/* motion estimation */
typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size);
/*
int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
*/
typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/;
typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/;
typedef struct DSPContext {
/* pixel ops : interface with DCT */
void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size);
@ -98,7 +95,16 @@ typedef struct DSPContext {
void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
int (*pix_sum)(UINT8 * pix, int line_size);
int (*pix_norm1)(UINT8 * pix, int line_size);
int (*pix_norm)(UINT8 * pix1, UINT8 * pix2, int line_size);
me_cmp_func sad[2]; /* identical to pix_absAxA except additional void * */
me_cmp_func sse[2];
me_cmp_func hadamard8_diff[2];
me_cmp_func dct_sad[2];
me_cmp_func quant_psnr[2];
int (*hadamard8_abs )(uint8_t *src, int stride, int mean);
me_cmp_func me_cmp[11];
me_cmp_func me_sub_cmp[11];
me_cmp_func mb_cmp[11];
/* maybe create an array for 16/8 functions */
op_pixels_func put_pixels_tab[2][4];
@ -109,6 +115,7 @@ typedef struct DSPContext {
qpel_mc_func avg_qpel_pixels_tab[2][16];
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
qpel_mc_func put_mspel_pixels_tab[8];
op_pixels_abs_func pix_abs16x16;
op_pixels_abs_func pix_abs16x16_x2;
@ -120,9 +127,8 @@ typedef struct DSPContext {
op_pixels_abs_func pix_abs8x8_xy2;
/* huffyuv specific */
//FIXME note: alignment isn't guaranteed currently but could be if needed
void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/,int w);
void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
} DSPContext;
void dsputil_init(DSPContext* p, unsigned mask);
@ -156,6 +162,7 @@ static inline void emms(void)
__asm __volatile ("emms;":::"memory");
}
#define emms_c() \
{\
if (mm_flags & MM_MMX)\
@ -281,6 +288,14 @@ void ff_mdct_calc(MDCTContext *s, FFTSample *out,
const FFTSample *input, FFTSample *tmp);
void ff_mdct_end(MDCTContext *s);
/*
 * Defines a static function name16 with the same signature as name8 that
 * returns the sum of name8 applied to the four 8x8 quadrants of a 16x16 block
 * (offsets 0, +8, +8*stride and +8+8*stride in both dst and src).
 * The four calls are operands of a single '+' expression, so the order in
 * which they are evaluated is not specified.
 */
#define WARPER88_1616(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride){\
return name8(s, dst , src , stride)\
+name8(s, dst+8 , src+8 , stride)\
+name8(s, dst +8*stride, src +8*stride, stride)\
+name8(s, dst+8+8*stride, src+8+8*stride, stride);\
}
#ifndef HAVE_LRINTF
/* XXX: add ISOC specific test to avoid specific BSD testing. */
/* better than nothing implementation. */

@ -204,10 +204,6 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb,1,0); /* Reference Picture Resampling: off */
put_bits(&s->pb,1,0); /* Reduced-Resolution Update: off */
if (s->pict_type == I_TYPE)
s->no_rounding = 0;
else
s->no_rounding ^= 1;
put_bits(&s->pb,1,s->no_rounding); /* Rounding Type */
put_bits(&s->pb,2,0); /* Reserved */
put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
@ -392,6 +388,57 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){
}
}
/**
 * Derive the motion vectors of a direct-mode macroblock from the co-located
 * macroblock's stored vectors.
 *
 * Sets s->mv_type and fills s->mv[0][..] and s->mv[1][..] (presumably the
 * forward and backward vectors -- confirm against the users of s->mv).
 * For every stored vector component v and delta component d (mx or my):
 *   s->mv[0] = v*time_pb/time_pp + d
 *   s->mv[1] = d ? s->mv[0] - v : v*(time_pb - time_pp)/time_pp
 *
 * @param s  codec context; reads mb_x, mb_y, mb_width, block_index, pp_time,
 *           pb_time, co_located_type_table, motion_val and, for field MBs,
 *           top_field_first, pp_field_time, pb_field_time,
 *           field_select_table and field_mv_table
 * @param mx horizontal delta added to the scaled vector
 * @param my vertical delta added to the scaled vector
 *
 * NOTE(review): nothing here guards against time_pp being 0, and a
 * co-located type other than the three handled ones leaves mv_type and mv
 * untouched -- confirm that callers guarantee both.
 */
void ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){
const int mb_index= s->mb_x + s->mb_y*s->mb_width;
int xy= s->block_index[0];
/* the time distances are held in 16 bit unsigned variables, so the field
   adjusted values computed below are truncated to 16 bits on assignment */
uint16_t time_pp= s->pp_time;
uint16_t time_pb= s->pb_time;
int i;
//FIXME avoid divides
switch(s->co_located_type_table[mb_index]){
case 0:
/* one vector for the whole macroblock, taken from block 0 */
s->mv_type= MV_TYPE_16X16;
s->mv[0][0][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
s->mv[0][0][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
s->mv[1][0][0] = mx ? s->mv[0][0][0] - s->motion_val[xy][0]
: s->motion_val[xy][0]*(time_pb - time_pp)/time_pp;
s->mv[1][0][1] = my ? s->mv[0][0][1] - s->motion_val[xy][1]
: s->motion_val[xy][1]*(time_pb - time_pp)/time_pp;
break;
case CO_LOCATED_TYPE_4MV:
/* one vector per 8x8 block, each taken from the matching block index */
s->mv_type = MV_TYPE_8X8;
for(i=0; i<4; i++){
xy= s->block_index[i];
s->mv[0][i][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
s->mv[0][i][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->motion_val[xy][0]
: s->motion_val[xy][0]*(time_pb - time_pp)/time_pp;
s->mv[1][i][1] = my ? s->mv[0][i][1] - s->motion_val[xy][1]
: s->motion_val[xy][1]*(time_pb - time_pp)/time_pp;
}
break;
case CO_LOCATED_TYPE_FIELDMV:
/* one vector per field; the time distances are adjusted per field using
   the field select flag and the field index i, with the sign of the
   adjustment depending on top_field_first */
s->mv_type = MV_TYPE_FIELD;
for(i=0; i<2; i++){
if(s->top_field_first){
time_pp= s->pp_field_time - s->field_select_table[mb_index][i] + i;
time_pb= s->pb_field_time - s->field_select_table[mb_index][i] + i;
}else{
time_pp= s->pp_field_time + s->field_select_table[mb_index][i] - i;
time_pb= s->pb_field_time + s->field_select_table[mb_index][i] - i;
}
s->mv[0][i][0] = s->field_mv_table[mb_index][i][0]*time_pb/time_pp + mx;
s->mv[0][i][1] = s->field_mv_table[mb_index][i][1]*time_pb/time_pp + my;
s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->field_mv_table[mb_index][i][0]
: s->field_mv_table[mb_index][i][0]*(time_pb - time_pp)/time_pp;
s->mv[1][i][1] = my ? s->mv[0][i][1] - s->field_mv_table[mb_index][i][1]
: s->field_mv_table[mb_index][i][1]*(time_pb - time_pp)/time_pp;
}
break;
}
}
#ifdef CONFIG_ENCODERS
void mpeg4_encode_mb(MpegEncContext * s,
DCTELEM block[6][64],
@ -442,7 +489,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
return;
}
if ((cbp | motion_x | motion_y | mb_type) ==0) {
/* direct MB with MV={0,0} */
assert(s->dquant==0);
@ -1386,7 +1433,7 @@ void h263_encode_init(MpegEncContext *s)
init_mv_penalty_and_fcode(s);
}
s->mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p
s->me.mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p
// use fcodes >1 only for mpeg4 & h263 & h263p FIXME
switch(s->codec_id){
@ -1519,7 +1566,7 @@ void ff_set_mpeg4_time(MpegEncContext * s, int picture_number){
static void mpeg4_encode_vol_header(MpegEncContext * s)
{
int vo_ver_id=1; //must be 2 if we want GMC or q-pel
int vo_ver_id=2; //must be 2 if we want GMC or q-pel
char buf[255];
if(s->max_b_frames){
@ -1584,7 +1631,7 @@ static void mpeg4_encode_vol_header(MpegEncContext * s)
if(s->mpeg_quant) put_bits(&s->pb, 2, 0); /* no custom matrixes */
if (vo_ver_id != 1)
put_bits(&s->pb, 1, s->quarter_sample=0);
put_bits(&s->pb, 1, s->quarter_sample);
put_bits(&s->pb, 1, 1); /* complexity estimation disable */
s->resync_marker= s->rtp_mode;
put_bits(&s->pb, 1, s->resync_marker ? 0 : 1);/* resync marker disable */
@ -1618,7 +1665,6 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
int time_div, time_mod;
if(s->pict_type==I_TYPE){
s->no_rounding=0;
if(picture_number==0 || !s->strict_std_compliance)
mpeg4_encode_vol_header(s);
}
@ -1645,7 +1691,6 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb, 1, 1); /* vop coded */
if ( s->pict_type == P_TYPE
|| (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) {
s->no_rounding ^= 1;
put_bits(&s->pb, 1, s->no_rounding); /* rounding type */
}
put_bits(&s->pb, 3, 0); /* intra dc VLC threshold */
@ -1996,6 +2041,61 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
}
#endif
}
/**
 * Count the bits needed for the run/level coded coefficients of one block,
 * without writing anything.
 *
 * Coefficients are visited in scan_table order up to s->block_last_index[n].
 * Levels in [-64, 63] are costed through the intra or inter length table,
 * everything else with the fixed escape length. The coefficient at the last
 * index is always costed (with the "last" flag set), all earlier ones only
 * when they are non-zero. For intra blocks the DC coefficient (position 0) is
 * skipped and its cost is not included.
 *
 * @param s          codec context; reads block_last_index[n] and mb_intra
 * @param block      coefficients of the block
 * @param n          block number, used to look up the last index
 * @param intra_dc   currently unused (DC costing is disabled)
 * @param scan_table maps scan position to coefficient index
 * @return number of bits, 0 when there is nothing to code
 */
static inline int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc,
                                         UINT8 *scan_table)
{
    const int last_index = s->block_last_index[n];
    UINT8 *len_tab;
    int bits = 0;
    int prev_nonzero;
    int pos;

    if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
        /* mpeg4 based DC predictor */
        //mpeg4_encode_dc(dc_pb, intra_dc, n); //FIXME
        if (last_index < 1)
            return bits;
        pos     = 1;
        len_tab = uni_mpeg4_intra_rl_len;
    } else {
        if (last_index < 0)
            return 0;
        pos     = 0;
        len_tab = uni_mpeg4_inter_rl_len;
    }

    /* AC coefs */
    prev_nonzero = pos - 1;
    for (; pos <= last_index; pos++) {
        const int is_last = (pos == last_index);
        int level = block[ scan_table[pos] ];
        int run;

        /* zero coefficients only extend the run, except at the final position */
        if (!level && !is_last)
            continue;

        run    = pos - prev_nonzero - 1;
        level += 64;
        if ((level & (~127)) == 0) {
            const int index = UNI_MPEG4_ENC_INDEX(is_last, run, level);
            bits += len_tab[index];
        } else { //ESC3
            bits += 7+2+1+6+1+12+1;
        }
        prev_nonzero = pos;
    }

    return bits;
}
#endif
@ -3050,8 +3150,6 @@ int ff_h263_decode_mb(MpegEncContext *s,
int modb1; // first bit of modb
int modb2; // second bit of modb
int mb_type;
uint16_t time_pp;
uint16_t time_pb;
int xy;
s->mb_intra = 0; //B-frames never contain intra blocks
@ -3173,9 +3271,6 @@ int ff_h263_decode_mb(MpegEncContext *s,
}
if(mb_type==4 || mb_type==MB_TYPE_B_DIRECT){
int mb_index= s->mb_x + s->mb_y*s->mb_width;
int i;
if(mb_type==4)
mx=my=0;
else{
@ -3184,55 +3279,7 @@ int ff_h263_decode_mb(MpegEncContext *s,
}
s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
xy= s->block_index[0];
time_pp= s->pp_time;
time_pb= s->pb_time;
//FIXME avoid divides
switch(s->co_located_type_table[mb_index]){
case 0:
s->mv_type= MV_TYPE_16X16;
s->mv[0][0][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
s->mv[0][0][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
s->mv[1][0][0] = mx ? s->mv[0][0][0] - s->motion_val[xy][0]
: s->motion_val[xy][0]*(time_pb - time_pp)/time_pp;
s->mv[1][0][1] = my ? s->mv[0][0][1] - s->motion_val[xy][1]
: s->motion_val[xy][1]*(time_pb - time_pp)/time_pp;
PRINT_MB_TYPE(mb_type==4 ? "D" : "S");
break;
case CO_LOCATED_TYPE_4MV:
s->mv_type = MV_TYPE_8X8;
for(i=0; i<4; i++){
xy= s->block_index[i];
s->mv[0][i][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
s->mv[0][i][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->motion_val[xy][0]
: s->motion_val[xy][0]*(time_pb - time_pp)/time_pp;
s->mv[1][i][1] = my ? s->mv[0][i][1] - s->motion_val[xy][1]
: s->motion_val[xy][1]*(time_pb - time_pp)/time_pp;
}
PRINT_MB_TYPE("4");
break;
case CO_LOCATED_TYPE_FIELDMV:
s->mv_type = MV_TYPE_FIELD;
for(i=0; i<2; i++){
if(s->top_field_first){
time_pp= s->pp_field_time - s->field_select_table[mb_index][i] + i;
time_pb= s->pb_field_time - s->field_select_table[mb_index][i] + i;
}else{
time_pp= s->pp_field_time + s->field_select_table[mb_index][i] - i;
time_pb= s->pb_field_time + s->field_select_table[mb_index][i] - i;
}
s->mv[0][i][0] = s->field_mv_table[mb_index][i][0]*time_pb/time_pp + mx;
s->mv[0][i][1] = s->field_mv_table[mb_index][i][1]*time_pb/time_pp + my;
s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->field_mv_table[mb_index][i][0]
: s->field_mv_table[mb_index][i][0]*(time_pb - time_pp)/time_pp;
s->mv[1][i][1] = my ? s->mv[0][i][1] - s->field_mv_table[mb_index][i][1]
: s->field_mv_table[mb_index][i][1]*(time_pb - time_pp)/time_pp;
}
PRINT_MB_TYPE("=");
break;
}
ff_mpeg4_set_direct_mv(s, mx, my);
}
if(mb_type<0 || mb_type>4){

@ -40,7 +40,7 @@ static inline long long rdtsc()
}
#endif
static int h263_decode_init(AVCodecContext *avctx)
int ff_h263_decode_init(AVCodecContext *avctx)
{
MpegEncContext *s = avctx->priv_data;
@ -113,7 +113,7 @@ static int h263_decode_init(AVCodecContext *avctx)
return 0;
}
static int h263_decode_end(AVCodecContext *avctx)
int ff_h263_decode_end(AVCodecContext *avctx)
{
MpegEncContext *s = avctx->priv_data;
@ -343,7 +343,7 @@ static int mpeg4_find_frame_end(MpegEncContext *s, UINT8 *buf, int buf_size){
return -1;
}
static int h263_decode_frame(AVCodecContext *avctx,
int ff_h263_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
UINT8 *buf, int buf_size)
{
@ -416,9 +416,11 @@ retry:
if (MPV_common_init(s) < 0) //we need the idct permutaton for reading a custom matrix
return -1;
}
/* let's go :-) */
if (s->h263_msmpeg4) {
if (s->msmpeg4_version==5) {
ret= ff_wmv2_decode_picture_header(s);
} else if (s->msmpeg4_version) {
ret = msmpeg4_decode_picture_header(s);
} else if (s->h263_pred) {
if(s->avctx->extradata_size && s->picture_number==0){
@ -634,7 +636,6 @@ retry:
}
if(num_end_markers || error){
fprintf(stderr, "concealing errors\n");
//printf("type:%d\n", s->pict_type);
ff_error_resilience(s);
}
}
@ -713,10 +714,10 @@ AVCodec mpeg4_decoder = {
CODEC_TYPE_VIDEO,
CODEC_ID_MPEG4,
sizeof(MpegEncContext),
h263_decode_init,
ff_h263_decode_init,
NULL,
h263_decode_end,
h263_decode_frame,
ff_h263_decode_end,
ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED,
};
@ -725,10 +726,10 @@ AVCodec h263_decoder = {
CODEC_TYPE_VIDEO,
CODEC_ID_H263,
sizeof(MpegEncContext),
h263_decode_init,
ff_h263_decode_init,
NULL,
h263_decode_end,
h263_decode_frame,
ff_h263_decode_end,
ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
};
@ -737,10 +738,10 @@ AVCodec msmpeg4v1_decoder = {
CODEC_TYPE_VIDEO,
CODEC_ID_MSMPEG4V1,
sizeof(MpegEncContext),
h263_decode_init,
ff_h263_decode_init,
NULL,
h263_decode_end,
h263_decode_frame,
ff_h263_decode_end,
ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
};
@ -749,10 +750,10 @@ AVCodec msmpeg4v2_decoder = {
CODEC_TYPE_VIDEO,
CODEC_ID_MSMPEG4V2,
sizeof(MpegEncContext),
h263_decode_init,
ff_h263_decode_init,
NULL,
h263_decode_end,
h263_decode_frame,
ff_h263_decode_end,
ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
};
@ -761,10 +762,10 @@ AVCodec msmpeg4v3_decoder = {
CODEC_TYPE_VIDEO,
CODEC_ID_MSMPEG4V3,
sizeof(MpegEncContext),
h263_decode_init,
ff_h263_decode_init,
NULL,
h263_decode_end,
h263_decode_frame,
ff_h263_decode_end,
ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
};
@ -773,22 +774,10 @@ AVCodec wmv1_decoder = {
CODEC_TYPE_VIDEO,
CODEC_ID_WMV1,
sizeof(MpegEncContext),
h263_decode_init,
NULL,
h263_decode_end,
h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
};
AVCodec wmv2_decoder = {
"wmv2",
CODEC_TYPE_VIDEO,
CODEC_ID_WMV2,
sizeof(MpegEncContext),
h263_decode_init,
ff_h263_decode_init,
NULL,
h263_decode_end,
h263_decode_frame,
ff_h263_decode_end,
ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
};
@ -797,10 +786,10 @@ AVCodec h263i_decoder = {
CODEC_TYPE_VIDEO,
CODEC_ID_H263I,
sizeof(MpegEncContext),
h263_decode_init,
ff_h263_decode_init,
NULL,
h263_decode_end,
h263_decode_frame,
ff_h263_decode_end,
ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
};

@ -43,6 +43,11 @@ int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int sad16x16_mmx(void *s, UINT8 *blk1, UINT8 *blk2, int lx);
int sad8x8_mmx(void *s, UINT8 *blk1, UINT8 *blk2, int lx);
int sad16x16_mmx2(void *s, UINT8 *blk1, UINT8 *blk2, int lx);
int sad8x8_mmx2(void *s, UINT8 *blk1, UINT8 *blk2, int lx);
/* pixel operations */
static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL;
static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
@ -213,7 +218,7 @@ static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size)
);
}
static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride)
static inline void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride)
{
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
@ -496,7 +501,150 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
for(; i<w; i++)
dst[i+0] = src1[i+0]-src2[i+0];
}
/*
 * String-building helpers for the MMX Hadamard code. Each macro expands to
 * adjacent string literals meant to be placed inside an asm() statement.
 * None of the macros ends in a line continuation, so they do not depend on
 * what follows them in the file.
 */

/* a = a + b, b = b - a (old values): a+=b; b+=b; b-=a */
#define LBUTTERFLY(a,b)\
    "paddw " #b ", " #a " \n\t"\
    "paddw " #b ", " #b " \n\t"\
    "psubw " #a ", " #b " \n\t"

/* three butterfly stages across mm0..mm7 (pairs at distance 1, 2, then 4) */
#define HADAMARD48\
    LBUTTERFLY(%%mm0, %%mm1)\
    LBUTTERFLY(%%mm2, %%mm3)\
    LBUTTERFLY(%%mm4, %%mm5)\
    LBUTTERFLY(%%mm6, %%mm7)\
    \
    LBUTTERFLY(%%mm0, %%mm2)\
    LBUTTERFLY(%%mm1, %%mm3)\
    LBUTTERFLY(%%mm4, %%mm6)\
    LBUTTERFLY(%%mm5, %%mm7)\
    \
    LBUTTERFLY(%%mm0, %%mm4)\
    LBUTTERFLY(%%mm1, %%mm5)\
    LBUTTERFLY(%%mm2, %%mm6)\
    LBUTTERFLY(%%mm3, %%mm7)

/* a = |a| per 16 bit word; z is clobbered (sign mask of a) */
#define MMABS(a,z)\
    "pxor " #z ", " #z " \n\t"\
    "pcmpgtw " #a ", " #z " \n\t"\
    "pxor " #z ", " #a " \n\t"\
    "psubw " #z ", " #a " \n\t"

/* like MMABS, then sum += a with unsigned saturation */
#define MMABS_SUM(a,z, sum)\
    "pxor " #z ", " #z " \n\t"\
    "pcmpgtw " #a ", " #z " \n\t"\
    "pxor " #z ", " #a " \n\t"\
    "psubw " #z ", " #a " \n\t"\
    "paddusw " #a ", " #sum " \n\t"

/* interleave a and b with element size n (wd/dq): low half to a, high half to t */
#define SBUTTERFLY(a,b,t,n)\
    "movq " #a ", " #t " \n\t" /* abcd */\
    "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
    "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */

/* 4x4 word transpose of a,b,c,d using t as scratch; results end up in a,d,t,c */
#define TRANSPOSE4(a,b,c,d,t)\
    SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\
    SBUTTERFLY(c,d,b,wd) /* c=imjn b=kolp */\
    SBUTTERFLY(a,c,d,dq) /* a=aeim d=bfjn */\
    SBUTTERFLY(t,b,c,dq) /* t=cgko c=dhlp */

/* load four quadwords from offsets o, o+16, o+32, o+48 of asm operand %1 */
#define LOAD4(o, a, b, c, d)\
    "movq "#o"(%1), " #a " \n\t"\
    "movq "#o"+16(%1), " #b " \n\t"\
    "movq "#o"+32(%1), " #c " \n\t"\
    "movq "#o"+48(%1), " #d " \n\t"

/* store four quadwords to offsets o, o+16, o+32, o+48 of asm operand %1 */
#define STORE4(o, a, b, c, d)\
    "movq "#a", "#o"(%1) \n\t"\
    "movq "#b", "#o"+16(%1) \n\t"\
    "movq "#c", "#o"+32(%1) \n\t"\
    "movq "#d", "#o"+48(%1) \n\t"
/*
 * MMX implementation of the 8x8 Hadamard-transformed difference metric.
 *
 * The difference of the two blocks is written to temp by diff_pixels_mmx()
 * (temp is 128 bytes; presumably 64 16-bit values in 8 rows of 16 bytes --
 * confirm against DCTELEM). The asm block then works on temp in place:
 * butterflies (HADAMARD48) on the left and on the right 4 columns, each
 * followed by 4x4 transposes, then a second round of butterflies whose
 * absolute values are accumulated with saturating adds (paddusw) in mm0.
 * The four 16 bit partial sums are finally folded together, again with
 * saturation, and the low 16 bits are returned.
 *
 * s is not used. The asm statement declares no clobbers although it
 * overwrites mm0-mm7 and the memory behind temp.
 */
static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride){
uint64_t temp[16] __align8;
int sum=0;
diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);
asm volatile(
/* quadwords at byte offset 0 of all 8 rows */
LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7)
HADAMARD48
/* mm7 is spilled to temp so that it can serve as transpose scratch */
"movq %%mm7, 112(%1) \n\t"
TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2)
"movq 112(%1), %%mm7 \n\t"
TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6)
/* quadwords at byte offset 8 of all 8 rows */
LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3)
LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
HADAMARD48
"movq %%mm7, 120(%1) \n\t"
TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2)
"movq 120(%1), %%mm7 \n\t"
TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
/* instead of storing to offset 72 and reloading, the transposed values are
   moved into mm4..mm7 directly */
"movq %%mm7, %%mm5 \n\t"//FIXME remove
"movq %%mm6, %%mm7 \n\t"
"movq %%mm0, %%mm6 \n\t"
// STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove
LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3)
// LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
HADAMARD48
/* absolute sum of the eight result registers; mm7 is parked in temp because
   it is needed as scratch by MMABS */
"movq %%mm7, 64(%1) \n\t"
MMABS(%%mm0, %%mm7)
MMABS_SUM(%%mm1, %%mm7, %%mm0)
MMABS_SUM(%%mm2, %%mm7, %%mm0)
MMABS_SUM(%%mm3, %%mm7, %%mm0)
MMABS_SUM(%%mm4, %%mm7, %%mm0)
MMABS_SUM(%%mm5, %%mm7, %%mm0)
MMABS_SUM(%%mm6, %%mm7, %%mm0)
"movq 64(%1), %%mm1 \n\t"
MMABS_SUM(%%mm1, %%mm7, %%mm0)
/* keep the first partial sum in temp while the second half is processed */
"movq %%mm0, 64(%1) \n\t"
LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7)
HADAMARD48
"movq %%mm7, (%1) \n\t"
MMABS(%%mm0, %%mm7)
MMABS_SUM(%%mm1, %%mm7, %%mm0)
MMABS_SUM(%%mm2, %%mm7, %%mm0)
MMABS_SUM(%%mm3, %%mm7, %%mm0)
MMABS_SUM(%%mm4, %%mm7, %%mm0)
MMABS_SUM(%%mm5, %%mm7, %%mm0)
MMABS_SUM(%%mm6, %%mm7, %%mm0)
"movq (%1), %%mm1 \n\t"
MMABS_SUM(%%mm1, %%mm7, %%mm0)
/* add the parked first partial sum (the values are non-negative already) */
"movq 64(%1), %%mm1 \n\t"
MMABS_SUM(%%mm1, %%mm7, %%mm0)
/* horizontal add of the four words of mm0 into its lowest word */
"movq %%mm0, %%mm1 \n\t"
"psrlq $32, %%mm0 \n\t"
"paddusw %%mm1, %%mm0 \n\t"
"movq %%mm0, %%mm1 \n\t"
"psrlq $16, %%mm0 \n\t"
"paddusw %%mm1, %%mm0 \n\t"
"movd %%mm0, %0 \n\t"
: "=r" (sum)
: "r"(temp)
);
/* only the lowest word holds the total; the word above it is a partial sum */
return sum&0xFFFF;
}
/* 16x16 variant: sum of the 8x8 metric over the four quadrants */
WARPER88_1616(hadamard8_diff_mmx, hadamard8_diff16_mmx)
#if 0
/* Placeholder that does nothing and returns immediately. */
static void just_return()
{
}
@ -579,7 +727,13 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
c->add_bytes= add_bytes_mmx;
c->diff_bytes= diff_bytes_mmx;
c->hadamard8_diff[0]= hadamard8_diff16_mmx;
c->hadamard8_diff[1]= hadamard8_diff_mmx;
c->sad[0]= sad16x16_mmx;
c->sad[1]= sad8x8_mmx;
if (mm_flags & MM_MMXEXT) {
c->pix_abs16x16 = pix_abs16x16_mmx2;
c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
@ -591,6 +745,9 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2;
c->sad[0]= sad16x16_mmx2;
c->sad[1]= sad8x8_mmx2;
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;

@ -274,6 +274,15 @@ int pix_abs8x8_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
\
return sum_ ## suf();\
}\
int sad8x8_ ## suf(void *s, UINT8 *blk2, UINT8 *blk1, int stride)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\
\
sad8_ ## suf(blk1, blk2, stride, 3);\
\
return sum_ ## suf();\
}\
\
int pix_abs8x8_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
{\
@ -324,6 +333,16 @@ int pix_abs16x16_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
\
return sum_ ## suf();\
}\
int sad16x16_ ## suf(void *s, UINT8 *blk2, UINT8 *blk1, int stride)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\
\
sad8_ ## suf(blk1 , blk2 , stride, 4);\
sad8_ ## suf(blk1+8, blk2+8, stride, 4);\
\
return sum_ ## suf();\
}\
int pix_abs16x16_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\

File diff suppressed because it is too large Load Diff

@ -0,0 +1,737 @@
/*
* Motion estimation
* Copyright (c) 2002 Michael Niedermayer
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
/*
 * LOAD_COMMON(x, y): declares the locals shared by the search functions of
 * this template, for the block whose top-left luma sample is at (x, y).
 *
 * It expects `s`, `ref_picture` and `size` to be in scope where it is
 * expanded, and declares:
 *   score_map              s->me.score_map
 *   stride, uvstride       s->linesize, s->uvlinesize
 *   time_pp, time_pb       s->pp_time, s->pb_time
 *   src_y, src_u, src_v    pointers into s->new_picture at (x, y); the chroma
 *                          pointers use (x>>1, y>>1)
 *   ref_y, ref_u, ref_v    the same positions in ref_picture
 *   ref2_y                 the same luma position in s->next_picture
 *   hpel_avg, qpel_avg     averaging function tables for `size`
 *   hpel_put, chroma_hpel_put, qpel_put
 *                          put function tables for `size` (chroma: size+1);
 *                          the no-rounding tables are chosen when
 *                          s->no_rounding is set
 * The expansion ends with an if/else statement, so it has to be the last
 * thing in the declaration part of a block.
 */
//let's hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
//Note, the `unu` declaration is only there to kill these ugly unused var warnings
//NOTE(review): `unu` casts pointers to int, which truncates where pointers are wider than int;
//the value looks unused, but confirm before relying on it
#define LOAD_COMMON(x, y)\
uint32_t * const score_map= s->me.score_map;\
const int stride= s->linesize;\
const int uvstride= s->uvlinesize;\
const int time_pp= s->pp_time;\
const int time_pb= s->pb_time;\
uint8_t * const src_y= s->new_picture.data[0] + ((y) * stride) + (x);\
uint8_t * const src_u= s->new_picture.data[1] + (((y)>>1) * uvstride) + ((x)>>1);\
uint8_t * const src_v= s->new_picture.data[2] + (((y)>>1) * uvstride) + ((x)>>1);\
uint8_t * const ref_y= ref_picture->data[0] + ((y) * stride) + (x);\
uint8_t * const ref_u= ref_picture->data[1] + (((y)>>1) * uvstride) + ((x)>>1);\
uint8_t * const ref_v= ref_picture->data[2] + (((y)>>1) * uvstride) + ((x)>>1);\
uint8_t * const ref2_y= s->next_picture.data[0] + ((y) * stride) + (x);\
op_pixels_func (*hpel_put)[4];\
op_pixels_func (*hpel_avg)[4]= &s->dsp.avg_pixels_tab[size];\
op_pixels_func (*chroma_hpel_put)[4];\
qpel_mc_func (*qpel_put)[16];\
qpel_mc_func (*qpel_avg)[16]= &s->dsp.avg_qpel_pixels_tab[size];\
const __attribute__((unused)) int unu= time_pp + time_pb + (int)src_u + (int)src_v + (int)ref_u + (int)ref_v\
+ (int)ref2_y + (int)hpel_avg + (int)qpel_avg;\
if(s->no_rounding /*FIXME b_type*/){\
hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];\
chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];\
qpel_put= &s->dsp.put_no_rnd_qpel_pixels_tab[size];\
}else{\
hpel_put=& s->dsp.put_pixels_tab[size];\
chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];\
qpel_put= &s->dsp.put_qpel_pixels_tab[size];\
}
#ifdef CMP_HPEL
/*
 * CHECK_HALF_MV(dx, dy, x, y): evaluates the half-pel candidate
 * (2*x+dx, 2*y+dy) and keeps it if it beats the current best.
 *
 * CMP_HPEL stores the raw score in `d`, the motion vector cost relative to
 * (pred_x, pred_y) scaled by penalty_factor is added, and COPY3_IF_LT then
 * updates dmin/bx/by (presumably only when d < dmin -- the macro is defined
 * elsewhere).
 * Uses the caller's locals d, dmin, bx, by, size, mv_penalty, pred_x, pred_y
 * and penalty_factor.
 */
#define CHECK_HALF_MV(dx, dy, x, y)\
{\
const int hx= 2*(x)+(dx);\
const int hy= 2*(y)+(dy);\
CMP_HPEL(d, dx, dy, x, y, size);\
d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
}
#if 0
/**
 * Half-pel refinement, exhaustive variant: all 8 half-pel positions around
 * the full-pel vector are evaluated.
 *
 * This variant sits in the disabled `#if 0` branch; the variant in the
 * `#else` branch is the one that gets compiled.
 * NOTE(review): this body references mv_penalty and penalty_factor, which are
 * neither parameters nor locals here (the compiled variant takes mv_penalty
 * as a parameter and declares penalty_factor) -- that must be resolved
 * before re-enabling this code.
 *
 * @param s            encoder context
 * @param mx_ptr       in: full-pel x, out: x in half-pel units (0 if s->me.skip)
 * @param my_ptr       in: full-pel y, out: y in half-pel units (0 if s->me.skip)
 * @param dmin         score of the full-pel vector
 * @param xmin,ymin,xmax,ymax inclusive full-pel search range
 * @param pred_x,pred_y predicted vector used for the vector cost
 * @param ref_picture  reference picture
 * @param n            8x8 block number inside the macroblock
 * @param size         index into the comparison / put function tables
 * @return the best score found
 */
static int RENAME(hpel_motion_search)(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int xmin, int ymin, int xmax, int ymax,
                                  int pred_x, int pred_y, Picture *ref_picture,
                                  int n, int size)
{
    UINT8 *ptr;

    const int xx = 16 * s->mb_x + 8*(n&1);
    const int yy = 16 * s->mb_y + 8*(n>>1);
    const int mx = *mx_ptr;
    const int my = *my_ptr;

    LOAD_COMMON(xx, yy);

 //   INIT;
 //FIXME factorize
    me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub;

    if(s->no_rounding /*FIXME b_type*/){
        hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];
        chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];
    }else{
        hpel_put=& s->dsp.put_pixels_tab[size];
        chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
    }
    cmp= s->dsp.me_cmp[size];
    chroma_cmp= s->dsp.me_cmp[size+1];
    cmp_sub= s->dsp.me_sub_cmp[size];
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];

    if(s->me.skip){ //FIXME somehow move up (benchmark)
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }

    /* the full-pel score was computed with me_cmp; recompute it with the
     * sub-pel comparison function when the two differ */
    if(s->avctx->me_cmp != s->avctx->me_sub_cmp){
        CMP_HPEL(dmin, 0, 0, mx, my, size);
        if(mx || my)
            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
    }

    /* refine only when all 8 neighbours are inside the search range */
    if (mx > xmin && mx < xmax &&
        my > ymin && my < ymax) {
        int bx=2*mx, by=2*my;
        int d= dmin;

        CHECK_HALF_MV(1, 1, mx-1, my-1)
        CHECK_HALF_MV(0, 1, mx  , my-1)
        CHECK_HALF_MV(1, 1, mx  , my-1)
        CHECK_HALF_MV(1, 0, mx-1, my  )
        CHECK_HALF_MV(1, 0, mx  , my  )
        CHECK_HALF_MV(1, 1, mx-1, my  )
        CHECK_HALF_MV(0, 1, mx  , my  )
        CHECK_HALF_MV(1, 1, mx  , my  )

        /* the result must lie inside the search range (the condition used to
         * be inverted, which would have fired on every valid result) */
        assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);

        *mx_ptr = bx;
        *my_ptr = by;
    }else{
        *mx_ptr =2*mx;
        *my_ptr =2*my;
    }

    return dmin;
}
#else
/**
 * Half-pel refinement of a full-pel motion vector.
 *
 * Instead of testing all 8 half-pel neighbours, the stored scores of the four
 * full-pel neighbours (top, left, right, bottom) are compared and only the 4
 * half-pel positions on the more promising sides are evaluated.
 *
 * @param s            encoder context
 * @param mx_ptr       in: full-pel x, out: x in half-pel units (0 if s->me.skip)
 * @param my_ptr       in: full-pel y, out: y in half-pel units (0 if s->me.skip)
 * @param dmin         score of the full-pel vector
 * @param xmin,ymin,xmax,ymax inclusive full-pel search range
 * @param pred_x,pred_y predicted vector used for the vector cost
 * @param ref_picture  reference picture (used by LOAD_COMMON)
 * @param n            8x8 block number inside the macroblock
 * @param size         index into the comparison / put function tables
 * @param mv_penalty   vector cost table, indexed by the difference to the prediction
 * @return the best score found
 */
static int RENAME(hpel_motion_search)(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
int xmin, int ymin, int xmax, int ymax,
int pred_x, int pred_y, Picture *ref_picture,
int n, int size, uint16_t * const mv_penalty)
{
/* top-left luma position of the block: n selects one of the four 8x8 blocks */
const int xx = 16 * s->mb_x + 8*(n&1);
const int yy = 16 * s->mb_y + 8*(n>>1);
const int mx = *mx_ptr;
const int my = *my_ptr;
const int penalty_factor= s->me.sub_penalty_factor;
me_cmp_func cmp_sub, chroma_cmp_sub;
LOAD_COMMON(xx, yy);
//FIXME factorize
cmp_sub= s->dsp.me_sub_cmp[size];
chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
if(s->me.skip){ //FIXME move out of hpel?
*mx_ptr = 0;
*my_ptr = 0;
return dmin;
}
/* dmin came from the full-pel comparison function; recompute it with the
 * sub-pel one when the two are configured differently */
if(s->avctx->me_cmp != s->avctx->me_sub_cmp){
CMP_HPEL(dmin, 0, 0, mx, my, size);
if(mx || my)
dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
}
/* refine only when every neighbour is inside the search range */
if (mx > xmin && mx < xmax &&
my > ymin && my < ymax) {
int bx=2*mx, by=2*my;
int d= dmin;
const int index= (my<<ME_MAP_SHIFT) + mx;
/* stored full-pel scores of the top / left / right / bottom neighbours plus
 * their vector cost; NOTE(review): this assumes the full-pel search has
 * written these score_map entries for the current block -- confirm */
const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
+ (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*penalty_factor;
const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]
+ (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*penalty_factor;
const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]
+ (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*penalty_factor;
const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
+ (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*penalty_factor;
/* pick the half-pel candidates on the sides whose full-pel neighbour scored lower */
if(t<=b){
CHECK_HALF_MV(0, 1, mx ,my-1)
if(l<=r){
CHECK_HALF_MV(1, 1, mx-1, my-1)
if(t+r<=b+l){
CHECK_HALF_MV(1, 1, mx , my-1)
}else{
CHECK_HALF_MV(1, 1, mx-1, my )
}
CHECK_HALF_MV(1, 0, mx-1, my )
}else{
CHECK_HALF_MV(1, 1, mx , my-1)
if(t+l<=b+r){
CHECK_HALF_MV(1, 1, mx-1, my-1)
}else{
CHECK_HALF_MV(1, 1, mx , my )
}
CHECK_HALF_MV(1, 0, mx , my )
}
}else{
if(l<=r){
if(t+l<=b+r){
CHECK_HALF_MV(1, 1, mx-1, my-1)
}else{
CHECK_HALF_MV(1, 1, mx , my )
}
CHECK_HALF_MV(1, 0, mx-1, my)
CHECK_HALF_MV(1, 1, mx-1, my)
}else{
if(t+r<=b+l){
CHECK_HALF_MV(1, 1, mx , my-1)
}else{
CHECK_HALF_MV(1, 1, mx-1, my)
}
CHECK_HALF_MV(1, 0, mx , my)
CHECK_HALF_MV(1, 1, mx , my)
}
CHECK_HALF_MV(0, 1, mx , my)
}
/* the refined vector must still be inside the search range */
assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
*mx_ptr = bx;
*my_ptr = by;
}else{
/* no refinement possible at the border: just convert to half-pel units */
*mx_ptr =2*mx;
*my_ptr =2*my;
}
return dmin;
}
#endif
#endif /* CMP_HPEL */
#ifdef CMP_QPEL
/*
 * CHECK_QUARTER_MV(dx, dy, x, y): evaluates the quarter-pel candidate
 * (4*x+dx, 4*y+dy) and keeps it if it beats the current best.
 *
 * CMP_QPEL stores the raw score in `d`, the motion vector cost relative to
 * (pred_x, pred_y) scaled by penalty_factor is added, and COPY3_IF_LT then
 * updates dmin/bx/by (presumably only when d < dmin -- the macro is defined
 * elsewhere).
 * Uses the caller's locals d, dmin, bx, by, size, mv_penalty, pred_x, pred_y
 * and penalty_factor.
 */
#define CHECK_QUARTER_MV(dx, dy, x, y)\
{\
const int hx= 4*(x)+(dx);\
const int hy= 4*(y)+(dy);\
CMP_QPEL(d, dx, dy, x, y, size);\
d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
}
/**
 * Quarter-pel refinement of a full-pel motion vector.
 *
 * The scores of the 48 sub-pel offsets with nx, ny in [-3, 3] (the offset
 * (0,0) is skipped) are first estimated from the stored full-pel scores
 * around (mx, my). The 8 offsets with the lowest estimate are then evaluated
 * for real with CHECK_QUARTER_MV.
 *
 * @param s            encoder context
 * @param mx_ptr       in: full-pel x, out: x in quarter-pel units (0 if s->me.skip)
 * @param my_ptr       in: full-pel y, out: y in quarter-pel units (0 if s->me.skip)
 * @param dmin         score of the full-pel vector
 * @param xmin,ymin,xmax,ymax inclusive full-pel search range
 * @param pred_x,pred_y predicted vector used for the vector cost
 * @param ref_picture  reference picture (used by LOAD_COMMON)
 * @param n            8x8 block number inside the macroblock
 * @param size         index into the comparison / put function tables
 * @param mv_penalty   vector cost table, indexed by the difference to the prediction
 * @return the best score found
 */
static int RENAME(qpel_motion_search)(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
int xmin, int ymin, int xmax, int ymax,
int pred_x, int pred_y, Picture *ref_picture,
int n, int size, uint16_t * const mv_penalty)
{
/* top-left luma position of the block: n selects one of the four 8x8 blocks */
const int xx = 16 * s->mb_x + 8*(n&1);
const int yy = 16 * s->mb_y + 8*(n>>1);
const int mx = *mx_ptr;
const int my = *my_ptr;
const int penalty_factor= s->me.sub_penalty_factor;
const int map_generation= s->me.map_generation;
uint32_t *map= s->me.map;
me_cmp_func cmp, chroma_cmp;
me_cmp_func cmp_sub, chroma_cmp_sub;
LOAD_COMMON(xx, yy);
cmp= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1]; //factorize FIXME
//FIXME factorize
cmp_sub= s->dsp.me_sub_cmp[size];
chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
if(s->me.skip){ //FIXME somehow move up (benchmark)
*mx_ptr = 0;
*my_ptr = 0;
return dmin;
}
/* dmin came from the full-pel comparison function; recompute it with the
 * sub-pel one when the two are configured differently */
if(s->avctx->me_cmp != s->avctx->me_sub_cmp){
CMP_QPEL(dmin, 0, 0, mx, my, size);
if(mx || my)
dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
}
/* refine only when every neighbour is inside the search range */
if (mx > xmin && mx < xmax &&
my > ymin && my < ymax) {
int bx=4*mx, by=4*my;
int d= dmin;
int i, nx, ny;
const int index= (my<<ME_MAP_SHIFT) + mx;
/* stored full-pel scores: top, left, right, bottom and centre;
 * NOTE(review): this assumes the full-pel search has written these
 * score_map entries for the current block -- confirm */
const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
const int c= score_map[(index )&(ME_MAP_SIZE-1)];
/* best[]: the 8 lowest estimated scores in ascending order;
 * best_pos[]: the matching quarter-pel positions */
int best[8];
int best_pos[8][2];
/* fills every byte with 0x40, i.e. every entry starts as a large positive
 * value (0x40404040 with 32-bit int) */
memset(best, 64, sizeof(int)*8);
/* NOTE(review): best_pos is not initialised; an entry is only written when a
 * candidate's estimate is below the initial best[] value, yet all 8 entries
 * are read in the evaluation loop below. */
#if 1
if(s->avctx->dia_size>=2){
/* the diagonal neighbours are read from score_map as well;
 * NOTE(review): presumably valid because a diamond of size >= 2 has visited
 * them -- confirm */
const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
for(ny= -3; ny <= 3; ny++){
for(nx= -3; nx <= 3; nx++){
/* quadratic estimate along x for the top, centre and bottom row (each scaled
 * by 32), then the same form along y (scaled by 32 again, 1024 in total) */
const int t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
const int c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c;
const int b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
int score= ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2;
int i;
/* within [-3,3] this only skips the full-pel position (0,0) */
if((nx&3)==0 && (ny&3)==0) continue;
score += 1024*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
// if(nx&1) score-=1024*s->me.penalty_factor;
// if(ny&1) score-=1024*s->me.penalty_factor;
/* insertion into the sorted best[] list, dropping the last entry */
for(i=0; i<8; i++){
if(score < best[i]){
memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
best[i]= score;
best_pos[i][0]= nx + 4*mx;
best_pos[i][1]= ny + 4*my;
break;
}
}
}
}
}else{
/* small diamond: only t/l/r/b/c are taken from score_map, the top-left
 * score is computed explicitly below */
int tl;
const int cx = 4*(r - l);
const int cx2= r + l - 2*c;
const int cy = 4*(b - t);
const int cy2= b + t - 2*c;
int cxy;
/* the "&& 0" disables the cached lookup, so tl is always recomputed */
if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
}else{
CMP(tl, mx-1, my-1, size); //FIXME wrong if chroma me is different
}
cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
/* sanity checks: at the offsets (+-4,0), (0,+-4) and (-4,-4) the model below
 * reproduces 32 times the corresponding neighbour score */
assert(16*cx2 + 4*cx + 32*c == 32*r);
assert(16*cx2 - 4*cx + 32*c == 32*l);
assert(16*cy2 + 4*cy + 32*c == 32*b);
assert(16*cy2 - 4*cy + 32*c == 32*t);
assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
for(ny= -3; ny <= 3; ny++){
for(nx= -3; nx <= 3; nx++){
/* estimated score at offset (nx, ny), scaled by 32 */
int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
int i;
/* within [-3,3] this only skips the full-pel position (0,0) */
if((nx&3)==0 && (ny&3)==0) continue;
score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
// if(nx&1) score-=32*s->me.penalty_factor;
// if(ny&1) score-=32*s->me.penalty_factor;
/* insertion into the sorted best[] list, dropping the last entry */
for(i=0; i<8; i++){
if(score < best[i]){
memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
best[i]= score;
best_pos[i][0]= nx + 4*mx;
best_pos[i][1]= ny + 4*my;
break;
}
}
}
}
}
/* evaluate the 8 most promising positions for real; &3 is the quarter-pel
 * fraction and >>2 the full-pel part of each coordinate */
for(i=0; i<8; i++){
nx= best_pos[i][0];
ny= best_pos[i][1];
CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
}
#if 0
/* disabled statistics about how far the winner is from the full-pel position */
nx= FFMAX(4*mx - bx, bx - 4*mx);
ny= FFMAX(4*my - by, by - 4*my);
static int stats[4][4];
stats[nx][ny]++;
if(256*256*256*64 % (stats[0][0]+1) ==0){
for(i=0; i<16; i++){
if((i&3)==0) printf("\n");
printf("%6d ", stats[0][i]);
}
printf("\n");
}
#endif
#else
/* alternative (not compiled): check the 8 half-pel positions, then the 8
 * quarter-pel neighbours of the best of them */
CHECK_QUARTER_MV(2, 2, mx-1, my-1)
CHECK_QUARTER_MV(0, 2, mx , my-1)
CHECK_QUARTER_MV(2, 2, mx , my-1)
CHECK_QUARTER_MV(2, 0, mx , my )
CHECK_QUARTER_MV(2, 2, mx , my )
CHECK_QUARTER_MV(0, 2, mx , my )
CHECK_QUARTER_MV(2, 2, mx-1, my )
CHECK_QUARTER_MV(2, 0, mx-1, my )
nx= bx;
ny= by;
for(i=0; i<8; i++){
int ox[8]= {0, 1, 1, 1, 0,-1,-1,-1};
int oy[8]= {1, 1, 0,-1,-1,-1, 0, 1};
CHECK_QUARTER_MV((nx + ox[i])&3, (ny + oy[i])&3, (nx + ox[i])>>2, (ny + oy[i])>>2)
}
#endif
#if 0
//outer ring
CHECK_QUARTER_MV(1, 3, mx-1, my-1)
CHECK_QUARTER_MV(1, 2, mx-1, my-1)
CHECK_QUARTER_MV(1, 1, mx-1, my-1)
CHECK_QUARTER_MV(2, 1, mx-1, my-1)
CHECK_QUARTER_MV(3, 1, mx-1, my-1)
CHECK_QUARTER_MV(0, 1, mx , my-1)
CHECK_QUARTER_MV(1, 1, mx , my-1)
CHECK_QUARTER_MV(2, 1, mx , my-1)
CHECK_QUARTER_MV(3, 1, mx , my-1)
CHECK_QUARTER_MV(3, 2, mx , my-1)
CHECK_QUARTER_MV(3, 3, mx , my-1)
CHECK_QUARTER_MV(3, 0, mx , my )
CHECK_QUARTER_MV(3, 1, mx , my )
CHECK_QUARTER_MV(3, 2, mx , my )
CHECK_QUARTER_MV(3, 3, mx , my )
CHECK_QUARTER_MV(2, 3, mx , my )
CHECK_QUARTER_MV(1, 3, mx , my )
CHECK_QUARTER_MV(0, 3, mx , my )
CHECK_QUARTER_MV(3, 3, mx-1, my )
CHECK_QUARTER_MV(2, 3, mx-1, my )
CHECK_QUARTER_MV(1, 3, mx-1, my )
CHECK_QUARTER_MV(1, 2, mx-1, my )
CHECK_QUARTER_MV(1, 1, mx-1, my )
CHECK_QUARTER_MV(1, 0, mx-1, my )
#endif
/* the refined vector must still be inside the search range */
assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
*mx_ptr = bx;
*my_ptr = by;
}else{
/* no refinement possible at the border: just convert to quarter-pel units */
*mx_ptr =4*mx;
*my_ptr =4*my;
}
return dmin;
}
#endif /* CMP_QPEL */
/*
 * CHECK_MV(x, y): evaluates the full-pel candidate (x, y) unless it has
 * already been evaluated during the current search.
 *
 * map[] records which position was last evaluated in each slot: the key
 * combines the position with map_generation, the slot index is the position
 * wrapped to ME_MAP_SIZE. On a miss the raw CMP score is stored in
 * score_map[] (without the vector cost), the vector cost is added, and
 * COPY3_IF_LT updates dmin/best[] (presumably only when d < dmin -- the macro
 * is defined elsewhere).
 * Uses the caller's locals d, dmin, best, map, score_map, map_generation,
 * size, shift, mv_penalty, pred_x, pred_y and penalty_factor.
 * Note that x and y are expanded several times.
 */
#define CHECK_MV(x,y)\
{\
const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
if(map[index]!=key){\
CMP(d, x, y, size);\
map[index]= key;\
score_map[index]= d;\
d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
}\
}
/*
 * CHECK_MV_DIR(x, y, new_dir): same as CHECK_MV, but when the candidate
 * becomes the new best it also records new_dir in the caller's next_dir.
 *
 * Uses the caller's locals d, dmin, best, next_dir, map, score_map,
 * map_generation, size, shift, mv_penalty, pred_x, pred_y and penalty_factor.
 * Note that x and y are expanded several times.
 */
#define CHECK_MV_DIR(x,y,new_dir)\
{\
const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
if(map[index]!=key){\
CMP(d, x, y, size);\
map[index]= key;\
score_map[index]= d;\
d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
if(d<dmin){\
best[0]=x;\
best[1]=y;\
dmin=d;\
next_dir= new_dir;\
}\
}\
}
/*
 * check(x, y, S, v): debugging aid that prints a message when (x, y) lies
 * outside the search range scaled by 2^S.
 *
 * Expects xmin, ymin, xmax, ymax and s to be in scope. Each message contains
 * the violated limit, x, y, s->mb_x and s->mb_y, followed by the name of the
 * limit and the tag v; no newline is printed.
 * The expansion is a sequence of plain `if` statements and already ends with
 * a semicolon, so call sites use it without one.
 *
 * The last line must not end with a backslash: it would splice whatever
 * source line follows the macro into the macro body.
 */
#define check(x,y,S,v)\
if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);
/**
 * Iterative search with the smallest diamond: the four direct neighbours of
 * the current best position are tried, and the search moves on for as long
 * as one of them improves the score.
 *
 * The neighbour lying opposite to the last move is not tried again (it is
 * the position the search just came from).
 *
 * @param s            encoder context
 * @param best         in/out: best[0] = x, best[1] = y of the best position
 * @param dmin         score of the position passed in best[]
 * @param ref_picture  reference picture (used by LOAD_COMMON)
 * @param pred_x,pred_y,penalty_factor,shift,mv_penalty
 *                     inputs of the vector cost added by CHECK_MV_DIR
 * @param xmin,ymin,xmax,ymax inclusive search range
 * @param map,map_generation cache of already evaluated positions
 * @param size         index into the comparison function table
 * @return the best score found
 */
static inline int RENAME(small_diamond_search)(MpegEncContext * s, int *best, int dmin,
                                       Picture *ref_picture,
                                       int const pred_x, int const pred_y, int const penalty_factor,
                                       int const xmin, int const ymin, int const xmax, int const ymax, int const shift,
                                       uint32_t *map, int map_generation, int size, uint16_t * const mv_penalty
                                       )
{
    me_cmp_func cmp, chroma_cmp;
    int next_dir=-1;
    LOAD_COMMON(s->mb_x*16, s->mb_y*16);

    cmp= s->dsp.me_cmp[size];
    chroma_cmp= s->dsp.me_cmp[size+1];

    do{
        int d;
        const int dir= next_dir;   /* direction of the previous step, -1 at the start */
        const int x= best[0];
        const int y= best[1];

        /* stays -1 unless one of the candidates below becomes the new best */
        next_dir= -1;

//printf("%d", dir);
        if(x>xmin && dir!=2) CHECK_MV_DIR(x-1, y  , 0)
        if(y>ymin && dir!=3) CHECK_MV_DIR(x  , y-1, 1)
        if(x<xmax && dir!=0) CHECK_MV_DIR(x+1, y  , 2)
        if(y<ymax && dir!=1) CHECK_MV_DIR(x  , y+1, 3)
    }while(next_dir != -1);

    return dmin;
}
/**
 * Diamond search with growing diamond size.
 *
 * Starting with size 1, all positions on the outline of the diamond around
 * the current best position are tried. When one of them becomes the new best
 * the size starts again at 1 around it, otherwise the size is increased
 * until it exceeds s->avctx->dia_size.
 *
 * Each of the four edges is walked from one vertex towards the next and is
 * clipped against the search range. The range limits are inclusive, so the
 * upper clip of `dir` is "distance to the limit + 1"; without the +1 the
 * candidates lying exactly on xmax / ymin / xmin / ymax were never tried.
 *
 * @param s            encoder context
 * @param best         in/out: best[0] = x, best[1] = y of the best position
 * @param dmin         score of the position passed in best[]
 * @param ref_picture  reference picture (used by LOAD_COMMON)
 * @param pred_x,pred_y,penalty_factor,shift,mv_penalty
 *                     inputs of the vector cost added by CHECK_MV
 * @param xmin,ymin,xmax,ymax inclusive search range
 * @param map,map_generation cache of already evaluated positions
 * @param size         index into the comparison function table
 * @return the best score found
 */
static inline int RENAME(var_diamond_search)(MpegEncContext * s, int *best, int dmin,
                                       Picture *ref_picture,
                                       int const pred_x, int const pred_y, int const penalty_factor,
                                       int const xmin, int const ymin, int const xmax, int const ymax, int const shift,
                                       uint32_t *map, int map_generation, int size, uint16_t * const mv_penalty
                                       )
{
    me_cmp_func cmp, chroma_cmp;
    int dia_size;
    LOAD_COMMON(s->mb_x*16, s->mb_y*16);

    cmp= s->dsp.me_cmp[size];
    chroma_cmp= s->dsp.me_cmp[size+1];

    for(dia_size=1; dia_size<=s->avctx->dia_size; dia_size++){
        int dir, start, end;
        const int x= best[0];
        const int y= best[1];

        /* edge from (x, y+dia_size) towards (x+dia_size, y):
         * needs y+dia_size-dir <= ymax and x+dir <= xmax */
        start= FFMAX(0, y + dia_size - ymax);
        end  = FFMIN(dia_size, xmax - x + 1);
        for(dir= start; dir<end; dir++){
            int d;

//check(x + dir,y + dia_size - dir,0, a0)
            CHECK_MV(x + dir           , y + dia_size - dir);
        }

        /* edge from (x+dia_size, y) towards (x, y-dia_size):
         * needs x+dia_size-dir <= xmax and y-dir >= ymin */
        start= FFMAX(0, x + dia_size - xmax);
        end  = FFMIN(dia_size, y - ymin + 1);
        for(dir= start; dir<end; dir++){
            int d;

//check(x + dia_size - dir, y - dir,0, a1)
            CHECK_MV(x + dia_size - dir, y - dir           );
        }

        /* edge from (x, y-dia_size) towards (x-dia_size, y):
         * needs y-dia_size+dir >= ymin and x-dir >= xmin */
        start= FFMAX(0, -y + dia_size + ymin );
        end  = FFMIN(dia_size, x - xmin + 1);
        for(dir= start; dir<end; dir++){
            int d;

//check(x - dir,y - dia_size + dir,0, a2)
            CHECK_MV(x - dir           , y - dia_size + dir);
        }

        /* edge from (x-dia_size, y) towards (x, y+dia_size):
         * needs x-dia_size+dir >= xmin and y+dir <= ymax */
        start= FFMAX(0, -x + dia_size + xmin );
        end  = FFMIN(dia_size, ymax - y + 1);
        for(dir= start; dir<end; dir++){
            int d;

//check(x - dia_size + dir, y + dir,0, a3)
            CHECK_MV(x - dia_size + dir, y + dir           );
        }

        /* a better position was found: restart with the smallest diamond
         * around it (the loop increment turns 0 into 1) */
        if(x!=best[0] || y!=best[1])
            dia_size=0;
    }
    return dmin;
}
/**
 * Full-pel EPZS motion search for a 16x16 macroblock (size index 0).
 *
 * The zero vector and a set of predictor vectors from P[] are evaluated
 * first; the best of them is then refined by a diamond search.
 * P_LEFT, P_TOP, P_TOPRIGHT, P_MEDIAN, P_LAST, P_LAST_RIGHT and P_LAST_BOTTOM
 * are accessors defined outside this file chunk (presumably entries of P[]).
 * The predictors are shifted right by `shift` before use, i.e. converted
 * from sub-pel to full-pel units.
 *
 * @param s            encoder context
 * @param block        not referenced by this function
 * @param mx_ptr,my_ptr out: best vector in full-pel units
 * @param P            candidate predictor vectors
 * @param pred_x,pred_y predicted vector used for the vector cost
 * @param xmin,ymin,xmax,ymax inclusive search range
 * @param ref_picture  reference picture (used by LOAD_COMMON)
 * @param mv_penalty   vector cost table
 * @return the best score found
 */
static int RENAME(epzs_motion_search)(MpegEncContext * s, int block,
int *mx_ptr, int *my_ptr,
int P[10][2], int pred_x, int pred_y,
int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, uint16_t * const mv_penalty)
{
int best[2]={0, 0};
int d, dmin;
/* number of fractional bits of a vector: 1 for half-pel, 2 for quarter-pel */
const int shift= 1+s->quarter_sample;
uint32_t *map= s->me.map;
int map_generation;
const int penalty_factor= s->me.penalty_factor;
const int size=0;
me_cmp_func cmp, chroma_cmp;
LOAD_COMMON(s->mb_x*16, s->mb_y*16);
cmp= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1];
map_generation= update_map_generation(s);
/* score of the zero vector; it is entered into the cache by hand (slot 0) */
CMP(dmin, 0, 0, size);
map[0]= map_generation;
score_map[0]= dmin;
/* first line */
if ((s->mb_y == 0 || s->first_slice_line)) {
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
}else{
/* early termination: the zero vector is already good and all spatial
 * predictors are zero; only taken when dia_size is 0 */
if(dmin<256 && ( P_LEFT[0] |P_LEFT[1]
|P_TOP[0] |P_TOP[1]
|P_TOPRIGHT[0]|P_TOPRIGHT[1])==0 && s->avctx->dia_size==0){
*mx_ptr= 0;
*my_ptr= 0;
s->me.skip=1;
return dmin;
}
CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
/* the remaining predictors are only tried while the score is still high */
if(dmin>256*2){
CHECK_MV(P_LAST[0] >>shift, P_LAST[1] >>shift)
CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
}
}
if(dmin>256*4){
CHECK_MV(P_LAST_RIGHT[0] >>shift, P_LAST_RIGHT[1] >>shift)
CHECK_MV(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
}
#if 0 //does only slow things down
if(dmin>512*3){
int step;
dmin= score_map[0];
best[0]= best[1]=0;
for(step=128; step>0; step>>=1){
const int step2= step;
int y;
for(y=-step2+best[1]; y<=step2+best[1]; y+=step){
int x;
if(y<ymin || y>ymax) continue;
for(x=-step2+best[0]; x<=step2+best[0]; x+=step){
if(x<xmin || x>xmax) continue;
if(x==best[0] && y==best[1]) continue;
CHECK_MV(x,y)
}
}
}
}
#endif
//check(best[0],best[1],0, b0)
/* refinement around the best predictor; the user-set diamond size selects the variant */
if(s->avctx->dia_size<2)
dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture,
pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
shift, map, map_generation, size, mv_penalty);
else
dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture,
pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
shift, map, map_generation, size, mv_penalty);
//check(best[0],best[1],0, b1)
*mx_ptr= best[0];
*my_ptr= best[1];
// printf("%d %d %d \n", best[0], best[1], dmin);
return dmin;
}
#ifndef CMP_DIRECT /* no 4mv search needed in direct mode */
/**
 * Full-pel EPZS motion search for one 8x8 block of a macroblock (size index 1).
 *
 * A set of predictor vectors from P[] is evaluated and the best of them is
 * refined by a diamond search. Unlike the 16x16 search, the zero vector is
 * not evaluated up front: dmin starts at a large constant.
 * P_LEFT, P_TOP, P_TOPRIGHT, P_MEDIAN, P_LAST, P_LAST_RIGHT, P_LAST_BOTTOM
 * and P_MV1 are accessors defined outside this file chunk (presumably
 * entries of P[]). The predictors are shifted right by `shift` before use,
 * i.e. converted from sub-pel to full-pel units.
 *
 * @param s            encoder context
 * @param block        8x8 block number; bit 0 selects the column, bit 1 the row
 * @param mx_ptr,my_ptr out: best vector in full-pel units
 * @param P            candidate predictor vectors
 * @param pred_x,pred_y predicted vector used for the vector cost
 * @param xmin,ymin,xmax,ymax inclusive search range
 * @param ref_picture  reference picture (used by LOAD_COMMON)
 * @param mv_penalty   vector cost table
 * @return the best score found
 */
static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block,
int *mx_ptr, int *my_ptr,
int P[10][2], int pred_x, int pred_y,
int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, uint16_t * const mv_penalty)
{
int best[2]={0, 0};
int d, dmin;
/* number of fractional bits of a vector: 1 for half-pel, 2 for quarter-pel */
const int shift= 1+s->quarter_sample;
uint32_t *map= s->me.map;
int map_generation;
const int penalty_factor= s->me.penalty_factor;
const int size=1;
me_cmp_func cmp, chroma_cmp;
/* top-left luma position of the selected 8x8 block */
LOAD_COMMON((s->mb_x*2 + (block&1))*8, (s->mb_y*2 + (block>>1))*8);
cmp= s->dsp.me_cmp[size];
chroma_cmp= s->dsp.me_cmp[size+1];
map_generation= update_map_generation(s);
/* larger than any real score, so the first evaluated candidate always wins */
dmin = 1000000;
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
/* first line */
if ((s->mb_y == 0 || s->first_slice_line) && block<2) {
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
}else{
CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
//FIXME try some early stop
/* the remaining predictors are only tried while the score is still high */
if(dmin>64*2){
CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
}
}
if(dmin>64*4){
CHECK_MV(P_LAST_RIGHT[0]>>shift, P_LAST_RIGHT[1]>>shift)
CHECK_MV(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
}
/* refinement around the best predictor; the user-set diamond size selects the variant */
if(s->avctx->dia_size<2)
dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture,
pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
shift, map, map_generation, size, mv_penalty);
else
dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture,
pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
shift, map, map_generation, size, mv_penalty);
*mx_ptr= best[0];
*my_ptr= best[1];
// printf("%d %d %d \n", best[0], best[1], dmin);
return dmin;
}
#endif /* !CMP_DIRECT */

@ -526,7 +526,7 @@ void ff_mpeg1_encode_init(MpegEncContext *s)
}
}
}
s->mv_penalty= mv_penalty;
s->me.mv_penalty= mv_penalty;
s->fcode_tab= fcode_tab;
s->min_qcoeff=-255;
s->max_qcoeff= 255;

@ -43,8 +43,6 @@ static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w);
static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c;
static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h,
int src_x, int src_y, int w, int h);
/* enable all paranoid tests for rounding, overflows, etc... */
@ -64,8 +62,8 @@ static const uint16_t aanscales[64] = {
19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
8867 , 12299, 11585, 10426, 8867, 6967, 4799, 2446,
4520 , 6270, 5906, 5315, 4520, 3552, 2446, 1247
};
/* Input permutation for the simple_idct_mmx */
@ -87,9 +85,6 @@ static const uint8_t h263_chroma_roundtab[16] = {
static UINT16 (*default_mv_penalty)[MAX_MV*2+1]=NULL;
static UINT8 default_fcode_tab[MAX_MV*2+1];
/* default motion estimation */
int motion_estimation_method = ME_EPZS;
static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
const UINT16 *quant_matrix, int bias, int qmin, int qmax)
{
@ -394,15 +389,13 @@ int MPV_common_init(MpegEncContext *s)
CHECKED_ALLOCZ(s->b_back_mv_table , mv_table_size * 2 * sizeof(INT16))
CHECKED_ALLOCZ(s->b_bidir_forw_mv_table , mv_table_size * 2 * sizeof(INT16))
CHECKED_ALLOCZ(s->b_bidir_back_mv_table , mv_table_size * 2 * sizeof(INT16))
CHECKED_ALLOCZ(s->b_direct_forw_mv_table, mv_table_size * 2 * sizeof(INT16))
CHECKED_ALLOCZ(s->b_direct_back_mv_table, mv_table_size * 2 * sizeof(INT16))
CHECKED_ALLOCZ(s->b_direct_mv_table , mv_table_size * 2 * sizeof(INT16))
//FIXME should be linesize instead of s->width*2 but that isn't known before get_buffer()
CHECKED_ALLOCZ(s->me_scratchpad, s->width*2*16*3*sizeof(uint8_t))
CHECKED_ALLOCZ(s->me.scratchpad, s->width*2*16*3*sizeof(uint8_t))
CHECKED_ALLOCZ(s->me_map , ME_MAP_SIZE*sizeof(uint32_t))
CHECKED_ALLOCZ(s->me_score_map, ME_MAP_SIZE*sizeof(uint16_t))
CHECKED_ALLOCZ(s->me.map , ME_MAP_SIZE*sizeof(uint32_t))
CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
if(s->codec_id==CODEC_ID_MPEG4){
CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
@ -498,8 +491,6 @@ void MPV_common_end(MpegEncContext *s)
av_freep(&s->b_back_mv_table);
av_freep(&s->b_bidir_forw_mv_table);
av_freep(&s->b_bidir_back_mv_table);
av_freep(&s->b_direct_forw_mv_table);
av_freep(&s->b_direct_back_mv_table);
av_freep(&s->b_direct_mv_table);
av_freep(&s->motion_val);
av_freep(&s->dc_val[0]);
@ -508,9 +499,9 @@ void MPV_common_end(MpegEncContext *s)
av_freep(&s->mbintra_table);
av_freep(&s->cbp_table);
av_freep(&s->pred_dir_table);
av_freep(&s->me_scratchpad);
av_freep(&s->me_map);
av_freep(&s->me_score_map);
av_freep(&s->me.scratchpad);
av_freep(&s->me.map);
av_freep(&s->me.score_map);
av_freep(&s->mbskip_table);
av_freep(&s->bitstream_buffer);
@ -566,6 +557,7 @@ int MPV_encode_init(AVCodecContext *avctx)
s->chroma_elim_threshold= avctx->chroma_elim_threshold;
s->strict_std_compliance= avctx->strict_std_compliance;
s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
s->mpeg_quant= avctx->mpeg_quant;
if (s->gop_size <= 1) {
@ -575,12 +567,7 @@ int MPV_encode_init(AVCodecContext *avctx)
s->intra_only = 0;
}
/* ME algorithm */
if (avctx->me_method == 0)
/* For compatibility */
s->me_method = motion_estimation_method;
else
s->me_method = avctx->me_method;
s->me_method = avctx->me_method;
/* Fixed QSCALE */
s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
@ -713,13 +700,14 @@ int MPV_encode_init(AVCodecContext *avctx)
}
}
}
s->mv_penalty= default_mv_penalty;
s->me.mv_penalty= default_mv_penalty;
s->fcode_tab= default_fcode_tab;
s->y_dc_scale_table=
s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
/* dont use mv_penalty table for crap MV as it would be confused */
if (s->me_method < ME_EPZS) s->mv_penalty = default_mv_penalty;
//FIXME remove after fixing / removing old ME
if (s->me_method < ME_EPZS) s->me.mv_penalty = default_mv_penalty;
s->encoding = 1;
@ -727,6 +715,8 @@ int MPV_encode_init(AVCodecContext *avctx)
if (MPV_common_init(s) < 0)
return -1;
ff_init_me(s);
#ifdef CONFIG_ENCODERS
if (s->out_format == FMT_H263)
h263_encode_init(s);
@ -947,6 +937,18 @@ void MPV_frame_end(MpegEncContext *s)
if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/)
s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
}
if(s->avctx->debug&FF_DEBUG_SKIP){
int x,y;
for(y=0; y<s->mb_height; y++){
for(x=0; x<s->mb_width; x++){
int count= s->mbskip_table[x + y*s->mb_width];
if(count>9) count=9;
printf(" %1d", count);
}
printf("\n");
}
printf("pict type: %d\n", s->pict_type);
}
}
static int get_sae(uint8_t *src, int ref, int stride){
@ -1284,7 +1286,7 @@ static inline void gmc1_motion(MpegEncContext *s,
if(s->flags&CODEC_FLAG_EMU_EDGE){
if(src_x<0 || src_y<0 || src_x + (motion_x&15) + 16 > s->h_edge_pos
|| src_y + (motion_y&15) + 16 > s->v_edge_pos){
emulated_edge_mc(s, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
ff_emulated_edge_mc(s, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
ptr= s->edge_emu_buffer;
emu=1;
}
@ -1322,14 +1324,14 @@ static inline void gmc1_motion(MpegEncContext *s,
offset = (src_y * uvlinesize) + src_x + (src_offset>>1);
ptr = ref_picture[1] + offset;
if(emu){
emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer;
}
s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
ptr = ref_picture[2] + offset;
if(emu){
emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer;
}
s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
@ -1401,7 +1403,7 @@ static inline void gmc_motion(MpegEncContext *s,
}
static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h,
void ff_emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h,
int src_x, int src_y, int w, int h){
int x, y;
int start_y, start_x, end_y, end_x;
@ -1501,7 +1503,7 @@ if(s->quarter_sample)
if(s->flags&CODEC_FLAG_EMU_EDGE){
if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos
|| src_y + (motion_y&1) + h > v_edge_pos){
emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based,
ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based,
src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
ptr= s->edge_emu_buffer + src_offset;
emu=1;
@ -1538,7 +1540,7 @@ if(s->quarter_sample)
offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
ptr = ref_picture[1] + offset;
if(emu){
emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based,
ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based,
src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer + (src_offset >> 1);
}
@ -1546,7 +1548,7 @@ if(s->quarter_sample)
ptr = ref_picture[2] + offset;
if(emu){
emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based,
ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based,
src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer + (src_offset >> 1);
}
@ -1586,7 +1588,7 @@ static inline void qpel_motion(MpegEncContext *s,
if(s->flags&CODEC_FLAG_EMU_EDGE){
if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 16 > s->h_edge_pos
|| src_y + (motion_y&3) + h > v_edge_pos){
emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based,
ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based,
src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
ptr= s->edge_emu_buffer + src_offset;
emu=1;
@ -1631,7 +1633,7 @@ static inline void qpel_motion(MpegEncContext *s,
offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
ptr = ref_picture[1] + offset;
if(emu){
emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based,
ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based,
src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer + (src_offset >> 1);
}
@ -1639,7 +1641,7 @@ static inline void qpel_motion(MpegEncContext *s,
ptr = ref_picture[2] + offset;
if(emu){
emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based,
ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based,
src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer + (src_offset >> 1);
}
@ -1675,6 +1677,10 @@ static inline void MPV_motion(MpegEncContext *s,
ref_picture, 0,
0, pix_op, qpix_op,
s->mv[dir][0][0], s->mv[dir][0][1], 16);
}else if(s->mspel){
ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
ref_picture, pix_op,
s->mv[dir][0][0], s->mv[dir][0][1], 16);
}else{
mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
ref_picture, 0,
@ -1706,7 +1712,7 @@ static inline void MPV_motion(MpegEncContext *s,
if(s->flags&CODEC_FLAG_EMU_EDGE){
if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 8 > s->h_edge_pos
|| src_y + (motion_y&3) + 8 > s->v_edge_pos){
emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
ff_emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
ptr= s->edge_emu_buffer;
}
}
@ -1737,7 +1743,7 @@ static inline void MPV_motion(MpegEncContext *s,
if(s->flags&CODEC_FLAG_EMU_EDGE){
if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos
|| src_y + (motion_y&1) + 8 > s->v_edge_pos){
emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
ff_emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
ptr= s->edge_emu_buffer;
}
}
@ -1784,7 +1790,7 @@ static inline void MPV_motion(MpegEncContext *s,
if(s->flags&CODEC_FLAG_EMU_EDGE){
if(src_x<0 || src_y<0 || src_x + (dxy &1) + 8 > s->h_edge_pos>>1
|| src_y + (dxy>>1) + 8 > s->v_edge_pos>>1){
emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer;
emu=1;
}
@ -1793,7 +1799,7 @@ static inline void MPV_motion(MpegEncContext *s,
ptr = ref_picture[2] + offset;
if(emu){
emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer;
}
pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
@ -1928,7 +1934,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
/* update motion predictor, not for B-frames as they need the motion_val from the last P/S-Frame */
if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE) { //FIXME move into h263.c if possible, format specific stuff shouldnt be here
//FIXME a lot of that is only needed for !low_delay
const int wrap = s->block_wrap[0];
const int xy = s->block_index[0];
const int mb_index= s->mb_x + s->mb_y*s->mb_width;
@ -2064,7 +2070,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
add_dequant_dct(s, block[4], 4, dest_cb, s->uvlinesize);
add_dequant_dct(s, block[5], 5, dest_cr, s->uvlinesize);
}
} else {
} else if(s->codec_id != CODEC_ID_WMV2){
add_dct(s, block[0], 0, dest_y, dct_linesize);
add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
@ -2074,6 +2080,8 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
add_dct(s, block[4], 4, dest_cb, s->uvlinesize);
add_dct(s, block[5], 5, dest_cr, s->uvlinesize);
}
} else{
ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
}
} else {
/* dct only in intra block */
@ -2376,7 +2384,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
ptr = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
emulated_edge_mc(s, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
ff_emulated_edge_mc(s, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
ptr= s->edge_emu_buffer;
emu=1;
}
@ -2408,14 +2416,14 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
int wrap_c = s->uvlinesize;
ptr = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
if(emu){
emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
ff_emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
ptr= s->edge_emu_buffer;
}
s->dsp.get_pixels(s->block[4], ptr, wrap_c);
ptr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
if(emu){
emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
ff_emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
ptr= s->edge_emu_buffer;
}
s->dsp.get_pixels(s->block[5], ptr, wrap_c);
@ -2455,7 +2463,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
}
if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
emulated_edge_mc(s, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
ff_emulated_edge_mc(s, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
ptr_y= s->edge_emu_buffer;
emu=1;
}
@ -2487,12 +2495,12 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
skip_dct[5]= 1;
}else{
if(emu){
emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
ff_emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
ptr_cb= s->edge_emu_buffer;
}
s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
if(emu){
emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
ff_emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
ptr_cr= s->edge_emu_buffer;
}
s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
@ -2574,21 +2582,25 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
#ifdef CONFIG_ENCODERS
/* huffman encode */
switch(s->out_format) {
case FMT_MPEG1:
mpeg1_encode_mb(s, s->block, motion_x, motion_y);
break;
case FMT_H263:
if (s->h263_msmpeg4)
msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
else if(s->h263_pred)
mpeg4_encode_mb(s, s->block, motion_x, motion_y);
else
h263_encode_mb(s, s->block, motion_x, motion_y);
break;
case FMT_MJPEG:
mjpeg_encode_mb(s, s->block);
break;
switch(s->codec_id){ //FIXME funct ptr could be slightly faster
case CODEC_ID_MPEG1VIDEO:
mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
case CODEC_ID_MPEG4:
mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
case CODEC_ID_MSMPEG4V2:
case CODEC_ID_MSMPEG4V3:
case CODEC_ID_WMV1:
msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
case CODEC_ID_WMV2:
ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
case CODEC_ID_MJPEG:
mjpeg_encode_mb(s, s->block); break;
case CODEC_ID_H263:
case CODEC_ID_H263P:
case CODEC_ID_RV10:
h263_encode_mb(s, s->block, motion_x, motion_y); break;
default:
assert(0);
}
#endif
}
@ -2704,13 +2716,18 @@ static inline int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, in
int x,y;
if(w==16 && h==16)
return s->dsp.pix_norm(src1, src2, stride);
return s->dsp.sse[0](NULL, src1, src2, stride);
else if(w==8 && h==8)
return s->dsp.sse[1](NULL, src1, src2, stride);
for(y=0; y<h; y++){
for(x=0; x<w; x++){
acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
}
}
assert(acc>=0);
return acc;
}
@ -2751,6 +2768,18 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->scene_change_score=0;
s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME ratedistoration
if(s->msmpeg4_version){
if(s->pict_type==I_TYPE)
s->no_rounding=1;
else if(s->flipflop_rounding)
s->no_rounding ^= 1;
}else{
if(s->pict_type==I_TYPE)
s->no_rounding=0;
else if(s->pict_type!=B_TYPE)
s->no_rounding ^= 1;
}
/* Estimate motion for every MB */
if(s->pict_type != I_TYPE){
@ -2772,7 +2801,6 @@ static void encode_picture(MpegEncContext *s, int picture_number)
ff_estimate_b_frame_motion(s, mb_x, mb_y);
else
ff_estimate_p_frame_motion(s, mb_x, mb_y);
// s->mb_type[mb_y*s->mb_width + mb_x]=MB_TYPE_INTER;
}
}
}else /* if(s->pict_type == I_TYPE) */{
@ -2867,7 +2895,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
mjpeg_picture_header(s);
break;
case FMT_H263:
if (s->h263_msmpeg4)
if (s->codec_id == CODEC_ID_WMV2)
ff_wmv2_encode_picture_header(s, picture_number);
else if (s->h263_msmpeg4)
msmpeg4_encode_picture_header(s, picture_number);
else if (s->h263_pred)
mpeg4_encode_picture_header(s, picture_number);
@ -3049,15 +3079,14 @@ static void encode_picture(MpegEncContext *s, int picture_number)
&dmin, &next_block, 0, 0);
}
if(mb_type&MB_TYPE_DIRECT){
int mx= s->b_direct_mv_table[xy][0];
int my= s->b_direct_mv_table[xy][1];
s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
s->mv_type = MV_TYPE_16X16; //FIXME
s->mb_intra= 0;
s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0];
s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1];
s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0];
s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1];
ff_mpeg4_set_direct_mv(s, mx, my);
encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb,
&dmin, &next_block, s->b_direct_mv_table[xy][0], s->b_direct_mv_table[xy][1]);
&dmin, &next_block, mx, my);
}
if(mb_type&MB_TYPE_INTRA){
s->mv_dir = MV_DIR_FORWARD;
@ -3122,10 +3151,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->mb_intra= 0;
motion_x=s->b_direct_mv_table[xy][0];
motion_y=s->b_direct_mv_table[xy][1];
s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0];
s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1];
s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0];
s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1];
ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
break;
case MB_TYPE_BIDIR:
s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
@ -3170,7 +3196,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
s->current_picture.error[0] += sse(
s,
s->new_picture .data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
@ -3471,6 +3497,7 @@ char ff_get_pict_type_char(int pict_type){
case P_TYPE: return 'P';
case B_TYPE: return 'B';
case S_TYPE: return 'S';
default: return '?';
}
}
@ -3574,12 +3601,3 @@ AVCodec wmv1_encoder = {
MPV_encode_end,
};
AVCodec wmv2_encoder = {
"wmv2",
CODEC_TYPE_VIDEO,
CODEC_ID_WMV2,
sizeof(MpegEncContext),
MPV_encode_init,
MPV_encode_picture,
MPV_encode_end,
};

@ -129,6 +129,31 @@ typedef struct ParseContext{
int frame_start_found;
} ParseContext;
struct MpegEncContext;
/**
 * Motion estimation (ME) state.
 * An instance is stored in MpegEncContext as the `me` member.
 * Both function-pointer members receive the owning MpegEncContext as
 * their first argument.
 */
typedef struct MotionEstContext{
int skip; /* set if ME is skipped for the current MB */
int co_located_mv[4][2]; /* mv from last p frame for direct mode ME */
int direct_basis_mv[4][2]; /* NOTE(review): undocumented; presumably the basis MVs for direct mode ME, same [4][2] layout as co_located_mv -- confirm in motion_est.c */
uint8_t *scratchpad; /* data area for the me algo, so that the ME doesn't need to malloc/free */
uint32_t *map; /* map to avoid duplicate evaluations */
uint32_t *score_map; /* map to store the scores */
int map_generation; /* NOTE(review): presumably a generation counter used together with map -- confirm */
int penalty_factor; /* NOTE(review): presumably the weight applied to the MV bit cost during the search -- confirm */
int sub_penalty_factor; /* NOTE(review): presumably the same weight for the sub_motion_search step -- confirm */
UINT16 (*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV */
/* NOTE(review): presumably the sub-pel refinement hook; where it is assigned is not visible here.
 * Takes the current best vector through mx_ptr/my_ptr and the score dmin, and returns an int. */
int (*sub_motion_search)(struct MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
int xmin, int ymin, int xmax, int ymax,
int pred_x, int pred_y, Picture *ref_picture,
int n, int size, uint16_t * const mv_penalty);
/* Table of 7 search function pointers; the meaning of the index is not
 * visible here -- TODO confirm against the code that fills the table. */
int (*motion_search[7])(struct MpegEncContext * s, int block,
int *mx_ptr, int *my_ptr,
int P[10][2], int pred_x, int pred_y,
int xmin, int ymin, int xmax, int ymax, Picture *ref_picture,
uint16_t * const mv_penalty);
}MotionEstContext;
typedef struct MpegEncContext {
struct AVCodecContext *avctx;
/* the following parameters must be initialized before encoding */
@ -222,15 +247,8 @@ typedef struct MpegEncContext {
INT16 (*b_back_mv_table)[2]; /* MV table (1MV per MB) backward mode b-frame encoding */
INT16 (*b_bidir_forw_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */
INT16 (*b_bidir_back_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */
INT16 (*b_direct_forw_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */
INT16 (*b_direct_back_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */
INT16 (*b_direct_mv_table)[2]; /* MV table (1MV per MB) direct mode b-frame encoding */
int me_method; /* ME algorithm */
uint8_t *me_scratchpad; /* data area for the me algo, so that the ME doesnt need to malloc/free */
uint32_t *me_map; /* map to avoid duplicate evaluations */
uint16_t *me_score_map; /* map to store the SADs */
int me_map_generation;
int skip_me; /* set if ME is skiped for the current MB */
int scene_change_score;
int mv_dir;
#define MV_DIR_BACKWARD 1
@ -250,8 +268,9 @@ typedef struct MpegEncContext {
int mv[2][4][2];
int field_select[2][2];
int last_mv[2][2][2]; /* last MV, used for MV prediction in MPEG1 & B-frame MPEG4 */
UINT16 (*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV, used for ME */
UINT8 *fcode_tab; /* smallest fcode needed for each MV */
MotionEstContext me;
int no_rounding; /* apply no rounding to motion compensation (MPEG4, msmpeg4, ...)
for b-frames rounding mode is allways 0 */
@ -458,6 +477,7 @@ typedef struct MpegEncContext {
/* [mb_intra][isChroma][level][run][last] */
int (*ac_stats)[2][MAX_LEVEL+1][MAX_RUN+1][2];
int inter_intra_pred;
int mspel;
/* decompression specific */
GetBitContext gb;
@ -519,6 +539,7 @@ typedef struct MpegEncContext {
void (*fdct)(DCTELEM *block/* align 16*/);
void (*idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
void (*idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
//FIXME move above funcs into dspContext perhaps
} MpegEncContext;
@ -528,6 +549,9 @@ void MPV_common_end(MpegEncContext *s);
void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx);
void MPV_frame_end(MpegEncContext *s);
int MPV_encode_init(AVCodecContext *avctx);
int MPV_encode_end(AVCodecContext *avctx);
int MPV_encode_picture(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data);
#ifdef HAVE_MMX
void MPV_common_init_mmx(MpegEncContext *s);
#endif
@ -553,6 +577,8 @@ void ff_clean_intra_table_entries(MpegEncContext *s);
void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable);
void ff_error_resilience(MpegEncContext *s);
void ff_draw_horiz_band(MpegEncContext *s);
void ff_emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h,
int src_x, int src_y, int w, int h);
char ff_get_pict_type_char(int pict_type);
@ -585,6 +611,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type);
void ff_fix_long_p_mvs(MpegEncContext * s);
void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type);
void ff_init_me(MpegEncContext *s);
/* mpeg12.c */
@ -631,6 +658,11 @@ extern UINT8 ff_mpeg4_y_dc_scale_table[32];
extern UINT8 ff_mpeg4_c_dc_scale_table[32];
extern const INT16 ff_mpeg4_default_intra_matrix[64];
extern const INT16 ff_mpeg4_default_non_intra_matrix[64];
int ff_h263_decode_init(AVCodecContext *avctx);
int ff_h263_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
UINT8 *buf, int buf_size);
int ff_h263_decode_end(AVCodecContext *avctx);
void h263_encode_mb(MpegEncContext *s,
DCTELEM block[6][64],
int motion_x, int motion_y);
@ -667,6 +699,7 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s);
int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s);
int ff_h263_resync(MpegEncContext *s);
int ff_h263_get_gob_height(MpegEncContext *s);
void ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my);
/* rv10.c */
@ -684,7 +717,16 @@ int msmpeg4_decode_picture_header(MpegEncContext * s);
int msmpeg4_decode_ext_header(MpegEncContext * s, int buf_size);
int ff_msmpeg4_decode_init(MpegEncContext *s);
void ff_msmpeg4_encode_init(MpegEncContext *s);
/* WMV2 entry points. NOTE(review): the definitions are presumably in wmv2.c -- confirm. */

/* WMV2 picture header decoder; its caller is not visible in this part of the tree. */
int ff_wmv2_decode_picture_header(MpegEncContext * s);
/* Called by MPV_decode_mb() in place of the add_dct() sequence when
 * s->codec_id is CODEC_ID_WMV2. */
void ff_wmv2_add_mb(MpegEncContext *s, DCTELEM block[6][64], uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr);
/* Called by MPV_motion() (with h = 16) when s->mspel is set, taking the
 * place of the mpeg_motion() branch. */
void ff_mspel_motion(MpegEncContext *s,
UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
UINT8 **ref_picture, op_pixels_func (*pix_op)[4],
int motion_x, int motion_y, int h);
/* Called by encode_picture() for FMT_H263 output when s->codec_id is
 * CODEC_ID_WMV2; checked before the generic msmpeg4 header writer. */
int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number);
/* Called by encode_mb() for CODEC_ID_WMV2 to entropy-code one macroblock. */
void ff_wmv2_encode_mb(MpegEncContext * s,
DCTELEM block[6][64],
int motion_x, int motion_y);
/* mjpegenc.c */
int mjpeg_init(MpegEncContext *s);

@ -48,12 +48,14 @@
#define II_BITRATE 128*1024
#define MBAC_BITRATE 50*1024
#define DEFAULT_INTER_INDEX 3
static UINT32 v2_dc_lum_table[512][2];
static UINT32 v2_dc_chroma_table[512][2];
static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n);
static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
int n, int coded);
int n, int coded, const uint8_t *scantable);
static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr);
static int msmpeg4_decode_motion(MpegEncContext * s,
int *mx_ptr, int *my_ptr);
@ -63,6 +65,7 @@ static inline void msmpeg4_memsetw(short *tab, int val, int n);
static int get_size_of_code(MpegEncContext * s, RLTable *rl, int last, int run, int level, int intra);
static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
extern UINT32 inverse[256];
@ -160,13 +163,14 @@ static void common_init(MpegEncContext * s)
}
break;
case 4:
case 5:
s->y_dc_scale_table= wmv1_y_dc_scale_table;
s->c_dc_scale_table= wmv1_c_dc_scale_table;
break;
}
if(s->msmpeg4_version==4){
if(s->msmpeg4_version>=4){
ff_init_scantable(s, &s->intra_scantable , wmv1_scantable[1]);
ff_init_scantable(s, &s->intra_h_scantable, wmv1_scantable[2]);
ff_init_scantable(s, &s->intra_v_scantable, wmv1_scantable[3]);
@ -370,9 +374,9 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
s->per_mb_rl_table = 0;
if(s->msmpeg4_version==4)
s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE && s->pict_type==P_TYPE);
//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height);
if (s->pict_type == I_TYPE) {
s->no_rounding = 1;
s->slice_height= s->mb_height/1;
put_bits(&s->pb, 5, 0x16 + s->mb_height/s->slice_height);
@ -404,12 +408,6 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb, 1, s->mv_table_index);
}
if(s->flipflop_rounding){
s->no_rounding ^= 1;
}else{
s->no_rounding = 0;
}
}
s->esc3_level_length= 0;
@ -923,7 +921,7 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int
}
/* recalculate block_last_index for M$ wmv1 */
if(s->msmpeg4_version==4 && s->block_last_index[n]>0){
if(s->msmpeg4_version>=4 && s->block_last_index[n]>0){
for(last_index=63; last_index>=0; last_index--){
if(block[scantable[last_index]]) break;
}
@ -975,7 +973,7 @@ else
/* third escape */
put_bits(&s->pb, 1, 0);
put_bits(&s->pb, 1, last);
if(s->msmpeg4_version==4){
if(s->msmpeg4_version>=4){
if(s->esc3_level_length==0){
s->esc3_level_length=8;
s->esc3_run_length= 6;
@ -1014,7 +1012,7 @@ else
/****************************************/
/* decoding stuff */
static VLC mb_non_intra_vlc;
static VLC mb_non_intra_vlc[4];
static VLC mb_intra_vlc;
static VLC dc_lum_vlc[2];
static VLC dc_chroma_vlc[2];
@ -1139,9 +1137,12 @@ int ff_msmpeg4_decode_init(MpegEncContext *s)
&mvtab[0][1], 2, 1,
&mvtab[0][0], 2, 1);
init_vlc(&mb_non_intra_vlc, MB_NON_INTRA_VLC_BITS, 128,
&table_mb_non_intra[0][1], 8, 4,
&table_mb_non_intra[0][0], 8, 4);
for(i=0; i<4; i++){
init_vlc(&mb_non_intra_vlc[i], MB_NON_INTRA_VLC_BITS, 128,
&wmv2_inter_table[i][0][1], 8, 4,
&wmv2_inter_table[i][0][0], 8, 4); //FIXME name?
}
init_vlc(&mb_intra_vlc, MB_INTRA_VLC_BITS, 64,
&table_mb_intra[0][1], 4, 2,
&table_mb_intra[0][0], 4, 2);
@ -1167,6 +1168,9 @@ int ff_msmpeg4_decode_init(MpegEncContext *s)
case 4:
s->decode_mb= msmpeg4v34_decode_mb;
break;
case 5:
s->decode_mb= wmv2_decode_mb;
break;
}
s->slice_height= s->mb_height; //to avoid 1/0 if the first frame isnt a keyframe
@ -1334,6 +1338,7 @@ return -1;
s->no_rounding = 0;
}
}
//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height);
s->esc3_level_length= 0;
s->esc3_run_length= 0;
@ -1523,7 +1528,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
}
for (i = 0; i < 6; i++) {
if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0)
if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
{
fprintf(stderr,"\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
return -1;
@ -1566,7 +1571,7 @@ printf("S ");
}
}
code = get_vlc2(&s->gb, mb_non_intra_vlc.table, MB_NON_INTRA_VLC_BITS, 3);
code = get_vlc2(&s->gb, mb_non_intra_vlc[DEFAULT_INTER_INDEX].table, MB_NON_INTRA_VLC_BITS, 3);
if (code < 0)
return -1;
//s->mb_intra = (code & 0x40) ? 0 : 1;
@ -1628,7 +1633,7 @@ printf("%c", s->ac_pred ? 'A' : 'I');
}
for (i = 0; i < 6; i++) {
if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0)
if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
{
fprintf(stderr,"\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
return -1;
@ -1639,13 +1644,12 @@ printf("%c", s->ac_pred ? 'A' : 'I');
}
//#define ERROR_DETAILS
static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
int n, int coded)
int n, int coded, const uint8_t *scan_table)
{
int level, i, last, run, run_diff;
int dc_pred_dir;
RLTable *rl;
RL_VLC_ELEM *rl_vlc;
const UINT8 *scan_table;
int qmul, qadd;
if (s->mb_intra) {
@ -1713,7 +1717,8 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
s->block_last_index[n] = i;
return 0;
}
scan_table = s->inter_scantable.permutated;
if(!scan_table)
scan_table = s->inter_scantable.permutated;
set_stat(ST_INTER_AC);
rl_vlc= rl->rl_vlc[s->qscale];
}
@ -1889,7 +1894,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
i = 63; /* XXX: not optimal */
}
}
if(s->msmpeg4_version==4 && i>0) i=63; //FIXME/XXX optimize
if(s->msmpeg4_version>=4 && i>0) i=63; //FIXME/XXX optimize
s->block_last_index[n] = i;
return 0;
@ -1990,3 +1995,9 @@ static int msmpeg4_decode_motion(MpegEncContext * s,
*my_ptr = my;
return 0;
}
/* Cleanest way to support it:
* there is too much shared between the versions, so we can't have one file per version plus one common file,
* as almost everything would end up in the common file.
*/
#include "wmv2.c"

@ -3,7 +3,7 @@
*/
/* intra picture macro block coded block pattern */
static const UINT16 table_mb_intra[64][2] = {
static const uint16_t table_mb_intra[64][2] = {
{ 0x1, 1 },{ 0x17, 6 },{ 0x9, 5 },{ 0x5, 5 },
{ 0x6, 5 },{ 0x47, 9 },{ 0x20, 7 },{ 0x10, 7 },
{ 0x2, 5 },{ 0x7c, 9 },{ 0x3a, 7 },{ 0x1d, 7 },
@ -23,7 +23,7 @@ static const UINT16 table_mb_intra[64][2] = {
};
/* non intra picture macro block coded block pattern + mb type */
static const UINT32 table_mb_non_intra[128][2] = {
static const uint32_t table_mb_non_intra[128][2] = {
{ 0x40, 7 },{ 0x13c9, 13 },{ 0x9fd, 12 },{ 0x1fc, 15 },
{ 0x9fc, 12 },{ 0xa83, 18 },{ 0x12d34, 17 },{ 0x83bc, 16 },
{ 0x83a, 12 },{ 0x7f8, 17 },{ 0x3fd, 16 },{ 0x3ff, 16 },
@ -60,7 +60,7 @@ static const UINT32 table_mb_non_intra[128][2] = {
/* dc table 0 */
static const UINT32 table0_dc_lum[120][2] = {
static const uint32_t table0_dc_lum[120][2] = {
{ 0x1, 1 },{ 0x1, 2 },{ 0x1, 4 },{ 0x1, 5 },
{ 0x5, 5 },{ 0x7, 5 },{ 0x8, 6 },{ 0xc, 6 },
{ 0x0, 7 },{ 0x2, 7 },{ 0x12, 7 },{ 0x1a, 7 },
@ -93,7 +93,7 @@ static const UINT32 table0_dc_lum[120][2] = {
{ 0x6078c, 24 },{ 0x6078d, 24 },{ 0x6078e, 24 },{ 0x6078f, 24 },
};
static const UINT32 table0_dc_chroma[120][2] = {
static const uint32_t table0_dc_chroma[120][2] = {
{ 0x0, 2 },{ 0x1, 2 },{ 0x5, 3 },{ 0x9, 4 },
{ 0xd, 4 },{ 0x11, 5 },{ 0x1d, 5 },{ 0x1f, 5 },
{ 0x21, 6 },{ 0x31, 6 },{ 0x38, 6 },{ 0x33, 6 },
@ -128,7 +128,7 @@ static const UINT32 table0_dc_chroma[120][2] = {
/* dc table 1 */
static const UINT32 table1_dc_lum[120][2] = {
static const uint32_t table1_dc_lum[120][2] = {
{ 0x2, 2 },{ 0x3, 2 },{ 0x3, 3 },{ 0x2, 4 },
{ 0x5, 4 },{ 0x1, 5 },{ 0x3, 5 },{ 0x8, 5 },
{ 0x0, 6 },{ 0x5, 6 },{ 0xd, 6 },{ 0xf, 6 },
@ -161,7 +161,7 @@ static const UINT32 table1_dc_lum[120][2] = {
{ 0x1e6964, 26 },{ 0x1e6965, 26 },{ 0x1e6966, 26 },{ 0x1e6967, 26 },
};
static const UINT32 table1_dc_chroma[120][2] = {
static const uint32_t table1_dc_chroma[120][2] = {
{ 0x0, 2 },{ 0x1, 2 },{ 0x4, 3 },{ 0x7, 3 },
{ 0xb, 4 },{ 0xd, 4 },{ 0x15, 5 },{ 0x28, 6 },
{ 0x30, 6 },{ 0x32, 6 },{ 0x52, 7 },{ 0x62, 7 },
@ -196,7 +196,7 @@ static const UINT32 table1_dc_chroma[120][2] = {
/* vlc table 0, for intra luma */
static const UINT16 table0_vlc[133][2] = {
static const uint16_t table0_vlc[133][2] = {
{ 0x1, 2 },{ 0x6, 3 },{ 0xf, 4 },{ 0x16, 5 },
{ 0x20, 6 },{ 0x18, 7 },{ 0x8, 8 },{ 0x9a, 8 },
{ 0x56, 9 },{ 0x13e, 9 },{ 0xf0, 10 },{ 0x3a5, 10 },
@ -233,7 +233,7 @@ static const UINT16 table0_vlc[133][2] = {
{ 0x16, 7 },
};
static const INT8 table0_level[132] = {
static const int8_t table0_level[132] = {
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
1, 2, 3, 4, 5, 6, 7, 8,
@ -253,7 +253,7 @@ static const INT8 table0_level[132] = {
1, 1, 1, 1,
};
static const INT8 table0_run[132] = {
static const int8_t table0_run[132] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1,
@ -275,7 +275,7 @@ static const INT8 table0_run[132] = {
/* vlc table 1, for intra chroma and P macro blocks */
static const UINT16 table1_vlc[149][2] = {
static const uint16_t table1_vlc[149][2] = {
{ 0x4, 3 },{ 0x14, 5 },{ 0x17, 7 },{ 0x7f, 8 },
{ 0x154, 9 },{ 0x1f2, 10 },{ 0xbf, 11 },{ 0x65, 12 },
{ 0xaaa, 12 },{ 0x630, 13 },{ 0x1597, 13 },{ 0x3b7, 14 },
@ -316,7 +316,7 @@ static const UINT16 table1_vlc[149][2] = {
{ 0xd, 9 },
};
static const INT8 table1_level[148] = {
static const int8_t table1_level[148] = {
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 1, 2,
3, 4, 5, 6, 7, 8, 9, 1,
@ -338,7 +338,7 @@ static const INT8 table1_level[148] = {
1, 1, 1, 1,
};
static const INT8 table1_run[148] = {
static const int8_t table1_run[148] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 2,
@ -362,7 +362,7 @@ static const INT8 table1_run[148] = {
/* third vlc table */
static const UINT16 table2_vlc[186][2] = {
static const uint16_t table2_vlc[186][2] = {
{ 0x1, 2 },{ 0x5, 3 },{ 0xd, 4 },{ 0x12, 5 },
{ 0xe, 6 },{ 0x15, 7 },{ 0x13, 8 },{ 0x3f, 8 },
{ 0x4b, 9 },{ 0x11f, 9 },{ 0xb8, 10 },{ 0x3e3, 10 },
@ -412,7 +412,7 @@ static const UINT16 table2_vlc[186][2] = {
{ 0x23dc, 14 },{ 0x4a, 9 },
};
static const INT8 table2_level[185] = {
static const int8_t table2_level[185] = {
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 1, 2, 3, 4, 5,
@ -439,7 +439,7 @@ static const INT8 table2_level[185] = {
1,
};
static const INT8 table2_run[185] = {
static const int8_t table2_run[185] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1,
@ -467,7 +467,7 @@ static const INT8 table2_run[185] = {
};
/* second non intra vlc table */
static const UINT16 table4_vlc[169][2] = {
static const uint16_t table4_vlc[169][2] = {
{ 0x0, 3 },{ 0x3, 4 },{ 0xb, 5 },{ 0x14, 6 },
{ 0x3f, 6 },{ 0x5d, 7 },{ 0xa2, 8 },{ 0xac, 9 },
{ 0x16e, 9 },{ 0x20a, 10 },{ 0x2e2, 10 },{ 0x432, 11 },
@ -513,7 +513,7 @@ static const UINT16 table4_vlc[169][2] = {
{ 0x169, 9 },
};
static const INT8 table4_level[168] = {
static const int8_t table4_level[168] = {
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 1,
@ -537,7 +537,7 @@ static const INT8 table4_level[168] = {
1, 1, 1, 1, 1, 1, 1, 1,
};
static const INT8 table4_run[168] = {
static const int8_t table4_run[168] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1,
@ -561,25 +561,25 @@ static const INT8 table4_run[168] = {
29, 30, 31, 32, 33, 34, 35, 36,
};
extern const UINT16 inter_vlc[103][2];
extern const INT8 inter_level[102];
extern const INT8 inter_run[102];
extern const uint16_t inter_vlc[103][2];
extern const int8_t inter_level[102];
extern const int8_t inter_run[102];
extern const UINT16 intra_vlc[103][2];
extern const INT8 intra_level[102];
extern const INT8 intra_run[102];
extern const uint16_t intra_vlc[103][2];
extern const int8_t intra_level[102];
extern const int8_t intra_run[102];
extern const UINT8 DCtab_lum[13][2];
extern const UINT8 DCtab_chrom[13][2];
extern const uint8_t DCtab_lum[13][2];
extern const uint8_t DCtab_chrom[13][2];
extern const UINT8 cbpy_tab[16][2];
extern const UINT8 mvtab[33][2];
extern const uint8_t cbpy_tab[16][2];
extern const uint8_t mvtab[33][2];
extern const UINT8 intra_MCBPC_code[8];
extern const UINT8 intra_MCBPC_bits[8];
extern const uint8_t intra_MCBPC_code[8];
extern const uint8_t intra_MCBPC_bits[8];
extern const UINT8 inter_MCBPC_code[25];
extern const UINT8 inter_MCBPC_bits[25];
extern const uint8_t inter_MCBPC_code[25];
extern const uint8_t inter_MCBPC_bits[25];
#define NB_RL_TABLES 6
@ -632,7 +632,7 @@ static RLTable rl_table[NB_RL_TABLES] = {
/* motion vector table 0 */
static const UINT16 table0_mv_code[1100] = {
static const uint16_t table0_mv_code[1100] = {
0x0001, 0x0003, 0x0005, 0x0007, 0x0003, 0x0008, 0x000c, 0x0001,
0x0002, 0x001b, 0x0006, 0x000b, 0x0015, 0x0002, 0x000e, 0x000f,
0x0014, 0x0020, 0x0022, 0x0025, 0x0027, 0x0029, 0x002d, 0x004b,
@ -773,7 +773,7 @@ static const UINT16 table0_mv_code[1100] = {
0x5f0d, 0x5f0e, 0x5f0f, 0x0000,
};
static const UINT8 table0_mv_bits[1100] = {
static const uint8_t table0_mv_bits[1100] = {
1, 4, 4, 4, 5, 5, 5, 6,
6, 6, 7, 7, 7, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
@ -914,7 +914,7 @@ static const UINT8 table0_mv_bits[1100] = {
17, 17, 17, 8,
};
static const UINT8 table0_mvx[1099] = {
static const uint8_t table0_mvx[1099] = {
32, 32, 31, 32, 33, 31, 33, 31,
33, 32, 34, 32, 30, 32, 31, 34,
35, 32, 34, 33, 29, 33, 30, 30,
@ -1055,7 +1055,7 @@ static const UINT8 table0_mvx[1099] = {
61, 19, 19,
};
static const UINT8 table0_mvy[1099] = {
static const uint8_t table0_mvy[1099] = {
32, 31, 32, 33, 32, 31, 31, 33,
33, 34, 32, 30, 32, 35, 34, 31,
32, 29, 33, 30, 32, 34, 33, 31,
@ -1197,7 +1197,7 @@ static const UINT8 table0_mvy[1099] = {
};
/* motion vector table 1 */
static const UINT16 table1_mv_code[1100] = {
static const uint16_t table1_mv_code[1100] = {
0x0000, 0x0007, 0x0009, 0x000f, 0x000a, 0x0011, 0x001a, 0x001c,
0x0011, 0x0031, 0x0025, 0x002d, 0x002f, 0x006f, 0x0075, 0x0041,
0x004c, 0x004e, 0x005c, 0x0060, 0x0062, 0x0066, 0x0068, 0x0069,
@ -1338,7 +1338,7 @@ static const UINT16 table1_mv_code[1100] = {
0x2473, 0x26a2, 0x26a3, 0x000b,
};
static const UINT8 table1_mv_bits[1100] = {
static const uint8_t table1_mv_bits[1100] = {
2, 4, 4, 4, 5, 5, 5, 5,
6, 6, 7, 7, 7, 7, 7, 8,
8, 8, 8, 8, 8, 8, 8, 8,
@ -1479,7 +1479,7 @@ static const UINT8 table1_mv_bits[1100] = {
15, 15, 15, 4,
};
static const UINT8 table1_mvx[1099] = {
static const uint8_t table1_mvx[1099] = {
32, 31, 32, 31, 33, 32, 33, 33,
31, 34, 30, 32, 32, 34, 35, 32,
34, 33, 29, 30, 30, 32, 31, 31,
@ -1620,7 +1620,7 @@ static const UINT8 table1_mvx[1099] = {
0, 12, 27,
};
static const UINT8 table1_mvy[1099] = {
static const uint8_t table1_mvy[1099] = {
32, 32, 31, 31, 32, 33, 31, 33,
33, 32, 32, 30, 34, 31, 32, 29,
33, 30, 32, 33, 31, 35, 34, 30,
@ -1764,11 +1764,11 @@ static const UINT8 table1_mvy[1099] = {
/* motion vector table */
typedef struct MVTable {
int n;
const UINT16 *table_mv_code;
const UINT8 *table_mv_bits;
const UINT8 *table_mvx;
const UINT8 *table_mvy;
UINT16 *table_mv_index; /* encoding: convert mv to index in table_mv */
const uint16_t *table_mv_code;
const uint8_t *table_mv_bits;
const uint8_t *table_mvx;
const uint8_t *table_mvy;
uint16_t *table_mv_index; /* encoding: convert mv to index in table_mv */
VLC vlc; /* decoding: vlc */
} MVTable;
@ -1789,29 +1789,29 @@ static MVTable mv_tables[2] = {
}
};
static const UINT8 v2_mb_type[8][2] = {
static const uint8_t v2_mb_type[8][2] = {
{1, 1}, {0 , 2}, {3 , 3}, {9 , 5},
{5, 4}, {0x21, 7}, {0x20, 7}, {0x11, 6},
};
static const UINT8 v2_intra_cbpc[4][2] = {
static const uint8_t v2_intra_cbpc[4][2] = {
{1, 1}, {0, 3}, {1, 3}, {1, 2},
};
static UINT8 wmv1_y_dc_scale_table[32]={
static uint8_t wmv1_y_dc_scale_table[32]={
// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
0, 8, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21
};
static UINT8 wmv1_c_dc_scale_table[32]={
static uint8_t wmv1_c_dc_scale_table[32]={
// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
0, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21,22
};
static UINT8 old_ff_y_dc_scale_table[32]={
static uint8_t old_ff_y_dc_scale_table[32]={
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
0, 8, 8, 8, 8,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
};
static UINT8 old_ff_c_dc_scale_table[32]={
static uint8_t old_ff_c_dc_scale_table[32]={
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
0, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21,22
};
@ -1819,7 +1819,7 @@ static UINT8 old_ff_c_dc_scale_table[32]={
#define WMV1_SCANTABLE_COUNT 4
static const UINT8 wmv1_scantable00[64]= {
static const uint8_t wmv1_scantable00[64]= {
0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11,
0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28,
0x30, 0x38, 0x29, 0x21, 0x1A, 0x13, 0x0C, 0x05,
@ -1829,7 +1829,7 @@ static const UINT8 wmv1_scantable00[64]= {
0x2C, 0x25, 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x35,
0x3D, 0x3E, 0x36, 0x2E, 0x27, 0x2F, 0x37, 0x3F,
};
static const UINT8 wmv1_scantable01[64]= {
static const uint8_t wmv1_scantable01[64]= {
0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11,
0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28,
0x21, 0x30, 0x1A, 0x13, 0x0C, 0x05, 0x06, 0x0D,
@ -1839,7 +1839,7 @@ static const UINT8 wmv1_scantable01[64]= {
0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3C, 0x35,
0x3D, 0x2E, 0x27, 0x2F, 0x36, 0x3E, 0x37, 0x3F,
};
static const UINT8 wmv1_scantable02[64]= {
static const uint8_t wmv1_scantable02[64]= {
0x00, 0x01, 0x08, 0x02, 0x03, 0x09, 0x10, 0x18,
0x11, 0x0A, 0x04, 0x05, 0x0B, 0x12, 0x19, 0x20,
0x28, 0x30, 0x21, 0x1A, 0x13, 0x0C, 0x06, 0x07,
@ -1849,7 +1849,7 @@ static const UINT8 wmv1_scantable02[64]= {
0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3B, 0x3C, 0x35,
0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F,
};
static const UINT8 wmv1_scantable03[64]= {
static const uint8_t wmv1_scantable03[64]= {
0x00, 0x08, 0x10, 0x01, 0x18, 0x20, 0x28, 0x09,
0x02, 0x03, 0x0A, 0x11, 0x19, 0x30, 0x38, 0x29,
0x21, 0x1A, 0x12, 0x0B, 0x04, 0x05, 0x0C, 0x13,
@ -1860,14 +1860,141 @@ static const UINT8 wmv1_scantable03[64]= {
0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F,
};
static const UINT8 *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={
static const uint8_t *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={
wmv1_scantable00,
wmv1_scantable01,
wmv1_scantable02,
wmv1_scantable03,
};
static UINT8 table_inter_intra[4][2]={
static const uint8_t table_inter_intra[4][2]={
{0,1},{2,2},{6,3},{7,3}
};
#define WMV2_INTER_CBP_TABLE_COUNT 4
static const uint32_t table_mb_non_intra2[128][2] = {
{0x0000A7, 14}, {0x01B2B8, 18}, {0x01B28E, 18}, {0x036575, 19},
{0x006CAC, 16}, {0x000A69, 18}, {0x002934, 20}, {0x00526B, 21},
{0x006CA1, 16}, {0x01B2B9, 18}, {0x0029AD, 20}, {0x029353, 24},
{0x006CA7, 16}, {0x006CAB, 16}, {0x01B2BB, 18}, {0x00029B, 16},
{0x00D944, 17}, {0x000A6A, 18}, {0x0149A8, 23}, {0x03651F, 19},
{0x006CAF, 16}, {0x000A4C, 18}, {0x03651E, 19}, {0x000A48, 18},
{0x00299C, 20}, {0x00299F, 20}, {0x029352, 24}, {0x0029AC, 20},
{0x000296, 16}, {0x00D946, 17}, {0x000A68, 18}, {0x000298, 16},
{0x000527, 17}, {0x00D94D, 17}, {0x0014D7, 19}, {0x036574, 19},
{0x000A5C, 18}, {0x01B299, 18}, {0x00299D, 20}, {0x00299E, 20},
{0x000525, 17}, {0x000A66, 18}, {0x00A4D5, 22}, {0x00149B, 19},
{0x000295, 16}, {0x006CAD, 16}, {0x000A49, 18}, {0x000521, 17},
{0x006CAA, 16}, {0x00D945, 17}, {0x01B298, 18}, {0x00052F, 17},
{0x003654, 15}, {0x006CA0, 16}, {0x000532, 17}, {0x000291, 16},
{0x003652, 15}, {0x000520, 17}, {0x000A5D, 18}, {0x000294, 16},
{0x00009B, 11}, {0x0006E2, 12}, {0x000028, 12}, {0x0001B0, 10},
{0x000001, 3}, {0x000010, 8}, {0x00002F, 6}, {0x00004C, 10},
{0x00000D, 4}, {0x000000, 10}, {0x000006, 9}, {0x000134, 12},
{0x00000C, 4}, {0x000007, 10}, {0x000007, 9}, {0x0006E1, 12},
{0x00000E, 5}, {0x0000DA, 9}, {0x000022, 9}, {0x000364, 11},
{0x00000F, 4}, {0x000006, 10}, {0x00000F, 9}, {0x000135, 12},
{0x000014, 5}, {0x0000DD, 9}, {0x000004, 9}, {0x000015, 11},
{0x00001A, 6}, {0x0001B3, 10}, {0x000005, 10}, {0x0006E3, 12},
{0x00000C, 5}, {0x0000B9, 8}, {0x000004, 8}, {0x0000DB, 9},
{0x00000E, 4}, {0x00000B, 10}, {0x000023, 9}, {0x0006CB, 12},
{0x000005, 6}, {0x0001B1, 10}, {0x000001, 10}, {0x0006E0, 12},
{0x000011, 5}, {0x0000DF, 9}, {0x00000E, 9}, {0x000373, 11},
{0x000003, 5}, {0x0000B8, 8}, {0x000006, 8}, {0x000175, 9},
{0x000015, 5}, {0x000174, 9}, {0x000027, 9}, {0x000372, 11},
{0x000010, 5}, {0x0000BB, 8}, {0x000005, 8}, {0x0000DE, 9},
{0x00000F, 5}, {0x000001, 9}, {0x000012, 8}, {0x000004, 10},
{0x000002, 3}, {0x000016, 5}, {0x000009, 4}, {0x000001, 5},
};
static const uint32_t table_mb_non_intra3[128][2] = {
{0x0002A1, 10}, {0x005740, 15}, {0x01A0BF, 18}, {0x015D19, 17},
{0x001514, 13}, {0x00461E, 15}, {0x015176, 17}, {0x015177, 17},
{0x0011AD, 13}, {0x00682E, 16}, {0x0682F9, 20}, {0x03417D, 19},
{0x001A36, 14}, {0x002A2D, 14}, {0x00D05E, 17}, {0x006824, 16},
{0x001515, 13}, {0x00545C, 15}, {0x0230E9, 18}, {0x011AFA, 17},
{0x0015D7, 13}, {0x005747, 15}, {0x008D79, 16}, {0x006825, 16},
{0x002BA2, 14}, {0x00A8BA, 16}, {0x0235F6, 18}, {0x015D18, 17},
{0x0011AE, 13}, {0x00346F, 15}, {0x008C3B, 16}, {0x00346E, 15},
{0x000D1A, 13}, {0x00461F, 15}, {0x0682F8, 20}, {0x011875, 17},
{0x002BA1, 14}, {0x008D61, 16}, {0x0235F7, 18}, {0x0230E8, 18},
{0x001513, 13}, {0x008D7B, 16}, {0x011AF4, 17}, {0x011AF5, 17},
{0x001185, 13}, {0x0046BF, 15}, {0x008D60, 16}, {0x008D7C, 16},
{0x001512, 13}, {0x00461C, 15}, {0x00AE8D, 16}, {0x008D78, 16},
{0x000D0E, 13}, {0x003413, 15}, {0x0046B1, 15}, {0x003416, 15},
{0x000AEA, 12}, {0x002A2C, 14}, {0x005741, 15}, {0x002A2F, 14},
{0x000158, 9}, {0x0008D2, 12}, {0x00054C, 11}, {0x000686, 12},
{0x000000, 2}, {0x000069, 8}, {0x00006B, 8}, {0x00068C, 12},
{0x000007, 3}, {0x00015E, 9}, {0x0002A3, 10}, {0x000AE9, 12},
{0x000006, 3}, {0x000231, 10}, {0x0002B8, 10}, {0x001A08, 14},
{0x000010, 5}, {0x0001A9, 10}, {0x000342, 11}, {0x000A88, 12},
{0x000004, 4}, {0x0001A2, 10}, {0x0002A4, 10}, {0x001184, 13},
{0x000012, 5}, {0x000232, 10}, {0x0002B2, 10}, {0x000680, 12},
{0x00001B, 6}, {0x00046A, 11}, {0x00068E, 12}, {0x002359, 14},
{0x000016, 5}, {0x00015F, 9}, {0x0002A0, 10}, {0x00054D, 11},
{0x000005, 4}, {0x000233, 10}, {0x0002B9, 10}, {0x0015D6, 13},
{0x000022, 6}, {0x000468, 11}, {0x000683, 12}, {0x001A0A, 14},
{0x000013, 5}, {0x000236, 10}, {0x0002BB, 10}, {0x001186, 13},
{0x000017, 5}, {0x0001AB, 10}, {0x0002A7, 10}, {0x0008D3, 12},
{0x000014, 5}, {0x000237, 10}, {0x000460, 11}, {0x000D0F, 13},
{0x000019, 6}, {0x0001AA, 10}, {0x0002B3, 10}, {0x000681, 12},
{0x000018, 6}, {0x0001A8, 10}, {0x0002A5, 10}, {0x00068F, 12},
{0x000007, 4}, {0x000055, 7}, {0x000047, 7}, {0x0000AD, 8},
};
static const uint32_t table_mb_non_intra4[128][2] = {
{0x0000D4, 8}, {0x0021C5, 14}, {0x00F18A, 16}, {0x00D5BC, 16},
{0x000879, 12}, {0x00354D, 14}, {0x010E3F, 17}, {0x010F54, 17},
{0x000866, 12}, {0x00356E, 14}, {0x010F55, 17}, {0x010E3E, 17},
{0x0010CE, 13}, {0x003C84, 14}, {0x00D5BD, 16}, {0x00F18B, 16},
{0x000868, 12}, {0x00438C, 15}, {0x0087AB, 16}, {0x00790B, 15},
{0x000F10, 12}, {0x00433D, 15}, {0x006AD3, 15}, {0x00790A, 15},
{0x001AA7, 13}, {0x0043D4, 15}, {0x00871E, 16}, {0x006ADF, 15},
{0x000D7C, 12}, {0x003C94, 14}, {0x00438D, 15}, {0x006AD2, 15},
{0x0006BC, 11}, {0x0021E9, 14}, {0x006ADA, 15}, {0x006A99, 15},
{0x0010F7, 13}, {0x004389, 15}, {0x006ADB, 15}, {0x0078C4, 15},
{0x000D56, 12}, {0x0035F7, 14}, {0x00438E, 15}, {0x006A98, 15},
{0x000D52, 12}, {0x003C95, 14}, {0x004388, 15}, {0x00433C, 15},
{0x000D54, 12}, {0x001E4B, 13}, {0x003C63, 14}, {0x003C83, 14},
{0x000861, 12}, {0x0021EB, 14}, {0x00356C, 14}, {0x0035F6, 14},
{0x000863, 12}, {0x00219F, 14}, {0x003568, 14}, {0x003C82, 14},
{0x0001AE, 9}, {0x0010C0, 13}, {0x000F11, 12}, {0x001AFA, 13},
{0x000000, 1}, {0x0000F0, 8}, {0x0001AD, 9}, {0x0010C1, 13},
{0x00000A, 4}, {0x0003C5, 10}, {0x000789, 11}, {0x001AB5, 13},
{0x000009, 4}, {0x000435, 11}, {0x000793, 11}, {0x001E40, 13},
{0x00001D, 5}, {0x0003CB, 10}, {0x000878, 12}, {0x001AAF, 13},
{0x00000B, 4}, {0x0003C7, 10}, {0x000791, 11}, {0x001AAB, 13},
{0x00001F, 5}, {0x000436, 11}, {0x0006BF, 11}, {0x000F19, 12},
{0x00003D, 6}, {0x000D51, 12}, {0x0010C4, 13}, {0x0021E8, 14},
{0x000036, 6}, {0x000437, 11}, {0x0006AF, 11}, {0x0010C5, 13},
{0x00000C, 4}, {0x000432, 11}, {0x000794, 11}, {0x001E30, 13},
{0x000042, 7}, {0x000870, 12}, {0x000F24, 12}, {0x001E43, 13},
{0x000020, 6}, {0x00043E, 11}, {0x000795, 11}, {0x001AAA, 13},
{0x000037, 6}, {0x0006AC, 11}, {0x0006AE, 11}, {0x0010F6, 13},
{0x000034, 6}, {0x00043A, 11}, {0x000D50, 12}, {0x001AAE, 13},
{0x000039, 6}, {0x00043F, 11}, {0x00078D, 11}, {0x0010D2, 13},
{0x000038, 6}, {0x00043B, 11}, {0x0006BD, 11}, {0x0010D3, 13},
{0x000011, 5}, {0x0001AC, 9}, {0x0000F3, 8}, {0x000439, 11},
};
/* Macroblock type/CBP VLC tables for P pictures, selected by
 * Wmv2Context.cbp_table_index. Every table holds 128 {code, bit length}
 * pairs: the encoder uses entries [cbp + 64] for inter macroblocks and
 * entries [cbp] for intra macroblocks inside P pictures.
 * table_mb_non_intra is defined outside this part of the file. */
static const uint32_t (*wmv2_inter_table[WMV2_INTER_CBP_TABLE_COUNT])[2]={
table_mb_non_intra2,
table_mb_non_intra3,
table_mb_non_intra4,
table_mb_non_intra,
};
/* Scan order registered as abt_scantable[0] (used when abt_type == 1).
 * All listed positions are <= 0x1F, i.e. they stay inside the first four
 * rows of an 8-wide block (an 8x4 half block).
 * Only 32 of the 64 elements are listed; the remainder is implicitly 0. */
static const uint8_t wmv2_scantableA[64]={
0x00, 0x01, 0x02, 0x08, 0x03, 0x09, 0x0A, 0x10,
0x04, 0x0B, 0x11, 0x18, 0x12, 0x0C, 0x05, 0x13,
0x19, 0x0D, 0x14, 0x1A, 0x1B, 0x06, 0x15, 0x1C,
0x0E, 0x16, 0x1D, 0x07, 0x1E, 0x0F, 0x17, 0x1F,
};
/* Scan order registered as abt_scantable[1] (used when abt_type == 2).
 * All listed positions have a column index (low 3 bits) of 0..3 and span
 * rows 0..7, i.e. they stay inside the left 4x8 half of an 8-wide block.
 * Only 32 of the 64 elements are listed; the remainder is implicitly 0. */
static const uint8_t wmv2_scantableB[64]={
0x00, 0x08, 0x01, 0x10, 0x09, 0x18, 0x11, 0x02,
0x20, 0x0A, 0x19, 0x28, 0x12, 0x30, 0x21, 0x1A,
0x38, 0x29, 0x22, 0x03, 0x31, 0x39, 0x0B, 0x2A,
0x13, 0x32, 0x1B, 0x3A, 0x23, 0x2B, 0x33, 0x3B,
};

@ -473,3 +473,93 @@ void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block)
idct4col(dest + line_size + i, 2 * line_size, block + 8 + i);
}
}
/* 8x4 & 4x8 WMV2 IDCT */

/* Column pass constants: drop whatever an earlier transform in this file
 * defined and rebuild them for the 4 point transform. */
#undef CN_SHIFT
#undef C_SHIFT
#undef C_FIX
#undef C1
#undef C2
#define CN_SHIFT 12
#define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5))
#define C1 C_FIX(0.6532814824)
#define C2 C_FIX(0.2705980501)
#define C3 C_FIX(0.5)
#define C_SHIFT (4+1+12)

/**
 * 4 point inverse DCT down one column, added onto the destination pixels.
 *
 * @param dest      top pixel of the destination column
 * @param line_size distance in bytes between two destination rows
 * @param col       top coefficient of the column; rows are 8 elements apart
 */
static inline void idct4col_add(UINT8 *dest, int line_size, const INT16 *col)
{
    const UINT8 *clamp = cropTbl + MAX_NEG_CROP;
    const int bias = 1 << (C_SHIFT - 1);   /* rounding term for the final shift */
    const int in0 = col[8*0];
    const int in1 = col[8*1];
    const int in2 = col[8*2];
    const int in3 = col[8*3];
    /* even half of the butterfly, rounding bias folded in */
    const int even_sum  = (in0 + in2)*C3 + bias;
    const int even_diff = (in0 - in2)*C3 + bias;
    /* odd half of the butterfly */
    const int odd_hi = in1 * C1 + in3 * C2;
    const int odd_lo = in1 * C2 - in3 * C1;
    UINT8 *px = dest;

    px[0] = clamp[px[0] + ((even_sum + odd_hi) >> C_SHIFT)];
    px += line_size;
    px[0] = clamp[px[0] + ((even_diff + odd_lo) >> C_SHIFT)];
    px += line_size;
    px[0] = clamp[px[0] + ((even_diff - odd_lo) >> C_SHIFT)];
    px += line_size;
    px[0] = clamp[px[0] + ((even_sum - odd_hi) >> C_SHIFT)];
}
/* Row pass constants for the 4 point transform. The factors are scaled by
 * sqrt(2) and 2^RN_SHIFT; results are shifted down by R_SHIFT. */
#define RN_SHIFT 15
#define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5))
#define R1 R_FIX(0.6532814824)
#define R2 R_FIX(0.2705980501)
#define R3 R_FIX(0.5)
#define R_SHIFT 11

/**
 * In-place 4 point inverse DCT over the first four coefficients of a row.
 * Nothing is clamped here, so the crop table is not needed (the unused
 * lookup pointer of the earlier version has been dropped).
 *
 * @param row pointer to the row; elements 0..3 are read and overwritten
 */
static inline void idct4row(INT16 *row)
{
    int c0, c1, c2, c3, a0, a1, a2, a3;

    a0 = row[0];
    a1 = row[1];
    a2 = row[2];
    a3 = row[3];

    /* even part, with the rounding term for the final shift folded in */
    c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1));
    c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1));
    /* odd part */
    c1 = a1 * R1 + a3 * R2;
    c3 = a1 * R2 - a3 * R1;

    row[0]= (c0 + c1) >> R_SHIFT;
    row[1]= (c2 + c3) >> R_SHIFT;
    row[2]= (c2 - c3) >> R_SHIFT;
    row[3]= (c0 - c1) >> R_SHIFT;
}
/**
 * Inverse transform of an 8 wide, 4 high block, added onto dest.
 *
 * @param dest      top-left destination pixel
 * @param line_size distance in bytes between two destination rows
 * @param block     coefficients with a row pitch of 8; modified in place
 */
void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block)
{
    int row, column;

    /* horizontal pass: 8 point IDCT over each of the 4 rows */
    for (row = 0; row < 4; row++)
        idctRowCondDC(block + 8*row);

    /* vertical pass: 4 point IDCT over each of the 8 columns, then add */
    for (column = 0; column < 8; column++)
        idct4col_add(dest + column, line_size, block + column);
}
/**
 * Inverse transform of a 4 wide, 8 high block, added onto dest.
 *
 * @param dest      top-left destination pixel
 * @param line_size distance in bytes between two destination rows
 * @param block     coefficients with a row pitch of 8; modified in place
 */
void simple_idct48_add(UINT8 *dest, int line_size, INT16 *block)
{
    int row, column;

    /* horizontal pass: 4 point IDCT over each of the 8 rows */
    for (row = 0; row < 8; row++)
        idct4row(block + 8*row);

    /* vertical pass: 8 point IDCT over each of the 4 columns, then add */
    for (column = 0; column < 4; column++)
        idctSparseColAdd(dest + column, line_size, block + column);
}

@ -26,3 +26,6 @@ void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, INT16 *block);
void simple_idct(short *block);
void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block);
void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block);
void simple_idct48_add(UINT8 *dest, int line_size, INT16 *block);

@ -0,0 +1,850 @@
/*
* Copyright (c) 2002 The FFmpeg Project.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#include "simple_idct.h"
#define SKIP_TYPE_NONE 0
#define SKIP_TYPE_MPEG 1
#define SKIP_TYPE_ROW 2
#define SKIP_TYPE_COL 3
/*
 * Private context shared by the WMV2 encoder and decoder.
 * The MpegEncContext has to stay the first member: functions in this file
 * cast an MpegEncContext pointer directly to a Wmv2Context pointer.
 */
typedef struct Wmv2Context{
MpegEncContext s;
int j_type_bit;        /* ext. header: I pictures carry a j_type bit */
int j_type;            /* per I picture; non-zero pictures are rejected by the decoder */
int flag3;             /* ext. header bit; only stored and printed in this file */
int flag63;            /* not referenced anywhere in the visible code */
int abt_flag;          /* ext. header: P pictures carry ABT signalling */
int abt_type;          /* current transform: 0 = 8x8, 1 = two 8x4, 2 = two 4x8 */
int abt_type_table[6]; /* abt_type recorded per block for the add stage */
int per_mb_abt;        /* picture level: abt_type is chosen per macroblock */
int per_block_abt;     /* macroblock level: abt_type is chosen per block */
int mspel_bit;         /* ext. header: P pictures carry an mspel bit */
int cbp_table_index;   /* selects the inter MB type/CBP VLC table */
int top_left_mv_flag;  /* ext. header: explicit left/top MV predictor bit may be coded */
int per_mb_rl_bit;     /* ext. header: pictures carry a per_mb_rl_table bit */
int skip_type;         /* one of the SKIP_TYPE_* values, read per P picture */
int hshift;            /* extra bit read after odd motion vectors when mspel is on */
ScanTable abt_scantable[2]; /* [0] from wmv2_scantableA, [1] from wmv2_scantableB */
DCTELEM abt_block2[6][64] __align8; /* coefficients of the second half block per block */
}Wmv2Context;
/**
 * Initialisation shared by encoder and decoder: registers the two ABT
 * scan orders with the generic scantable setup.
 */
static void wmv2_common_init(Wmv2Context * w){
    ff_init_scantable(&w->s, &w->abt_scantable[0], wmv2_scantableA);
    ff_init_scantable(&w->s, &w->abt_scantable[1], wmv2_scantableB);
}
/**
 * Write the sequence level header into avctx->extradata and record the
 * chosen flags in the context.
 *
 * Layout (25 bits): frame rate (5), bit rate in units of 1024 (11),
 * mspel, flag3, abt, j_type, top_left_mv, per_mb_rl (1 each), slice code (3).
 *
 * @return always 0
 */
static int encode_ext_header(Wmv2Context *w){
    MpegEncContext * const s= &w->s;
    PutBitContext pb;
    int code;

    init_put_bits(&pb, s->avctx->extradata, s->avctx->extradata_size, NULL, NULL);

    /* Both leading fields have a fixed width, so both are clamped; the
     * frame rate used to be written unclamped and could exceed 5 bits. */
    put_bits(&pb, 5, FFMIN(s->frame_rate / FRAME_RATE_BASE, 31)); //yes 29.97 -> 29
    put_bits(&pb, 11, FFMIN(s->bit_rate/1024, 2047));

    w->mspel_bit       = 1;
    w->flag3           = 1;
    w->abt_flag        = 1;
    w->j_type_bit      = 1;
    w->top_left_mv_flag= 0;
    w->per_mb_rl_bit   = 1;
    code               = 1;

    put_bits(&pb, 1, w->mspel_bit);
    put_bits(&pb, 1, w->flag3);
    put_bits(&pb, 1, w->abt_flag);
    put_bits(&pb, 1, w->j_type_bit);
    put_bits(&pb, 1, w->top_left_mv_flag);
    put_bits(&pb, 1, w->per_mb_rl_bit);
    put_bits(&pb, 3, code);

    flush_put_bits(&pb);

    s->slice_height = s->mb_height / code;

    return 0;
}
/**
 * Encoder init callback: sets up the generic MPEG video encoder, the WMV2
 * scan tables and the 4 byte extradata header.
 *
 * @return 0 on success, -1 if the generic init or the extradata
 *         allocation fails
 */
static int wmv2_encode_init(AVCodecContext *avctx){
    Wmv2Context * const w= avctx->priv_data;

    if(MPV_encode_init(avctx) < 0)
        return -1;

    wmv2_common_init(w);

    avctx->extradata_size= 4;
    /* a few spare bytes beyond the advertised size */
    avctx->extradata= av_mallocz(avctx->extradata_size + 10);
    if(!avctx->extradata){
        /* encode_ext_header() would write through a NULL pointer */
        avctx->extradata_size= 0;
        MPV_encode_end(avctx);
        return -1;
    }

    encode_ext_header(w);

    return 0;
}
/**
 * Encoder close helper: shuts down the generic MPEG video encoder and
 * releases the extradata allocated by wmv2_encode_init().
 * The extradata is released even when the generic shutdown reports an
 * error, so that it cannot leak.
 *
 * @return 0 on success, -1 if MPV_encode_end() failed
 */
static int wmv2_encode_end(AVCodecContext *avctx){
    const int ret= MPV_encode_end(avctx);

    avctx->extradata_size= 0;
    av_freep(&avctx->extradata);

    return ret < 0 ? -1 : 0;
}
/**
 * Write the WMV2 picture header and set the per picture coding state.
 *
 * @param s              encoder context (really a Wmv2Context)
 * @param picture_number unused
 * @return always 0
 */
int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number)
{
    Wmv2Context * const w= (Wmv2Context*)s;
    const int intra_picture= (s->pict_type == I_TYPE);

    put_bits(&s->pb, 1, s->pict_type - 1);
    if(intra_picture)
        put_bits(&s->pb, 7, 0);
    put_bits(&s->pb, 5, s->qscale);

    /* fixed choices: none of the optional WMV2 tools are used */
    s->dc_table_index = 1;
    s->mv_table_index = 1; /* only if P frame */
//    s->use_skip_mb_code = 1; /* only if P frame */
    s->per_mb_rl_table = 0;
    s->mspel= 0;
    w->per_mb_abt=0;
    w->abt_type=0;
    w->j_type=0;

    if(intra_picture){
        if(w->j_type_bit)
            put_bits(&s->pb, 1, w->j_type);

        if(w->per_mb_rl_bit)
            put_bits(&s->pb, 1, s->per_mb_rl_table);

        if(!s->per_mb_rl_table){
            code012(&s->pb, s->rl_chroma_table_index);
            code012(&s->pb, s->rl_table_index);
        }

        put_bits(&s->pb, 1, s->dc_table_index);

        s->inter_intra_pred= 0;
        s->no_rounding = 1;
    }else{
        /* rows: qscale <= 10, qscale <= 20, anything above */
        static const int cbp_map[3][3]= {
            {0,2,1},
            {1,0,2},
            {2,1,0},
        };
        const int cbp_index= 0;
        int qclass;

        put_bits(&s->pb, 2, SKIP_TYPE_NONE);

        code012(&s->pb, cbp_index);
        if(s->qscale <= 10)
            qclass= 0;
        else if(s->qscale <= 20)
            qclass= 1;
        else
            qclass= 2;
        w->cbp_table_index= cbp_map[qclass][cbp_index];

        if(w->mspel_bit)
            put_bits(&s->pb, 1, s->mspel);

        if(w->abt_flag){
            put_bits(&s->pb, 1, w->per_mb_abt^1);
            if(!w->per_mb_abt)
                code012(&s->pb, w->abt_type);
        }

        if(w->per_mb_rl_bit)
            put_bits(&s->pb, 1, s->per_mb_rl_table);

        if(!s->per_mb_rl_table){
            code012(&s->pb, s->rl_table_index);
            s->rl_chroma_table_index = s->rl_table_index;
        }

        put_bits(&s->pb, 1, s->dc_table_index);
        put_bits(&s->pb, 1, s->mv_table_index);

        s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE);
        s->no_rounding ^= 1;
    }

    s->esc3_level_length= 0;
    s->esc3_run_length= 0;

    return 0;
}
// nearly identical to wmv1 but that's just because we don't use the useless M$ crap features
// it's duplicated here in case someone wants to add support for these crap features
/*
 * Write one macroblock: the MB type/CBP code, then either the motion vector
 * difference (inter) or the AC prediction flag and optional inter/intra
 * prediction code (intra), followed by all six blocks.
 */
void ff_wmv2_encode_mb(MpegEncContext * s,
DCTELEM block[6][64],
int motion_x, int motion_y)
{
Wmv2Context * const w= (Wmv2Context*)s;
int cbp, coded_cbp, i;
int pred_x, pred_y;
UINT8 *coded_block;
handle_slices(s);
if (!s->mb_intra) {
/* compute cbp */
set_stat(ST_INTER_MB);
cbp = 0;
/* block i owns bit (5 - i); a block counts as coded when it has any coefficient */
for (i = 0; i < 6; i++) {
if (s->block_last_index[i] >= 0)
cbp |= 1 << (5 - i);
}
/* inter macroblocks use the upper half (entries 64..127) of the table */
put_bits(&s->pb,
wmv2_inter_table[w->cbp_table_index][cbp + 64][1],
wmv2_inter_table[w->cbp_table_index][cbp + 64][0]);
/* motion vector */
h263_pred_motion(s, 0, &pred_x, &pred_y);
msmpeg4_encode_motion(s, motion_x - pred_x,
motion_y - pred_y);
} else {
/* compute cbp */
cbp = 0;
coded_cbp = 0;
for (i = 0; i < 6; i++) {
int val, pred;
/* intra blocks only count as coded when something beyond index 0 is present */
val = (s->block_last_index[i] >= 1);
cbp |= val << (5 - i);
if (i < 4) {
/* predict value for close blocks only for luma */
pred = coded_block_pred(s, i, &coded_block);
*coded_block = val;
val = val ^ pred;
}
coded_cbp |= val << (5 - i);
}
#if 0
if (coded_cbp)
printf("cbp=%x %x\n", cbp, coded_cbp);
#endif
/* I pictures code the predicted pattern, P pictures the raw one */
if (s->pict_type == I_TYPE) {
set_stat(ST_INTRA_MB);
put_bits(&s->pb,
table_mb_intra[coded_cbp][1], table_mb_intra[coded_cbp][0]);
} else {
put_bits(&s->pb,
wmv2_inter_table[w->cbp_table_index][cbp][1],
wmv2_inter_table[w->cbp_table_index][cbp][0]);
}
set_stat(ST_INTRA_MB);
put_bits(&s->pb, 1, 0); /* no AC prediction yet */
if(s->inter_intra_pred){
/* direction is always forced to 0 before it is coded */
s->h263_aic_dir=0;
put_bits(&s->pb, table_inter_intra[s->h263_aic_dir][1], table_inter_intra[s->h263_aic_dir][0]);
}
}
for (i = 0; i < 6; i++) {
msmpeg4_encode_block(s, block[i], i);
}
}
/**
 * Read the skip mode of a P picture and fill s->mb_type for every
 * macroblock with either MB_TYPE_SKIPED or 0.
 *
 * SKIP_TYPE_NONE: nothing is skipped, no further bits are read.
 * SKIP_TYPE_MPEG: one bit per macroblock, in raster order.
 * SKIP_TYPE_ROW:  one bit per row; if clear, one bit per macroblock of it.
 * SKIP_TYPE_COL:  one bit per column; if clear, one bit per macroblock of it.
 */
static void parse_mb_skip(Wmv2Context * w){
    MpegEncContext * const s= &w->s;
    const int cols= s->mb_width;
    const int rows= s->mb_height;
    int x, y;

    w->skip_type= get_bits(&s->gb, 2);

    if(w->skip_type == SKIP_TYPE_NONE){
        for(y=0; y<rows; y++)
            for(x=0; x<cols; x++)
                s->mb_type[y*cols + x]= 0;
    }else if(w->skip_type == SKIP_TYPE_MPEG){
        for(y=0; y<rows; y++)
            for(x=0; x<cols; x++)
                s->mb_type[y*cols + x]= get_bits1(&s->gb) ? MB_TYPE_SKIPED : 0;
    }else if(w->skip_type == SKIP_TYPE_ROW){
        for(y=0; y<rows; y++){
            const int whole_row= get_bits1(&s->gb);

            /* the per macroblock bit is only present when the row bit is clear */
            for(x=0; x<cols; x++)
                s->mb_type[y*cols + x]= (whole_row || get_bits1(&s->gb)) ? MB_TYPE_SKIPED : 0;
        }
    }else if(w->skip_type == SKIP_TYPE_COL){
        for(x=0; x<cols; x++){
            const int whole_col= get_bits1(&s->gb);

            /* the per macroblock bit is only present when the column bit is clear */
            for(y=0; y<rows; y++)
                s->mb_type[y*cols + x]= (whole_col || get_bits1(&s->gb)) ? MB_TYPE_SKIPED : 0;
        }
    }
}
/**
 * Parse the sequence level header stored in avctx->extradata.
 *
 * @return 0 on success, -1 if fewer than 4 bytes of extradata are present
 *         or the slice code is 0
 */
static int decode_ext_header(Wmv2Context *w){
    MpegEncContext * const s= &w->s;
    GetBitContext header;
    int frame_rate, slice_code;

    if(s->avctx->extradata_size < 4)
        return -1;

    init_get_bits(&header, s->avctx->extradata, s->avctx->extradata_size);

    frame_rate          = get_bits(&header, 5);
    s->bit_rate         = get_bits(&header, 11)*1024;
    w->mspel_bit        = get_bits1(&header);
    w->flag3            = get_bits1(&header);
    w->abt_flag         = get_bits1(&header);
    w->j_type_bit       = get_bits1(&header);
    w->top_left_mv_flag = get_bits1(&header);
    w->per_mb_rl_bit    = get_bits1(&header);
    slice_code          = get_bits(&header, 3);

    /* the slice code is used as a divisor below */
    if(!slice_code)
        return -1;

    s->slice_height = s->mb_height / slice_code;

    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
        printf("fps:%d, br:%d, qpbit:%d, abt_flag:%d, j_type_bit:%d, tl_mv_flag:%d, mbrl_bit:%d, code:%d, flag3:%d\n",
               frame_rate, s->bit_rate, w->mspel_bit, w->abt_flag, w->j_type_bit,
               w->top_left_mv_flag, w->per_mb_rl_bit, slice_code, w->flag3);
    }

    return 0;
}
/**
 * Parse a WMV2 picture header and set up the per picture decoding state.
 * On the first picture the sequence header in extradata is parsed as well.
 *
 * @return 0 on success, -1 if the extradata header is invalid or the
 *         picture is a (unsupported) J-type picture
 */
int ff_wmv2_decode_picture_header(MpegEncContext * s)
{
    Wmv2Context * const w= (Wmv2Context*)s;
    int code, i;

    if(s->picture_number==0){
        /* every flag tested below comes from this header, so a failure
         * must not be ignored */
        if(decode_ext_header(w) < 0){
            fprintf(stderr, "invalid WMV2 extradata header\n");
            return -1;
        }
    }

    s->pict_type = get_bits(&s->gb, 1) + 1;
    if(s->pict_type == I_TYPE){
        code = get_bits(&s->gb, 7);
        /* purely informational: only print it when picture debugging is on */
        if(s->avctx->debug&FF_DEBUG_PICT_INFO)
            printf("I7:%X/\n", code);
    }
    s->qscale = get_bits(&s->gb, 5);

    if (s->pict_type == I_TYPE) {
        if(w->j_type_bit) w->j_type= get_bits1(&s->gb);
        else              w->j_type= 0; //FIXME check

        if(!w->j_type){
            if(w->per_mb_rl_bit) s->per_mb_rl_table= get_bits1(&s->gb);
            else                 s->per_mb_rl_table= 0;

            if(!s->per_mb_rl_table){
                s->rl_chroma_table_index = decode012(&s->gb);
                s->rl_table_index = decode012(&s->gb);
            }

            s->dc_table_index = get_bits1(&s->gb);
        }
        s->inter_intra_pred= 0;
        s->no_rounding = 1;
        if(s->avctx->debug&FF_DEBUG_PICT_INFO){
            printf("qscale:%d rlc:%d rl:%d dc:%d mbrl:%d j_type:%d \n",
                   s->qscale,
                   s->rl_chroma_table_index,
                   s->rl_table_index,
                   s->dc_table_index,
                   s->per_mb_rl_table,
                   w->j_type);
        }
    }else{
        int cbp_index;
        w->j_type=0;

        parse_mb_skip(w);
        cbp_index= decode012(&s->gb);
        /* the meaning of cbp_index depends on the quantizer range */
        if(s->qscale <= 10){
            int map[3]= {0,2,1};
            w->cbp_table_index= map[cbp_index];
        }else if(s->qscale <= 20){
            int map[3]= {1,0,2};
            w->cbp_table_index= map[cbp_index];
        }else{
            int map[3]= {2,1,0};
            w->cbp_table_index= map[cbp_index];
        }

        if(w->mspel_bit) s->mspel= get_bits1(&s->gb);
        else             s->mspel= 0; //FIXME check

        if(w->abt_flag){
            w->per_mb_abt= get_bits1(&s->gb)^1;
            if(!w->per_mb_abt){
                w->abt_type= decode012(&s->gb);
            }
        }

        if(w->per_mb_rl_bit) s->per_mb_rl_table= get_bits1(&s->gb);
        else                 s->per_mb_rl_table= 0;

        if(!s->per_mb_rl_table){
            s->rl_table_index = decode012(&s->gb);
            s->rl_chroma_table_index = s->rl_table_index;
        }

        s->dc_table_index = get_bits1(&s->gb);
        s->mv_table_index = get_bits1(&s->gb);

        s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE);
        s->no_rounding ^= 1;

        if(s->avctx->debug&FF_DEBUG_PICT_INFO){
            printf("rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d mspel:%d per_mb_abt:%d abt_type:%d cbp:%d ii:%d\n",
                   s->rl_table_index,
                   s->rl_chroma_table_index,
                   s->dc_table_index,
                   s->mv_table_index,
                   s->per_mb_rl_table,
                   s->qscale,
                   s->mspel,
                   w->per_mb_abt,
                   w->abt_type,
                   w->cbp_table_index,
                   s->inter_intra_pred);
        }
    }
    s->esc3_level_length= 0;
    s->esc3_run_length= 0;

    if(s->avctx->debug&FF_DEBUG_SKIP){
        for(i=0; i<s->mb_num; i++){
            if(i%s->mb_width==0) printf("\n");
            printf("%d", s->mb_type[i]);
        }
    }
    s->picture_number++; //FIXME ?

    /* FIXME: J-type pictures would need a dedicated decoder */
    if(w->j_type){
        printf("J-type picture isnt supported\n");
        return -1;
    }

    return 0;
}
/* Placeholder hook: currently does nothing. The decoder's own setup is
 * done in wmv2_decode_init(). */
void ff_wmv2_decode_init(MpegEncContext *s){
}
/**
 * Decode one motion vector and, when mspel is enabled and either component
 * is odd, the additional hshift bit.
 *
 * @param mx_ptr in: predictor, out: decoded x component
 * @param my_ptr in: predictor, out: decoded y component
 * @return 0 on success, -1 if the vector could not be decoded
 */
static inline int wmv2_decode_motion(Wmv2Context *w, int *mx_ptr, int *my_ptr){
    MpegEncContext * const s= &w->s;

    if(msmpeg4_decode_motion(s, mx_ptr, my_ptr) < 0)
        return -1;

    w->hshift= 0;
    if(s->mspel && ((*mx_ptr | *my_ptr) & 1))
        w->hshift= get_bits1(&s->gb);

    return 0;
}
/**
 * Compute the motion vector predictor of the current macroblock.
 * Candidates are the left, top and top-right neighbours. Normally their
 * median is used (or the left one on the first slice line); when the
 * top_left_mv feature applies, one bit selects the left (0) or the top (1)
 * vector instead.
 *
 * @param px receives the predicted x component
 * @param py receives the predicted y component
 * @return pointer to the motion vector slot of the current block
 */
static int16_t *wmv2_pred_motion(Wmv2Context *w, int *px, int *py){
    MpegEncContext * const s= &w->s;
    const int stride= s->block_wrap[0];
    const int pos   = s->block_index[0];
    INT16 * const current  = s->motion_val[pos];
    INT16 * const left     = s->motion_val[pos - 1];
    INT16 * const top      = s->motion_val[pos - stride];
    INT16 * const top_right= s->motion_val[pos + 2 - stride];
    const int diff= FFMAX(ABS(left[0] - top[0]), ABS(left[1] - top[1]));
    int type= 2;

    //FIXME top/left bit too if y=!0 && first_slice_line?
    if(s->mb_x && s->mb_y && !s->mspel && w->top_left_mv_flag && diff >= 8)
        type= get_bits1(&s->gb);

    switch(type){
    case 0:
        *px= left[0];
        *py= left[1];
        break;
    case 1:
        *px= top[0];
        *py= top[1];
        break;
    default:
        if (s->first_slice_line) {
            /* special case for first (slice) line */
            *px = left[0];
            *py = left[1];
        } else {
            *px = mid_pred(left[0], top[0], top_right[0]);
            *py = mid_pred(left[1], top[1], top_right[1]);
        }
        break;
    }

    return current;
}
/**
 * Decode the coefficients of one inter block, honouring the ABT mode.
 *
 * @param block destination for the (first half) coefficients
 * @param n     block number, 0..5
 * @param cbp   non-zero if the block is coded
 * @return 0 on success, a negative value on a bitstream error
 */
static inline int wmv2_decode_inter_block(Wmv2Context *w, DCTELEM *block, int n, int cbp){
    MpegEncContext * const s= &w->s;
    /* maps the 0/1/2 code to "which halves are coded": bit 0 first, bit 1 second */
    static const int sub_cbp_table[3]= {2,3,1};
    const uint8_t *scan;
    int halves;

    if(!cbp){
        s->block_last_index[n] = -1;
        return 0;
    }

    if(w->per_block_abt)
        w->abt_type= decode012(&s->gb);
    w->abt_type_table[n]= w->abt_type;

    /* plain 8x8 transform */
    if(!w->abt_type)
        return msmpeg4_decode_block(s, block, n, 1, s->inter_scantable.permutated);

    /* split transform: up to two half blocks, each with its own coefficients */
    scan= w->abt_scantable[w->abt_type-1].scantable;
    halves= sub_cbp_table[ decode012(&s->gb) ];

    if((halves&1) && msmpeg4_decode_block(s, block, n, 1, scan) < 0)
        return -1;

    if((halves&2) && msmpeg4_decode_block(s, w->abt_block2[n], n, 1, scan) < 0)
        return -1;

    s->block_last_index[n] = 63;
    return 0;
}
/**
 * Inverse transform one block and add it to the picture, using the
 * transform recorded for it in abt_type_table.
 *
 * @param block1 coefficients of the block (first half for split transforms)
 * @param dst    top-left destination pixel of the 8x8 block
 * @param stride distance in bytes between two destination rows
 * @param n      block number, 0..5
 */
static void wmv2_add_block(Wmv2Context *w, DCTELEM *block1, uint8_t *dst, int stride, int n){
    MpegEncContext * const s= &w->s;

    switch(w->abt_type_table[n]){
    case 0:
        /* regular 8x8 transform, skipped when the block is empty */
        if (s->block_last_index[n] >= 0) {
            s->idct_add (dst, stride, block1);
        }
        break;
    case 1:
        /* two 8x4 halves: top and bottom */
        simple_idct84_add(dst           , stride, block1);
        simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]);
        /* the second half buffer is reused, so leave it cleared */
        memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM));
        break;
    case 2:
        /* two 4x8 halves: left and right */
        simple_idct48_add(dst           , stride, block1);
        simple_idct48_add(dst + 4       , stride, w->abt_block2[n]);
        memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM));
        break;
    default:
        fprintf(stderr, "internal error in WMV2 abt\n");
        break;
    }
}
/**
 * Add the residual of a whole macroblock: four luma blocks and, unless
 * gray-only decoding is requested, the two chroma blocks.
 */
void ff_wmv2_add_mb(MpegEncContext *s, DCTELEM block1[6][64], uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr){
    Wmv2Context * const w= (Wmv2Context*)s;
    int n;

    /* luma blocks 0..3 form a 2x2 grid of 8x8 blocks */
    for(n=0; n<4; n++){
        uint8_t *luma= dest_y + 8*(n&1) + 8*(n>>1)*s->linesize;

        wmv2_add_block(w, block1[n], luma, s->linesize, n);
    }

    if(!(s->flags&CODEC_FLAG_GRAY)){
        wmv2_add_block(w, block1[4], dest_cb, s->uvlinesize, 4);
        wmv2_add_block(w, block1[5], dest_cr, s->uvlinesize, 5);
    }
}
/*
 * Motion compensation of one macroblock for pictures using mspel.
 * Luma is predicted with the put_mspel_pixels_tab functions as four 8x8
 * calls; chroma goes through the regular pix_op[1] functions.
 *
 * dest_y/dest_cb/dest_cr: destination planes for this macroblock
 * ref_picture:            the three planes of the reference picture
 * pix_op:                 pixel operation table used for chroma
 * motion_x/motion_y:      motion vector (low bit = half sample flag)
 * h:                      block height, used for the edge test and chroma
 */
void ff_mspel_motion(MpegEncContext *s,
UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
UINT8 **ref_picture, op_pixels_func (*pix_op)[4],
int motion_x, int motion_y, int h)
{
Wmv2Context * const w= (Wmv2Context*)s;
UINT8 *ptr;
int dxy, offset, mx, my, src_x, src_y, v_edge_pos, linesize, uvlinesize;
int emu=0;
/* bit 0: odd x, bit 1: odd y; doubled with hshift in the lowest bit */
dxy = ((motion_y & 1) << 1) | (motion_x & 1);
dxy = 2*dxy + w->hshift;
src_x = s->mb_x * 16 + (motion_x >> 1);
src_y = s->mb_y * 16 + (motion_y >> 1);
/* WARNING: do not forget half pels */
v_edge_pos = s->v_edge_pos;
src_x = clip(src_x, -16, s->width);
src_y = clip(src_y, -16, s->height);
linesize = s->linesize;
uvlinesize = s->uvlinesize;
ptr = ref_picture[0] + (src_y * linesize) + src_x;
if(s->flags&CODEC_FLAG_EMU_EDGE){
/* the source area including a one pixel border must lie inside the picture */
if(src_x<1 || src_y<1 || src_x + 17 >= s->h_edge_pos
|| src_y + h+1 >= v_edge_pos){
/* build a 19x19 copy starting one pixel up and left of the block */
ff_emulated_edge_mc(s, ptr - 1 - s->linesize, s->linesize, 19, 19,
src_x-1, src_y-1, s->h_edge_pos, s->v_edge_pos);
ptr= s->edge_emu_buffer + 1 + s->linesize;
emu=1;
}
}
/* luma: four 8x8 quadrants */
s->dsp.put_mspel_pixels_tab[dxy](dest_y , ptr , linesize);
s->dsp.put_mspel_pixels_tab[dxy](dest_y+8 , ptr+8 , linesize);
s->dsp.put_mspel_pixels_tab[dxy](dest_y +8*linesize, ptr +8*linesize, linesize);
s->dsp.put_mspel_pixels_tab[dxy](dest_y+8+8*linesize, ptr+8+8*linesize, linesize);
if(s->flags&CODEC_FLAG_GRAY) return;
/* derive the chroma vector and its half sample flags from the luma vector */
if (s->out_format == FMT_H263) {
dxy = 0;
if ((motion_x & 3) != 0)
dxy |= 1;
if ((motion_y & 3) != 0)
dxy |= 2;
mx = motion_x >> 2;
my = motion_y >> 2;
} else {
mx = motion_x / 2;
my = motion_y / 2;
dxy = ((my & 1) << 1) | (mx & 1);
mx >>= 1;
my >>= 1;
}
src_x = s->mb_x * 8 + mx;
src_y = s->mb_y * 8 + my;
/* at the right/bottom limit drop the half sample flag of that direction */
src_x = clip(src_x, -8, s->width >> 1);
if (src_x == (s->width >> 1))
dxy &= ~1;
src_y = clip(src_y, -8, s->height >> 1);
if (src_y == (s->height >> 1))
dxy &= ~2;
offset = (src_y * uvlinesize) + src_x;
ptr = ref_picture[1] + offset;
/* chroma reuses the decision taken for luma */
if(emu){
ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9,
src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer;
}
pix_op[1][dxy](dest_cb, ptr, uvlinesize, h >> 1);
ptr = ref_picture[2] + offset;
if(emu){
ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9,
src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer;
}
pix_op[1][dxy](dest_cr, ptr, uvlinesize, h >> 1);
}
/*
 * Decode one macroblock: type and coded block pattern, then either the
 * motion vector and inter blocks or the intra prediction flags and intra
 * blocks.
 * Returns 0 on success (also for skipped macroblocks and J-type pictures)
 * and -1 on a bitstream error.
 */
static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
{
Wmv2Context * const w= (Wmv2Context*)s;
int cbp, code, i;
UINT8 *coded_val;
/* J-type pictures are not decoded here */
if(w->j_type) return 0;
s->error_status_table[s->mb_x + s->mb_y*s->mb_width]= 0;
if (s->pict_type == P_TYPE) {
/* skip flags were filled in while parsing the picture header */
if(s->mb_type[s->mb_y * s->mb_width + s->mb_x]&MB_TYPE_SKIPED){
/* skip mb */
s->mb_intra = 0;
for(i=0;i<6;i++)
s->block_last_index[i] = -1;
s->mv_dir = MV_DIR_FORWARD;
s->mv_type = MV_TYPE_16X16;
s->mv[0][0][0] = 0;
s->mv[0][0][1] = 0;
s->mb_skiped = 1;
return 0;
}
code = get_vlc2(&s->gb, mb_non_intra_vlc[w->cbp_table_index].table, MB_NON_INTRA_VLC_BITS, 3);
if (code < 0)
return -1;
/* bit 6 set means inter; the low 6 bits are the coded block pattern */
s->mb_intra = (~code & 0x40) >> 6;
cbp = code & 0x3f;
} else {
s->mb_intra = 1;
code = get_vlc2(&s->gb, mb_intra_vlc.table, MB_INTRA_VLC_BITS, 2);
if (code < 0){
fprintf(stderr, "II-cbp illegal at %d %d\n", s->mb_x, s->mb_y);
return -1;
}
/* predict coded block pattern */
cbp = 0;
for(i=0;i<6;i++) {
int val = ((code >> (5 - i)) & 1);
/* only the four luma bits are coded relative to a prediction */
if (i < 4) {
int pred = coded_block_pred(s, i, &coded_val);
val = val ^ pred;
*coded_val = val;
}
cbp |= val << (5 - i);
}
}
if (!s->mb_intra) {
int mx, my;
//printf("P at %d %d\n", s->mb_x, s->mb_y);
wmv2_pred_motion(w, &mx, &my);
/* per macroblock table and transform choices are only present when something is coded */
if(cbp){
if(s->per_mb_rl_table){
s->rl_table_index = decode012(&s->gb);
s->rl_chroma_table_index = s->rl_table_index;
}
if(w->abt_flag && w->per_mb_abt){
w->per_block_abt= get_bits1(&s->gb);
if(!w->per_block_abt)
w->abt_type= decode012(&s->gb);
}else
w->per_block_abt=0;
}
/* mx/my hold the predictor on entry and the final vector on return */
if (wmv2_decode_motion(w, &mx, &my) < 0)
return -1;
s->mv_dir = MV_DIR_FORWARD;
s->mv_type = MV_TYPE_16X16;
s->mv[0][0][0] = mx;
s->mv[0][0][1] = my;
for (i = 0; i < 6; i++) {
if (wmv2_decode_inter_block(w, block[i], i, (cbp >> (5 - i)) & 1) < 0)
{
fprintf(stderr,"\nerror while decoding inter block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
return -1;
}
}
} else {
//if(s->pict_type==P_TYPE)
// printf("%d%d ", s->inter_intra_pred, cbp);
//printf("I at %d %d %d %06X\n", s->mb_x, s->mb_y, ((cbp&3)? 1 : 0) +((cbp&0x3C)? 2 : 0), show_bits(&s->gb, 24));
s->ac_pred = get_bits1(&s->gb);
if(s->inter_intra_pred){
s->h263_aic_dir= get_vlc2(&s->gb, inter_intra_vlc.table, INTER_INTRA_VLC_BITS, 1);
// printf("%d%d %d %d/", s->ac_pred, s->h263_aic_dir, s->mb_x, s->mb_y);
}
if(s->per_mb_rl_table && cbp){
s->rl_table_index = decode012(&s->gb);
s->rl_chroma_table_index = s->rl_table_index;
}
for (i = 0; i < 6; i++) {
if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
{
fprintf(stderr,"\nerror while decoding intra block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
return -1;
}
}
}
return 0;
}
/**
 * Decoder init callback: runs the generic H.263 family init and then the
 * WMV2 specific table setup.
 *
 * @return 0 on success, -1 if the generic init fails
 */
static int wmv2_decode_init(AVCodecContext *avctx){
    Wmv2Context * const ctx= avctx->priv_data;
    const int generic_ok= (ff_h263_decode_init(avctx) >= 0);

    if(!generic_ok)
        return -1;

    wmv2_common_init(ctx);

    return 0;
}
/* WMV2 decoder registration. The initializer is positional; judging by the
 * matching encoder entry, the NULL slot is the encode callback and the
 * following two are close and decode -- confirm against the AVCodec
 * declaration. Decoding itself is handled by the shared H.263 code. */
AVCodec wmv2_decoder = {
"wmv2",
CODEC_TYPE_VIDEO,
CODEC_ID_WMV2,
sizeof(Wmv2Context),
wmv2_decode_init,
NULL,
ff_h263_decode_end,
ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
};
AVCodec wmv2_encoder = {
"wmv2",
CODEC_TYPE_VIDEO,
CODEC_ID_WMV2,
sizeof(Wmv2Context),
wmv2_encode_init,
MPV_encode_picture,
MPV_encode_end,
};
Loading…
Cancel
Save