Split cavlc out of h264.c.

Seems to speed the code up a little...
The placement of many generic functions between h264.c and h264.h is still open
Currently they are a little randomly placed between them.

Originally committed as revision 21178 to svn://svn.ffmpeg.org/ffmpeg/trunk
release/0.6
Michael Niedermayer 15 years ago
parent 08f8b51f69
commit e1e949026e
  1. 4
      libavcodec/Makefile
  2. 1439
      libavcodec/h264.c
  3. 596
      libavcodec/h264.h
  4. 1053
      libavcodec/h264_cavlc.c
  5. 167
      libavcodec/h264data.h

@ -132,7 +132,7 @@ OBJS-$(CONFIG_H263_ENCODER) += mpegvideo_enc.o mpeg4video.o mpeg4vide
ratecontrol.o h263.o ituh263enc.o flvenc.o mpeg12data.o \ ratecontrol.o h263.o ituh263enc.o flvenc.o mpeg12data.o \
mpegvideo.o error_resilience.o mpegvideo.o error_resilience.o
OBJS-$(CONFIG_H264_DECODER) += h264.o h264idct.o h264pred.o h264_loopfilter.o h264_direct.o cabac.o \ OBJS-$(CONFIG_H264_DECODER) += h264.o h264idct.o h264pred.o h264_loopfilter.o h264_direct.o cabac.o \
h264_sei.o h264_ps.o h264_refs.o\ h264_sei.o h264_ps.o h264_refs.o h264_cavlc.o\
mpegvideo.o error_resilience.o mpegvideo.o error_resilience.o
OBJS-$(CONFIG_H264_ENCODER) += h264enc.o h264dspenc.o OBJS-$(CONFIG_H264_ENCODER) += h264enc.o h264dspenc.o
OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o
@ -283,7 +283,7 @@ OBJS-$(CONFIG_SVQ1_ENCODER) += svq1enc.o svq1.o \
motion_est.o h263.o \ motion_est.o h263.o \
mpegvideo.o error_resilience.o mpegvideo.o error_resilience.o
OBJS-$(CONFIG_SVQ3_DECODER) += h264.o svq3.o h264idct.o h264pred.o h264_loopfilter.o h264_direct.o \ OBJS-$(CONFIG_SVQ3_DECODER) += h264.o svq3.o h264idct.o h264pred.o h264_loopfilter.o h264_direct.o \
h264_sei.o h264_ps.o h264_refs.o\ h264_sei.o h264_ps.o h264_refs.o h264_cavlc.o\
cabac.o \ cabac.o \
mpegvideo.o error_resilience.o \ mpegvideo.o error_resilience.o \
svq1dec.o svq1.o h263.o svq1dec.o svq1.o h263.o

File diff suppressed because it is too large Load Diff

@ -32,6 +32,7 @@
#include "cabac.h" #include "cabac.h"
#include "mpegvideo.h" #include "mpegvideo.h"
#include "h264pred.h" #include "h264pred.h"
#include "rectangle.h"
#define interlaced_dct interlaced_dct_is_a_bad_name #define interlaced_dct interlaced_dct_is_a_bad_name
#define mb_intra mb_intra_is_not_initialized_see_mb_type #define mb_intra mb_intra_is_not_initialized_see_mb_type
@ -642,6 +643,13 @@ void ff_h264_hl_decode_mb(H264Context *h);
int ff_h264_frame_start(H264Context *h); int ff_h264_frame_start(H264Context *h);
av_cold int ff_h264_decode_init(AVCodecContext *avctx); av_cold int ff_h264_decode_init(AVCodecContext *avctx);
av_cold int ff_h264_decode_end(AVCodecContext *avctx); av_cold int ff_h264_decode_end(AVCodecContext *avctx);
av_cold void ff_h264_decode_init_vlc(void);
/**
* decodes a macroblock
* @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
*/
int ff_h264_decode_mb_cavlc(H264Context *h);
void ff_h264_direct_dist_scale_factor(H264Context * const h); void ff_h264_direct_dist_scale_factor(H264Context * const h);
void ff_h264_direct_ref_list_init(H264Context * const h); void ff_h264_direct_ref_list_init(H264Context * const h);
@ -694,4 +702,592 @@ static inline int get_chroma_qp(H264Context *h, int t, int qscale){
return h->pps.chroma_qp_table[t][qscale]; return h->pps.chroma_qp_table[t][qscale];
} }
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my);
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
MpegEncContext * const s = &h->s;
const int mb_xy= h->mb_xy;
int topleft_xy, top_xy, topright_xy, left_xy[2];
int topleft_type, top_type, topright_type, left_type[2];
const uint8_t * left_block;
int topleft_partition= -1;
int i;
static const uint8_t left_block_options[4][8]={
{0,1,2,3,7,10,8,11},
{2,2,3,3,8,11,8,11},
{0,0,1,1,7,10,7,10},
{0,2,0,2,7,10,7,10}
};
top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
//FIXME deblocking could skip the intra and nnz parts.
if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
return;
/* Wow, what a mess, why didn't they simplify the interlacing & intra
* stuff, I can't imagine that these complex rules are worth it. */
topleft_xy = top_xy - 1;
topright_xy= top_xy + 1;
left_xy[1] = left_xy[0] = mb_xy-1;
left_block = left_block_options[0];
if(FRAME_MBAFF){
const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
const int top_pair_xy = pair_xy - s->mb_stride;
const int topleft_pair_xy = top_pair_xy - 1;
const int topright_pair_xy = top_pair_xy + 1;
const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
const int curr_mb_field_flag = IS_INTERLACED(mb_type);
const int bottom = (s->mb_y & 1);
tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
top_xy -= s->mb_stride;
}
if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
topleft_xy -= s->mb_stride;
} else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
topleft_xy += s->mb_stride;
// take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
topleft_partition = 0;
}
if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
topright_xy -= s->mb_stride;
}
if (left_mb_field_flag != curr_mb_field_flag) {
left_xy[1] = left_xy[0] = pair_xy - 1;
if (curr_mb_field_flag) {
left_xy[1] += s->mb_stride;
left_block = left_block_options[3];
} else {
left_block= left_block_options[2 - bottom];
}
}
}
h->top_mb_xy = top_xy;
h->left_mb_xy[0] = left_xy[0];
h->left_mb_xy[1] = left_xy[1];
if(for_deblock){
topleft_type = 0;
topright_type = 0;
top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
if(MB_MBAFF && !IS_INTRA(mb_type)){
int list;
for(list=0; list<h->list_count; list++){
//These values where changed for ease of performing MC, we need to change them back
//FIXME maybe we can make MC and loop filter use the same values or prevent
//the MC code from changing ref_cache and rather use a temporary array.
if(USES_LIST(mb_type,list)){
int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
*(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
*(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
ref += h->b8_stride;
*(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
*(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
}
}
}
}else{
topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
if(IS_INTRA(mb_type)){
int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
h->topleft_samples_available=
h->top_samples_available=
h->left_samples_available= 0xFFFF;
h->topright_samples_available= 0xEEEA;
if(!(top_type & type_mask)){
h->topleft_samples_available= 0xB3FF;
h->top_samples_available= 0x33FF;
h->topright_samples_available= 0x26EA;
}
if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
if(IS_INTERLACED(mb_type)){
if(!(left_type[0] & type_mask)){
h->topleft_samples_available&= 0xDFFF;
h->left_samples_available&= 0x5FFF;
}
if(!(left_type[1] & type_mask)){
h->topleft_samples_available&= 0xFF5F;
h->left_samples_available&= 0xFF5F;
}
}else{
int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
assert(left_xy[0] == left_xy[1]);
if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
h->topleft_samples_available&= 0xDF5F;
h->left_samples_available&= 0x5F5F;
}
}
}else{
if(!(left_type[0] & type_mask)){
h->topleft_samples_available&= 0xDF5F;
h->left_samples_available&= 0x5F5F;
}
}
if(!(topleft_type & type_mask))
h->topleft_samples_available&= 0x7FFF;
if(!(topright_type & type_mask))
h->topright_samples_available&= 0xFBFF;
if(IS_INTRA4x4(mb_type)){
if(IS_INTRA4x4(top_type)){
h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
}else{
int pred;
if(!(top_type & type_mask))
pred= -1;
else{
pred= 2;
}
h->intra4x4_pred_mode_cache[4+8*0]=
h->intra4x4_pred_mode_cache[5+8*0]=
h->intra4x4_pred_mode_cache[6+8*0]=
h->intra4x4_pred_mode_cache[7+8*0]= pred;
}
for(i=0; i<2; i++){
if(IS_INTRA4x4(left_type[i])){
h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
}else{
int pred;
if(!(left_type[i] & type_mask))
pred= -1;
else{
pred= 2;
}
h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
}
}
}
}
}
/*
0 . T T. T T T T
1 L . .L . . . .
2 L . .L . . . .
3 . T TL . . . .
4 L . .L . . . .
5 L . .. . . . .
*/
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
if(top_type){
h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
}else{
h->non_zero_count_cache[4+8*0]=
h->non_zero_count_cache[5+8*0]=
h->non_zero_count_cache[6+8*0]=
h->non_zero_count_cache[7+8*0]=
h->non_zero_count_cache[1+8*0]=
h->non_zero_count_cache[2+8*0]=
h->non_zero_count_cache[1+8*3]=
h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
}
for (i=0; i<2; i++) {
if(left_type[i]){
h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
}else{
h->non_zero_count_cache[3+8*1 + 2*8*i]=
h->non_zero_count_cache[3+8*2 + 2*8*i]=
h->non_zero_count_cache[0+8*1 + 8*i]=
h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
}
}
if( h->pps.cabac ) {
// top_cbp
if(top_type) {
h->top_cbp = h->cbp_table[top_xy];
} else if(IS_INTRA(mb_type)) {
h->top_cbp = 0x1C0;
} else {
h->top_cbp = 0;
}
// left_cbp
if (left_type[0]) {
h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
} else if(IS_INTRA(mb_type)) {
h->left_cbp = 0x1C0;
} else {
h->left_cbp = 0;
}
if (left_type[0]) {
h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
}
if (left_type[1]) {
h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
}
}
#if 1
if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
int list;
for(list=0; list<h->list_count; list++){
if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
/*if(!h->mv_cache_clean[list]){
memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
h->mv_cache_clean[list]= 1;
}*/
continue;
}
h->mv_cache_clean[list]= 0;
if(USES_LIST(top_type, list)){
const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
*(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
*(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
*(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
*(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
h->ref_cache[list][scan8[0] + 0 - 1*8]=
h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
h->ref_cache[list][scan8[0] + 2 - 1*8]=
h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
}else{
*(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
*(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
*(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
*(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
*(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
}
for(i=0; i<2; i++){
int cache_idx = scan8[0] - 1 + i*2*8;
if(USES_LIST(left_type[i], list)){
const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
*(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
*(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
}else{
*(uint32_t*)h->mv_cache [list][cache_idx ]=
*(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
h->ref_cache[list][cache_idx ]=
h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
}
}
if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
continue;
if(USES_LIST(topleft_type, list)){
const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
*(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
}else{
*(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
}
if(USES_LIST(topright_type, list)){
const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
*(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
}else{
*(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
}
if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
continue;
h->ref_cache[list][scan8[5 ]+1] =
h->ref_cache[list][scan8[7 ]+1] =
h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
h->ref_cache[list][scan8[4 ]] =
h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
*(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
*(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
*(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
*(uint32_t*)h->mv_cache [list][scan8[4 ]]=
*(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
if( h->pps.cabac ) {
/* XXX beurk, Load mvd */
if(USES_LIST(top_type, list)){
const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
*(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
*(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
*(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
*(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
}else{
*(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
*(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
*(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
*(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
}
if(USES_LIST(left_type[0], list)){
const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
*(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
*(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
}else{
*(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
*(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
}
if(USES_LIST(left_type[1], list)){
const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
*(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
*(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
}else{
*(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
*(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
}
*(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
*(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
*(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
*(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
*(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
if(h->slice_type_nos == FF_B_TYPE){
fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
if(IS_DIRECT(top_type)){
*(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
}else if(IS_8X8(top_type)){
int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
}else{
*(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
}
if(IS_DIRECT(left_type[0]))
h->direct_cache[scan8[0] - 1 + 0*8]= 1;
else if(IS_8X8(left_type[0]))
h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
else
h->direct_cache[scan8[0] - 1 + 0*8]= 0;
if(IS_DIRECT(left_type[1]))
h->direct_cache[scan8[0] - 1 + 2*8]= 1;
else if(IS_8X8(left_type[1]))
h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
else
h->direct_cache[scan8[0] - 1 + 2*8]= 0;
}
}
if(FRAME_MBAFF){
#define MAP_MVS\
MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
if(MB_FIELD){
#define MAP_F2F(idx, mb_type)\
if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
h->ref_cache[list][idx] <<= 1;\
h->mv_cache[list][idx][1] /= 2;\
h->mvd_cache[list][idx][1] /= 2;\
}
MAP_MVS
#undef MAP_F2F
}else{
#define MAP_F2F(idx, mb_type)\
if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
h->ref_cache[list][idx] >>= 1;\
h->mv_cache[list][idx][1] <<= 1;\
h->mvd_cache[list][idx][1] <<= 1;\
}
MAP_MVS
#undef MAP_F2F
}
}
}
}
#endif
h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
}
/**
* gets the predicted intra4x4 prediction mode.
*/
static inline int pred_intra_mode(H264Context *h, int n){
const int index8= scan8[n];
const int left= h->intra4x4_pred_mode_cache[index8 - 1];
const int top = h->intra4x4_pred_mode_cache[index8 - 8];
const int min= FFMIN(left, top);
tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
if(min<0) return DC_PRED;
else return min;
}
static inline void write_back_non_zero_count(H264Context *h){
const int mb_xy= h->mb_xy;
h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
}
static inline void write_back_motion(H264Context *h, int mb_type){
MpegEncContext * const s = &h->s;
const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
int list;
if(!USES_LIST(mb_type, 0))
fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
for(list=0; list<h->list_count; list++){
int y;
if(!USES_LIST(mb_type, list))
continue;
for(y=0; y<4; y++){
*(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
*(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
}
if( h->pps.cabac ) {
if(IS_SKIP(mb_type))
fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
else
for(y=0; y<4; y++){
*(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
*(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
}
}
{
int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
}
}
if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
if(IS_8X8(mb_type)){
uint8_t *direct_table = &h->direct_table[b8_xy];
direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
}
}
}
static inline int get_dct8x8_allowed(H264Context *h){
if(h->sps.direct_8x8_inference_flag)
return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
else
return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
}
static void predict_field_decoding_flag(H264Context *h){
MpegEncContext * const s = &h->s;
const int mb_xy= h->mb_xy;
int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
? s->current_picture.mb_type[mb_xy-1]
: (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
? s->current_picture.mb_type[mb_xy-s->mb_stride]
: 0;
h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
}
/**
* decodes a P_SKIP or B_SKIP macroblock
*/
static void decode_mb_skip(H264Context *h){
MpegEncContext * const s = &h->s;
const int mb_xy= h->mb_xy;
int mb_type=0;
memset(h->non_zero_count[mb_xy], 0, 16);
memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
if(MB_FIELD)
mb_type|= MB_TYPE_INTERLACED;
if( h->slice_type_nos == FF_B_TYPE )
{
// just for fill_caches. pred_direct_motion will set the real mb_type
mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
ff_h264_pred_direct_motion(h, &mb_type);
mb_type|= MB_TYPE_SKIP;
}
else
{
int mx, my;
mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
pred_pskip_motion(h, &mx, &my);
fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
}
write_back_motion(h, mb_type);
s->current_picture.mb_type[mb_xy]= mb_type;
s->current_picture.qscale_table[mb_xy]= s->qscale;
h->slice_table[ mb_xy ]= h->slice_num;
h->prev_mb_skipped= 1;
}
#endif /* AVCODEC_H264_H */ #endif /* AVCODEC_H264_H */

File diff suppressed because it is too large Load Diff

@ -38,173 +38,6 @@
static const uint8_t golomb_to_pict_type[5]= static const uint8_t golomb_to_pict_type[5]=
{FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE}; {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
static const uint8_t golomb_to_intra4x4_cbp[48]={
47, 31, 15, 0, 23, 27, 29, 30, 7, 11, 13, 14, 39, 43, 45, 46,
16, 3, 5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44, 1, 2, 4,
8, 17, 18, 20, 24, 6, 9, 22, 25, 32, 33, 34, 36, 40, 38, 41
};
static const uint8_t golomb_to_inter_cbp[48]={
0, 16, 1, 2, 4, 8, 32, 3, 5, 10, 12, 15, 47, 7, 11, 13,
14, 6, 9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46,
17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41
};
static const uint8_t golomb_to_inter_cbp_gray[16]={
0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
};
static const uint8_t golomb_to_intra4x4_cbp_gray[16]={
15, 0, 7,11,13,14, 3, 5,10,12, 1, 2, 4, 8, 6, 9,
};
static const uint8_t chroma_dc_coeff_token_len[4*5]={
2, 0, 0, 0,
6, 1, 0, 0,
6, 6, 3, 0,
6, 7, 7, 6,
6, 8, 8, 7,
};
static const uint8_t chroma_dc_coeff_token_bits[4*5]={
1, 0, 0, 0,
7, 1, 0, 0,
4, 6, 1, 0,
3, 3, 2, 5,
2, 3, 2, 0,
};
static const uint8_t coeff_token_len[4][4*17]={
{
1, 0, 0, 0,
6, 2, 0, 0, 8, 6, 3, 0, 9, 8, 7, 5, 10, 9, 8, 6,
11,10, 9, 7, 13,11,10, 8, 13,13,11, 9, 13,13,13,10,
14,14,13,11, 14,14,14,13, 15,15,14,14, 15,15,15,14,
16,15,15,15, 16,16,16,15, 16,16,16,16, 16,16,16,16,
},
{
2, 0, 0, 0,
6, 2, 0, 0, 6, 5, 3, 0, 7, 6, 6, 4, 8, 6, 6, 4,
8, 7, 7, 5, 9, 8, 8, 6, 11, 9, 9, 6, 11,11,11, 7,
12,11,11, 9, 12,12,12,11, 12,12,12,11, 13,13,13,12,
13,13,13,13, 13,14,13,13, 14,14,14,13, 14,14,14,14,
},
{
4, 0, 0, 0,
6, 4, 0, 0, 6, 5, 4, 0, 6, 5, 5, 4, 7, 5, 5, 4,
7, 5, 5, 4, 7, 6, 6, 4, 7, 6, 6, 4, 8, 7, 7, 5,
8, 8, 7, 6, 9, 8, 8, 7, 9, 9, 8, 8, 9, 9, 9, 8,
10, 9, 9, 9, 10,10,10,10, 10,10,10,10, 10,10,10,10,
},
{
6, 0, 0, 0,
6, 6, 0, 0, 6, 6, 6, 0, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
}
};
static const uint8_t coeff_token_bits[4][4*17]={
{
1, 0, 0, 0,
5, 1, 0, 0, 7, 4, 1, 0, 7, 6, 5, 3, 7, 6, 5, 3,
7, 6, 5, 4, 15, 6, 5, 4, 11,14, 5, 4, 8,10,13, 4,
15,14, 9, 4, 11,10,13,12, 15,14, 9,12, 11,10,13, 8,
15, 1, 9,12, 11,14,13, 8, 7,10, 9,12, 4, 6, 5, 8,
},
{
3, 0, 0, 0,
11, 2, 0, 0, 7, 7, 3, 0, 7,10, 9, 5, 7, 6, 5, 4,
4, 6, 5, 6, 7, 6, 5, 8, 15, 6, 5, 4, 11,14,13, 4,
15,10, 9, 4, 11,14,13,12, 8,10, 9, 8, 15,14,13,12,
11,10, 9,12, 7,11, 6, 8, 9, 8,10, 1, 7, 6, 5, 4,
},
{
15, 0, 0, 0,
15,14, 0, 0, 11,15,13, 0, 8,12,14,12, 15,10,11,11,
11, 8, 9,10, 9,14,13, 9, 8,10, 9, 8, 15,14,13,13,
11,14,10,12, 15,10,13,12, 11,14, 9,12, 8,10,13, 8,
13, 7, 9,12, 9,12,11,10, 5, 8, 7, 6, 1, 4, 3, 2,
},
{
3, 0, 0, 0,
0, 1, 0, 0, 4, 5, 6, 0, 8, 9,10,11, 12,13,14,15,
16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31,
32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47,
48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63,
}
};
static const uint8_t total_zeros_len[16][16]= {
{1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
{3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
{4,3,3,3,4,4,3,3,4,5,5,6,5,6},
{5,3,4,4,3,3,3,4,3,4,5,5,5},
{4,4,4,3,3,3,3,3,4,5,4,5},
{6,5,3,3,3,3,3,3,4,3,6},
{6,5,3,3,3,2,3,4,3,6},
{6,4,5,3,2,2,3,3,6},
{6,6,4,2,2,3,2,5},
{5,5,3,2,2,2,4},
{4,4,3,3,1,3},
{4,4,2,1,3},
{3,3,1,2},
{2,2,1},
{1,1},
};
static const uint8_t total_zeros_bits[16][16]= {
{1,3,2,3,2,3,2,3,2,3,2,3,2,3,2,1},
{7,6,5,4,3,5,4,3,2,3,2,3,2,1,0},
{5,7,6,5,4,3,4,3,2,3,2,1,1,0},
{3,7,5,4,6,5,4,3,3,2,2,1,0},
{5,4,3,7,6,5,4,3,2,1,1,0},
{1,1,7,6,5,4,3,2,1,1,0},
{1,1,5,4,3,3,2,1,1,0},
{1,1,1,3,3,2,2,1,0},
{1,0,1,3,2,1,1,1},
{1,0,1,3,2,1,1},
{0,1,1,2,1,3},
{0,1,1,1,1},
{0,1,1,1},
{0,1,1},
{0,1},
};
static const uint8_t chroma_dc_total_zeros_len[3][4]= {
{ 1, 2, 3, 3,},
{ 1, 2, 2, 0,},
{ 1, 1, 0, 0,},
};
static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
{ 1, 1, 1, 0,},
{ 1, 1, 0, 0,},
{ 1, 0, 0, 0,},
};
static const uint8_t run_len[7][16]={
{1,1},
{1,2,2},
{2,2,2,2},
{2,2,2,3,3},
{2,2,3,3,3,3},
{2,3,3,3,3,3,3},
{3,3,3,3,3,3,3,4,5,6,7,8,9,10,11},
};
static const uint8_t run_bits[7][16]={
{1,0},
{1,1,0},
{3,2,1,0},
{3,2,1,1,0},
{3,2,3,2,1,0},
{3,0,1,3,2,5,4},
{7,6,5,4,3,2,1,1,1,1,1,1,1,1,1},
};
static const uint8_t zigzag_scan[16]={ static const uint8_t zigzag_scan[16]={
0+0*4, 1+0*4, 0+1*4, 0+2*4, 0+0*4, 1+0*4, 0+1*4, 0+2*4,
1+1*4, 2+0*4, 3+0*4, 2+1*4, 1+1*4, 2+0*4, 3+0*4, 2+1*4,

Loading…
Cancel
Save