From 92c6c2a605f9b077b8fbc25b7ed6625541232b87 Mon Sep 17 00:00:00 2001 From: Anton Khirnov Date: Sat, 17 Jan 2015 18:45:29 +0100 Subject: [PATCH] h264: split weighted pred-related vars into per-slice context --- libavcodec/dxva2_h264.c | 21 ++++----- libavcodec/h264.c | 83 +++++++++++++++++++++++------------ libavcodec/h264.h | 38 +++++++++------- libavcodec/h264_mb.c | 69 +++++++++++++++-------------- libavcodec/h264_mb_template.c | 10 ++--- libavcodec/h264_mc_template.c | 36 ++++++++------- libavcodec/h264_parser.c | 9 +++- libavcodec/h264_refs.c | 10 ++--- libavcodec/h264_slice.c | 69 ++++++++++++++++------------- libavcodec/svq3.c | 2 +- libavcodec/vaapi_h264.c | 26 ++++++----- 11 files changed, 214 insertions(+), 159 deletions(-) diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c index 4132cd7ed9..cb033a2e92 100644 --- a/libavcodec/dxva2_h264.c +++ b/libavcodec/dxva2_h264.c @@ -211,6 +211,7 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice, const DXVA_PicParams_H264 *pp, unsigned position, unsigned size) { const H264Context *h = avctx->priv_data; + H264SliceContext *sl = &h->slice_ctx[0]; struct dxva_context *ctx = avctx->hwaccel_context; unsigned list; @@ -225,8 +226,8 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice, slice->slice_type = ff_h264_get_slice_type(h); if (h->slice_type_fixed) slice->slice_type += 5; - slice->luma_log2_weight_denom = h->luma_log2_weight_denom; - slice->chroma_log2_weight_denom = h->chroma_log2_weight_denom; + slice->luma_log2_weight_denom = sl->luma_log2_weight_denom; + slice->chroma_log2_weight_denom = sl->chroma_log2_weight_denom; if (h->list_count > 0) slice->num_ref_idx_l0_active_minus1 = h->ref_count[0] - 1; if (h->list_count > 1) @@ -250,15 +251,15 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice, r->reference == PICT_BOTTOM_FIELD); for (plane = 0; plane < 3; plane++) { int w, o; - if (plane == 0 && 
h->luma_weight_flag[list]) { - w = h->luma_weight[i][list][0]; - o = h->luma_weight[i][list][1]; - } else if (plane >= 1 && h->chroma_weight_flag[list]) { - w = h->chroma_weight[i][list][plane-1][0]; - o = h->chroma_weight[i][list][plane-1][1]; + if (plane == 0 && sl->luma_weight_flag[list]) { + w = sl->luma_weight[i][list][0]; + o = sl->luma_weight[i][list][1]; + } else if (plane >= 1 && sl->chroma_weight_flag[list]) { + w = sl->chroma_weight[i][list][plane-1][0]; + o = sl->chroma_weight[i][list][plane-1][1]; } else { - w = 1 << (plane == 0 ? h->luma_log2_weight_denom : - h->chroma_log2_weight_denom); + w = 1 << (plane == 0 ? sl->luma_log2_weight_denom : + sl->chroma_log2_weight_denom); o = 0; } slice->Weights[list][i][plane][0] = w; diff --git a/libavcodec/h264.c b/libavcodec/h264.c index de192501fa..5eee4d9d90 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -74,7 +74,7 @@ static void h264_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type, fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32((*mv)[0][0][0], (*mv)[0][0][1]), 4); assert(!FRAME_MBAFF(h)); - ff_h264_hl_decode_mb(h); + ff_h264_hl_decode_mb(h, &h->slice_ctx[0]); } void ff_h264_draw_horiz_band(H264Context *h, int y, int height) @@ -642,7 +642,17 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx) h->pixel_shift = 0; h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8; + h->nb_slice_ctx = (avctx->active_thread_type & FF_THREAD_SLICE) ? 
H264_MAX_THREADS : 1; + h->slice_ctx = av_mallocz_array(h->nb_slice_ctx, sizeof(*h->slice_ctx)); + if (!h->slice_ctx) { + h->nb_slice_ctx = 0; + return AVERROR(ENOMEM); + } + h->thread_context[0] = h; + for (i = 0; i < h->nb_slice_ctx; i++) + h->slice_ctx[i].h264 = h->thread_context[0]; + h->outputed_poc = h->next_outputed_poc = INT_MIN; for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) h->last_pocs[i] = INT_MIN; @@ -679,12 +689,23 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx) static int decode_init_thread_copy(AVCodecContext *avctx) { H264Context *h = avctx->priv_data; + int i; if (!avctx->internal->is_copy) return 0; memset(h->sps_buffers, 0, sizeof(h->sps_buffers)); memset(h->pps_buffers, 0, sizeof(h->pps_buffers)); + h->nb_slice_ctx = (avctx->active_thread_type & FF_THREAD_SLICE) ? H264_MAX_THREADS : 1; + h->slice_ctx = av_mallocz_array(h->nb_slice_ctx, sizeof(*h->slice_ctx)); + if (!h->slice_ctx) { + h->nb_slice_ctx = 0; + return AVERROR(ENOMEM); + } + + for (i = 0; i < h->nb_slice_ctx; i++) + h->slice_ctx[i].h264 = h; + h->avctx = avctx; h->rbsp_buffer[0] = NULL; h->rbsp_buffer[1] = NULL; @@ -976,37 +997,37 @@ static void decode_postinit(H264Context *h, int setup_finished) ff_thread_finish_setup(h->avctx); } -int ff_pred_weight_table(H264Context *h) +int ff_pred_weight_table(H264Context *h, H264SliceContext *sl) { int list, i; int luma_def, chroma_def; - h->use_weight = 0; - h->use_weight_chroma = 0; - h->luma_log2_weight_denom = get_ue_golomb(&h->gb); + sl->use_weight = 0; + sl->use_weight_chroma = 0; + sl->luma_log2_weight_denom = get_ue_golomb(&h->gb); if (h->sps.chroma_format_idc) - h->chroma_log2_weight_denom = get_ue_golomb(&h->gb); - luma_def = 1 << h->luma_log2_weight_denom; - chroma_def = 1 << h->chroma_log2_weight_denom; + sl->chroma_log2_weight_denom = get_ue_golomb(&h->gb); + luma_def = 1 << sl->luma_log2_weight_denom; + chroma_def = 1 << sl->chroma_log2_weight_denom; for (list = 0; list < 2; list++) { - h->luma_weight_flag[list] = 0; - 
h->chroma_weight_flag[list] = 0; + sl->luma_weight_flag[list] = 0; + sl->chroma_weight_flag[list] = 0; for (i = 0; i < h->ref_count[list]; i++) { int luma_weight_flag, chroma_weight_flag; luma_weight_flag = get_bits1(&h->gb); if (luma_weight_flag) { - h->luma_weight[i][list][0] = get_se_golomb(&h->gb); - h->luma_weight[i][list][1] = get_se_golomb(&h->gb); - if (h->luma_weight[i][list][0] != luma_def || - h->luma_weight[i][list][1] != 0) { - h->use_weight = 1; - h->luma_weight_flag[list] = 1; + sl->luma_weight[i][list][0] = get_se_golomb(&h->gb); + sl->luma_weight[i][list][1] = get_se_golomb(&h->gb); + if (sl->luma_weight[i][list][0] != luma_def || + sl->luma_weight[i][list][1] != 0) { + sl->use_weight = 1; + sl->luma_weight_flag[list] = 1; } } else { - h->luma_weight[i][list][0] = luma_def; - h->luma_weight[i][list][1] = 0; + sl->luma_weight[i][list][0] = luma_def; + sl->luma_weight[i][list][1] = 0; } if (h->sps.chroma_format_idc) { @@ -1014,19 +1035,19 @@ int ff_pred_weight_table(H264Context *h) if (chroma_weight_flag) { int j; for (j = 0; j < 2; j++) { - h->chroma_weight[i][list][j][0] = get_se_golomb(&h->gb); - h->chroma_weight[i][list][j][1] = get_se_golomb(&h->gb); - if (h->chroma_weight[i][list][j][0] != chroma_def || - h->chroma_weight[i][list][j][1] != 0) { - h->use_weight_chroma = 1; - h->chroma_weight_flag[list] = 1; + sl->chroma_weight[i][list][j][0] = get_se_golomb(&h->gb); + sl->chroma_weight[i][list][j][1] = get_se_golomb(&h->gb); + if (sl->chroma_weight[i][list][j][0] != chroma_def || + sl->chroma_weight[i][list][j][1] != 0) { + sl->use_weight_chroma = 1; + sl->chroma_weight_flag[list] = 1; } } } else { int j; for (j = 0; j < 2; j++) { - h->chroma_weight[i][list][j][0] = chroma_def; - h->chroma_weight[i][list][j][1] = 0; + sl->chroma_weight[i][list][j][0] = chroma_def; + sl->chroma_weight[i][list][j][1] = 0; } } } @@ -1034,7 +1055,7 @@ int ff_pred_weight_table(H264Context *h) if (h->slice_type_nos != AV_PICTURE_TYPE_B) break; } - h->use_weight = 
h->use_weight || h->use_weight_chroma; + sl->use_weight = sl->use_weight || sl->use_weight_chroma; return 0; } @@ -1401,6 +1422,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size, { AVCodecContext *const avctx = h->avctx; H264Context *hx; ///< thread context + H264SliceContext *sl; int buf_index; unsigned context_count; int next_avc; @@ -1446,6 +1468,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size, } hx = h->thread_context[context_count]; + sl = &h->slice_ctx[context_count]; ptr = ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index); @@ -1505,7 +1528,7 @@ again: hx->intra_gb_ptr = hx->inter_gb_ptr = &hx->gb; - if ((err = ff_h264_decode_slice_header(hx, h))) + if ((err = ff_h264_decode_slice_header(hx, sl, h))) break; if (h->sei_recovery_frame_cnt >= 0 && h->recovery_frame < 0) { @@ -1622,6 +1645,7 @@ again: h->nal_unit_type = hx->nal_unit_type; h->nal_ref_idc = hx->nal_ref_idc; hx = h; + sl = &h->slice_ctx[0]; goto again; } } @@ -1769,6 +1793,9 @@ av_cold void ff_h264_free_context(H264Context *h) ff_h264_free_tables(h, 1); // FIXME cleanup init stuff perhaps + av_freep(&h->slice_ctx); + h->nb_slice_ctx = 0; + for (i = 0; i < MAX_SPS_COUNT; i++) av_freep(h->sps_buffers + i); diff --git a/libavcodec/h264.h b/libavcodec/h264.h index 37e4cf41ff..a8e1101aa5 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h @@ -296,6 +296,22 @@ typedef struct H264Picture { int recovered; ///< picture at IDR or recovery point + recovery count } H264Picture; +typedef struct H264SliceContext { + struct H264Context *h264; + + // Weighted pred stuff + int use_weight; + int use_weight_chroma; + int luma_log2_weight_denom; + int chroma_log2_weight_denom; + int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag + int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag + // The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss + int luma_weight[48][2][2]; + int 
chroma_weight[48][2][2][2]; + int implicit_weight[48][48][2]; +} H264SliceContext; + /** * H264Context */ @@ -312,6 +328,9 @@ typedef struct H264Context { H264Picture *cur_pic_ptr; H264Picture cur_pic; + H264SliceContext *slice_ctx; + int nb_slice_ctx; + int pixel_shift; ///< 0 for 8-bit H264, 1 for high-bit-depth H264 int chroma_qp[2]; // QPc @@ -417,15 +436,6 @@ typedef struct H264Context { DECLARE_ALIGNED(8, uint16_t, sub_mb_type)[4]; - // Weighted pred stuff - int use_weight; - int use_weight_chroma; - int luma_log2_weight_denom; - int chroma_log2_weight_denom; - // The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss - int luma_weight[48][2][2]; - int chroma_weight[48][2][2][2]; - int implicit_weight[48][48][2]; int direct_spatial_mv_pred; int col_parity; @@ -683,8 +693,6 @@ typedef struct H264Context { int frame_recovered; ///< Initial frame has been completely recovered - int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag - int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag // Timestamp stuff int sei_buffering_period_present; ///< Buffering period SEI flag @@ -762,7 +770,7 @@ int ff_h264_alloc_tables(H264Context *h); int ff_h264_fill_default_ref_list(H264Context *h); int ff_h264_decode_ref_pic_list_reordering(H264Context *h); -void ff_h264_fill_mbaff_ref_list(H264Context *h); +void ff_h264_fill_mbaff_ref_list(H264Context *h, H264SliceContext *sl); void ff_h264_remove_all_refs(H264Context *h); /** @@ -787,7 +795,7 @@ int ff_h264_check_intra4x4_pred_mode(H264Context *h); */ int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma); -void ff_h264_hl_decode_mb(H264Context *h); +void ff_h264_hl_decode_mb(H264Context *h, H264SliceContext *sl); int ff_h264_decode_extradata(H264Context *h); int ff_h264_decode_init(AVCodecContext *avctx); void ff_h264_decode_init_vlc(void); @@ -1036,10 +1044,10 @@ int ff_h264_set_parameter_from_sps(H264Context *h); void ff_h264_draw_horiz_band(H264Context *h, int y, int 
height); int ff_init_poc(H264Context *h, int pic_field_poc[2], int *pic_poc); -int ff_pred_weight_table(H264Context *h); +int ff_pred_weight_table(H264Context *h, H264SliceContext *sl); int ff_set_ref_count(H264Context *h); -int ff_h264_decode_slice_header(H264Context *h, H264Context *h0); +int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl, H264Context *h0); int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count); int ff_h264_update_thread_context(AVCodecContext *dst, const AVCodecContext *src); diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c index 61d68ab4ec..b2899b62a7 100644 --- a/libavcodec/h264_mb.c +++ b/libavcodec/h264_mb.c @@ -362,7 +362,8 @@ static av_always_inline void mc_part_std(H264Context *h, int n, int square, } } -static av_always_inline void mc_part_weighted(H264Context *h, int n, int square, +static av_always_inline void mc_part_weighted(H264Context *h, H264SliceContext *sl, + int n, int square, int height, int delta, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, @@ -415,8 +416,8 @@ static av_always_inline void mc_part_weighted(H264Context *h, int n, int square, x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma_idc); - if (h->use_weight == 2) { - int weight0 = h->implicit_weight[refn0][refn1][h->mb_y & 1]; + if (sl->use_weight == 2) { + int weight0 = sl->implicit_weight[refn0][refn1][h->mb_y & 1]; int weight1 = 64 - weight0; luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height, 5, weight0, weight1, 0); @@ -426,23 +427,23 @@ static av_always_inline void mc_part_weighted(H264Context *h, int n, int square, chroma_height, 5, weight0, weight1, 0); } else { luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height, - h->luma_log2_weight_denom, - h->luma_weight[refn0][0][0], - h->luma_weight[refn1][1][0], - h->luma_weight[refn0][0][1] + - h->luma_weight[refn1][1][1]); + sl->luma_log2_weight_denom, + sl->luma_weight[refn0][0][0], + sl->luma_weight[refn1][1][0], + sl->luma_weight[refn0][0][1] + 
+ sl->luma_weight[refn1][1][1]); chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height, - h->chroma_log2_weight_denom, - h->chroma_weight[refn0][0][0][0], - h->chroma_weight[refn1][1][0][0], - h->chroma_weight[refn0][0][0][1] + - h->chroma_weight[refn1][1][0][1]); + sl->chroma_log2_weight_denom, + sl->chroma_weight[refn0][0][0][0], + sl->chroma_weight[refn1][1][0][0], + sl->chroma_weight[refn0][0][0][1] + + sl->chroma_weight[refn1][1][0][1]); chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height, - h->chroma_log2_weight_denom, - h->chroma_weight[refn0][0][1][0], - h->chroma_weight[refn1][1][1][0], - h->chroma_weight[refn0][0][1][1] + - h->chroma_weight[refn1][1][1][1]); + sl->chroma_log2_weight_denom, + sl->chroma_weight[refn0][0][1][0], + sl->chroma_weight[refn1][1][1][0], + sl->chroma_weight[refn0][0][1][1] + + sl->chroma_weight[refn1][1][1][1]); } } else { int list = list1 ? 1 : 0; @@ -453,18 +454,18 @@ static av_always_inline void mc_part_weighted(H264Context *h, int n, int square, qpix_put, chroma_put, pixel_shift, chroma_idc); luma_weight_op(dest_y, h->mb_linesize, height, - h->luma_log2_weight_denom, - h->luma_weight[refn][list][0], - h->luma_weight[refn][list][1]); - if (h->use_weight_chroma) { + sl->luma_log2_weight_denom, + sl->luma_weight[refn][list][0], + sl->luma_weight[refn][list][1]); + if (sl->use_weight_chroma) { chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height, - h->chroma_log2_weight_denom, - h->chroma_weight[refn][list][0][0], - h->chroma_weight[refn][list][0][1]); + sl->chroma_log2_weight_denom, + sl->chroma_weight[refn][list][0][0], + sl->chroma_weight[refn][list][0][1]); chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height, - h->chroma_log2_weight_denom, - h->chroma_weight[refn][list][1][0], - h->chroma_weight[refn][list][1][1]); + sl->chroma_log2_weight_denom, + sl->chroma_weight[refn][list][1][0], + sl->chroma_weight[refn][list][1][1]); } } } @@ -801,7 +802,7 @@ static av_always_inline void 
hl_decode_mb_idct_luma(H264Context *h, int mb_type, #define SIMPLE 0 #include "h264_mb_template.c" -void ff_h264_hl_decode_mb(H264Context *h) +void ff_h264_hl_decode_mb(H264Context *h, H264SliceContext *sl) { const int mb_xy = h->mb_xy; const int mb_type = h->cur_pic.mb_type[mb_xy]; @@ -810,13 +811,13 @@ void ff_h264_hl_decode_mb(H264Context *h) if (CHROMA444(h)) { if (is_complex || h->pixel_shift) - hl_decode_mb_444_complex(h); + hl_decode_mb_444_complex(h, sl); else - hl_decode_mb_444_simple_8(h); + hl_decode_mb_444_simple_8(h, sl); } else if (is_complex) { - hl_decode_mb_complex(h); + hl_decode_mb_complex(h, sl); } else if (h->pixel_shift) { - hl_decode_mb_simple_16(h); + hl_decode_mb_simple_16(h, sl); } else - hl_decode_mb_simple_8(h); + hl_decode_mb_simple_8(h, sl); } diff --git a/libavcodec/h264_mb_template.c b/libavcodec/h264_mb_template.c index 5b2917f51c..262ad0c2f6 100644 --- a/libavcodec/h264_mb_template.c +++ b/libavcodec/h264_mb_template.c @@ -40,7 +40,7 @@ #define CHROMA_IDC 2 #include "h264_mc_template.c" -static av_noinline void FUNC(hl_decode_mb)(H264Context *h) +static av_noinline void FUNC(hl_decode_mb)(H264Context *h, H264SliceContext *sl) { const int mb_x = h->mb_x; const int mb_y = h->mb_y; @@ -176,13 +176,13 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h) uvlinesize, 0, 0, SIMPLE, PIXEL_SHIFT); } else if (is_h264) { if (chroma422) { - FUNC(hl_motion_422)(h, dest_y, dest_cb, dest_cr, + FUNC(hl_motion_422)(h, sl, dest_y, dest_cb, dest_cr, h->qpel_put, h->h264chroma.put_h264_chroma_pixels_tab, h->qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab, h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab); } else { - FUNC(hl_motion_420)(h, dest_y, dest_cb, dest_cr, + FUNC(hl_motion_420)(h, sl, dest_y, dest_cb, dest_cr, h->qpel_put, h->h264chroma.put_h264_chroma_pixels_tab, h->qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab, h->h264dsp.weight_h264_pixels_tab, @@ -272,7 +272,7 @@ static av_noinline void 
FUNC(hl_decode_mb)(H264Context *h) #define CHROMA_IDC 3 #include "h264_mc_template.c" -static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h) +static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h, H264SliceContext *sl) { const int mb_x = h->mb_x; const int mb_y = h->mb_y; @@ -355,7 +355,7 @@ static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h) xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, SIMPLE, PIXEL_SHIFT); } else { - FUNC(hl_motion_444)(h, dest[0], dest[1], dest[2], + FUNC(hl_motion_444)(h, sl, dest[0], dest[1], dest[2], h->qpel_put, h->h264chroma.put_h264_chroma_pixels_tab, h->qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab, h->h264dsp.weight_h264_pixels_tab, diff --git a/libavcodec/h264_mc_template.c b/libavcodec/h264_mc_template.c index c085cc35d7..0e58eb3a87 100644 --- a/libavcodec/h264_mc_template.c +++ b/libavcodec/h264_mc_template.c @@ -34,7 +34,8 @@ #undef mc_part #define mc_part MCFUNC(mc_part) -static void mc_part(H264Context *h, int n, int square, +static void mc_part(H264Context *h, H264SliceContext *sl, + int n, int square, int height, int delta, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, @@ -47,10 +48,10 @@ static void mc_part(H264Context *h, int n, int square, h264_biweight_func *weight_avg, int list0, int list1) { - if ((h->use_weight == 2 && list0 && list1 && - (h->implicit_weight[h->ref_cache[0][scan8[n]]][h->ref_cache[1][scan8[n]]][h->mb_y & 1] != 32)) || - h->use_weight == 1) - mc_part_weighted(h, n, square, height, delta, dest_y, dest_cb, dest_cr, + if ((sl->use_weight == 2 && list0 && list1 && + (sl->implicit_weight[h->ref_cache[0][scan8[n]]][h->ref_cache[1][scan8[n]]][h->mb_y & 1] != 32)) || + sl->use_weight == 1) + mc_part_weighted(h, sl, n, square, height, delta, dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put, chroma_put, weight_op[0], weight_op[1], weight_avg[0], weight_avg[1], list0, list1, PIXEL_SHIFT, CHROMA_IDC); @@ -60,7 +61,8 @@ static void 
mc_part(H264Context *h, int n, int square, chroma_avg, list0, list1, PIXEL_SHIFT, CHROMA_IDC); } -static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y, +static void MCFUNC(hl_motion)(H264Context *h, H264SliceContext *sl, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, qpel_mc_func(*qpix_put)[16], h264_chroma_mc_func(*chroma_put), @@ -79,25 +81,25 @@ static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y, prefetch_motion(h, 0, PIXEL_SHIFT, CHROMA_IDC); if (IS_16X16(mb_type)) { - mc_part(h, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0, + mc_part(h, sl, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0, qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], weight_op, weight_avg, IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); } else if (IS_16X8(mb_type)) { - mc_part(h, 0, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 0, + mc_part(h, sl, 0, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 0, qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], weight_op, weight_avg, IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); - mc_part(h, 8, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 4, + mc_part(h, sl, 8, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 4, qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], weight_op, weight_avg, IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); } else if (IS_8X16(mb_type)) { - mc_part(h, 0, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0, + mc_part(h, sl, 0, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0, qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], &weight_op[1], &weight_avg[1], IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); - mc_part(h, 4, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0, + mc_part(h, sl, 4, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0, qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], &weight_op[1], &weight_avg[1], IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); @@ -113,29 +115,29 @@ static void MCFUNC(hl_motion)(H264Context 
*h, uint8_t *dest_y, int y_offset = (i & 2) << 1; if (IS_SUB_8X8(sub_mb_type)) { - mc_part(h, n, 1, 8, 0, dest_y, dest_cb, dest_cr, + mc_part(h, sl, n, 1, 8, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], &weight_op[1], &weight_avg[1], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); } else if (IS_SUB_8X4(sub_mb_type)) { - mc_part(h, n, 0, 4, 4 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, + mc_part(h, sl, n, 0, 4, 4 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], &weight_op[1], &weight_avg[1], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); - mc_part(h, n + 2, 0, 4, 4 << PIXEL_SHIFT, + mc_part(h, sl, n + 2, 0, 4, 4 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, x_offset, y_offset + 2, qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], &weight_op[1], &weight_avg[1], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); } else if (IS_SUB_4X8(sub_mb_type)) { - mc_part(h, n, 0, 8, 4 * h->mb_linesize, + mc_part(h, sl, n, 0, 8, 4 * h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], &weight_op[2], &weight_avg[2], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); - mc_part(h, n + 1, 0, 8, 4 * h->mb_linesize, + mc_part(h, sl, n + 1, 0, 8, 4 * h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset + 2, y_offset, qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], &weight_op[2], &weight_avg[2], @@ -146,7 +148,7 @@ static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y, for (j = 0; j < 4; j++) { int sub_x_offset = x_offset + 2 * (j & 1); int sub_y_offset = y_offset + (j & 2); - mc_part(h, n + j, 1, 4, 0, + mc_part(h, sl, n + j, 1, 4, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], &weight_op[2], &weight_avg[2], diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c index 
21e320d752..672411e7b8 100644 --- a/libavcodec/h264_parser.c +++ b/libavcodec/h264_parser.c @@ -102,6 +102,7 @@ static int scan_mmco_reset(AVCodecParserContext *s) { H264ParseContext *p = s->priv_data; H264Context *h = &p->h; + H264SliceContext *sl = &h->slice_ctx[0]; h->slice_type_nos = s->pict_type & 3; @@ -141,7 +142,7 @@ static int scan_mmco_reset(AVCodecParserContext *s) if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) || (h->pps.weighted_bipred_idc == 1 && h->slice_type_nos == AV_PICTURE_TYPE_B)) - ff_pred_weight_table(h); + ff_pred_weight_table(h, sl); if (get_bits1(&h->gb)) { // adaptive_ref_pic_marking_mode_flag int i; @@ -543,6 +544,12 @@ static av_cold int init(AVCodecParserContext *s) { H264ParseContext *p = s->priv_data; H264Context *h = &p->h; + + h->slice_ctx = av_mallocz(sizeof(*h->slice_ctx)); + if (!h->slice_ctx) + return AVERROR(ENOMEM); /* fail init on OOM: scan_mmco_reset() uses slice_ctx[0] unconditionally */ + h->nb_slice_ctx = 1; + h->thread_context[0] = h; h->slice_context_count = 1; ff_h264dsp_init(&h->h264dsp, 8, 1); diff --git a/libavcodec/h264_refs.c b/libavcodec/h264_refs.c index 03c1b9c6c3..3357d2cc94 100644 --- a/libavcodec/h264_refs.c +++ b/libavcodec/h264_refs.c @@ -337,7 +337,7 @@ int ff_h264_decode_ref_pic_list_reordering(H264Context *h) return 0; } -void ff_h264_fill_mbaff_ref_list(H264Context *h) +void ff_h264_fill_mbaff_ref_list(H264Context *h, H264SliceContext *sl) { int list, i, j; for (list = 0; list < 2; list++) { //FIXME try list_count @@ -355,11 +355,11 @@ void ff_h264_fill_mbaff_ref_list(H264Context *h) field[1].reference = PICT_BOTTOM_FIELD; field[1].poc = field[1].field_poc[1]; - h->luma_weight[16 + 2 * i][list][0] = h->luma_weight[16 + 2 * i + 1][list][0] = h->luma_weight[i][list][0]; - h->luma_weight[16 + 2 * i][list][1] = h->luma_weight[16 + 2 * i + 1][list][1] = h->luma_weight[i][list][1]; + sl->luma_weight[16 + 2 * i][list][0] = sl->luma_weight[16 + 2 * i + 1][list][0] = sl->luma_weight[i][list][0]; + sl->luma_weight[16 + 2 * i][list][1] = sl->luma_weight[16 + 2 * i + 1][list][1] =
sl->luma_weight[i][list][1]; for (j = 0; j < 2; j++) { - h->chroma_weight[16 + 2 * i][list][j][0] = h->chroma_weight[16 + 2 * i + 1][list][j][0] = h->chroma_weight[i][list][j][0]; - h->chroma_weight[16 + 2 * i][list][j][1] = h->chroma_weight[16 + 2 * i + 1][list][j][1] = h->chroma_weight[i][list][j][1]; + sl->chroma_weight[16 + 2 * i][list][j][0] = sl->chroma_weight[16 + 2 * i + 1][list][j][0] = sl->chroma_weight[i][list][j][0]; + sl->chroma_weight[16 + 2 * i][list][j][1] = sl->chroma_weight[16 + 2 * i + 1][list][j][1] = sl->chroma_weight[i][list][j][1]; } } } diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index 6f22c87f6d..6bdd261de7 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -484,6 +484,8 @@ int ff_h264_update_thread_context(AVCodecContext *dst, } if (!inited) { + H264SliceContext *orig_slice_ctx = h->slice_ctx; + for (i = 0; i < MAX_SPS_COUNT; i++) av_freep(h->sps_buffers + i); @@ -503,6 +505,8 @@ int ff_h264_update_thread_context(AVCodecContext *dst, av_frame_unref(&h->cur_pic.f); h->cur_pic.tf.f = &h->cur_pic.f; + h->slice_ctx = orig_slice_ctx; + h->avctx = dst; h->DPB = NULL; h->qscale_table_pool = NULL; @@ -814,13 +818,13 @@ static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, * @param field 0/1 initialize the weight for interlaced MBAFF * -1 initializes the rest */ -static void implicit_weight_table(H264Context *h, int field) +static void implicit_weight_table(H264Context *h, H264SliceContext *sl, int field) { int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1; for (i = 0; i < 2; i++) { - h->luma_weight_flag[i] = 0; - h->chroma_weight_flag[i] = 0; + sl->luma_weight_flag[i] = 0; + sl->chroma_weight_flag[i] = 0; } if (field < 0) { @@ -831,8 +835,8 @@ static void implicit_weight_table(H264Context *h, int field) } if (h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF(h) && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2 * cur_poc) { - h->use_weight = 0; - 
h->use_weight_chroma = 0; + sl->use_weight = 0; + sl->use_weight_chroma = 0; return; } ref_start = 0; @@ -845,10 +849,10 @@ static void implicit_weight_table(H264Context *h, int field) ref_count1 = 16 + 2 * h->ref_count[1]; } - h->use_weight = 2; - h->use_weight_chroma = 2; - h->luma_log2_weight_denom = 5; - h->chroma_log2_weight_denom = 5; + sl->use_weight = 2; + sl->use_weight_chroma = 2; + sl->luma_log2_weight_denom = 5; + sl->chroma_log2_weight_denom = 5; for (ref0 = ref_start; ref0 < ref_count0; ref0++) { int poc0 = h->ref_list[0][ref0].poc; @@ -866,10 +870,10 @@ static void implicit_weight_table(H264Context *h, int field) } } if (field < 0) { - h->implicit_weight[ref0][ref1][0] = - h->implicit_weight[ref0][ref1][1] = w; + sl->implicit_weight[ref0][ref1][0] = + sl->implicit_weight[ref0][ref1][1] = w; } else { - h->implicit_weight[ref0][ref1][field] = w; + sl->implicit_weight[ref0][ref1][field] = w; } } } @@ -1139,6 +1143,8 @@ static int h264_slice_header_init(H264Context *h, int reinit) c->workaround_bugs = h->workaround_bugs; c->pict_type = h->pict_type; + h->slice_ctx[i].h264 = c; + init_scan_tables(c); clone_tables(c, h, i); c->context_initialized = 1; @@ -1166,7 +1172,7 @@ static int h264_slice_header_init(H264Context *h, int reinit) * * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded */ -int ff_h264_decode_slice_header(H264Context *h, H264Context *h0) +int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl, H264Context *h0) { unsigned int first_mb_in_slice; unsigned int pps_id; @@ -1606,15 +1612,15 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0) if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) || (h->pps.weighted_bipred_idc == 1 && h->slice_type_nos == AV_PICTURE_TYPE_B)) - ff_pred_weight_table(h); + ff_pred_weight_table(h, sl); else if (h->pps.weighted_bipred_idc == 2 && h->slice_type_nos == AV_PICTURE_TYPE_B) { - implicit_weight_table(h, -1); + 
implicit_weight_table(h, sl, -1); } else { - h->use_weight = 0; + sl->use_weight = 0; for (i = 0; i < 2; i++) { - h->luma_weight_flag[i] = 0; - h->chroma_weight_flag[i] = 0; + sl->luma_weight_flag[i] = 0; + sl->chroma_weight_flag[i] = 0; } } @@ -1632,11 +1638,11 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0) } if (FRAME_MBAFF(h)) { - ff_h264_fill_mbaff_ref_list(h); + ff_h264_fill_mbaff_ref_list(h, sl); if (h->pps.weighted_bipred_idc == 2 && h->slice_type_nos == AV_PICTURE_TYPE_B) { - implicit_weight_table(h, 0); - implicit_weight_table(h, 1); + implicit_weight_table(h, sl, 0); + implicit_weight_table(h, sl, 1); } } @@ -1789,8 +1795,8 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0) h->qscale, h->deblocking_filter, h->slice_alpha_c0_offset, h->slice_beta_offset, - h->use_weight, - h->use_weight == 1 && h->use_weight_chroma ? "c" : "", + sl->use_weight, + sl->use_weight == 1 && sl->use_weight_chroma ? "c" : "", h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""); } @@ -2170,7 +2176,8 @@ static void er_add_slice(H264Context *h, int startx, int starty, static int decode_slice(struct AVCodecContext *avctx, void *arg) { - H264Context *h = *(void **)arg; + H264SliceContext *sl = arg; + H264Context *h = sl->h264; int lf_x_start = h->mb_x; h->mb_skip_run = -1; @@ -2197,7 +2204,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg) // STOP_TIMER("decode_mb_cabac") if (ret >= 0) - ff_h264_hl_decode_mb(h); + ff_h264_hl_decode_mb(h, sl); // FIXME optimal? or let mb_decode decode 16x32 ? 
if (ret >= 0 && FRAME_MBAFF(h)) { @@ -2206,7 +2213,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg) ret = ff_h264_decode_mb_cabac(h); if (ret >= 0) - ff_h264_hl_decode_mb(h); + ff_h264_hl_decode_mb(h, sl); h->mb_y--; } eos = get_cabac_terminate(&h->cabac); @@ -2256,7 +2263,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg) int ret = ff_h264_decode_mb_cavlc(h); if (ret >= 0) - ff_h264_hl_decode_mb(h); + ff_h264_hl_decode_mb(h, sl); // FIXME optimal? or let mb_decode decode 16x32 ? if (ret >= 0 && FRAME_MBAFF(h)) { @@ -2264,7 +2271,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg) ret = ff_h264_decode_mb_cavlc(h); if (ret >= 0) - ff_h264_hl_decode_mb(h); + ff_h264_hl_decode_mb(h, sl); h->mb_y--; } @@ -2341,15 +2348,15 @@ int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count) if (h->avctx->hwaccel) return 0; if (context_count == 1) { - return decode_slice(avctx, &h); + return decode_slice(avctx, &h->slice_ctx[0]); } else { for (i = 1; i < context_count; i++) { hx = h->thread_context[i]; hx->er.error_count = 0; } - avctx->execute(avctx, decode_slice, h->thread_context, - NULL, context_count, sizeof(void *)); + avctx->execute(avctx, decode_slice, h->slice_ctx, + NULL, context_count, sizeof(h->slice_ctx[0])); /* pull back stuff from slices to master context */ hx = h->thread_context[context_count - 1]; diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c index 88c060d307..aa2911d04d 100644 --- a/libavcodec/svq3.c +++ b/libavcodec/svq3.c @@ -1268,7 +1268,7 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data, } if (mb_type != 0) - ff_h264_hl_decode_mb(h); + ff_h264_hl_decode_mb(h, &h->slice_ctx[0]); if (h->pict_type != AV_PICTURE_TYPE_B && !h->low_delay) h->cur_pic.mb_type[h->mb_x + h->mb_y * h->mb_stride] = diff --git a/libavcodec/vaapi_h264.c b/libavcodec/vaapi_h264.c index 651a50b85c..d910dba822 100644 --- a/libavcodec/vaapi_h264.c +++ b/libavcodec/vaapi_h264.c @@ -192,27 +192,28 
@@ static void fill_vaapi_plain_pred_weight_table(H264Context *h, short chroma_weight[32][2], short chroma_offset[32][2]) { + H264SliceContext *sl = &h->slice_ctx[0]; unsigned int i, j; - *luma_weight_flag = h->luma_weight_flag[list]; - *chroma_weight_flag = h->chroma_weight_flag[list]; + *luma_weight_flag = sl->luma_weight_flag[list]; + *chroma_weight_flag = sl->chroma_weight_flag[list]; for (i = 0; i < h->ref_count[list]; i++) { /* VA API also wants the inferred (default) values, not only what is available in the bitstream (7.4.3.2). */ - if (h->luma_weight_flag[list]) { - luma_weight[i] = h->luma_weight[i][list][0]; - luma_offset[i] = h->luma_weight[i][list][1]; + if (sl->luma_weight_flag[list]) { + luma_weight[i] = sl->luma_weight[i][list][0]; + luma_offset[i] = sl->luma_weight[i][list][1]; } else { - luma_weight[i] = 1 << h->luma_log2_weight_denom; + luma_weight[i] = 1 << sl->luma_log2_weight_denom; luma_offset[i] = 0; } for (j = 0; j < 2; j++) { - if (h->chroma_weight_flag[list]) { - chroma_weight[i][j] = h->chroma_weight[i][list][j][0]; - chroma_offset[i][j] = h->chroma_weight[i][list][j][1]; + if (sl->chroma_weight_flag[list]) { + chroma_weight[i][j] = sl->chroma_weight[i][list][j][0]; + chroma_offset[i][j] = sl->chroma_weight[i][list][j][1]; } else { - chroma_weight[i][j] = 1 << h->chroma_log2_weight_denom; + chroma_weight[i][j] = 1 << sl->chroma_log2_weight_denom; chroma_offset[i][j] = 0; } } @@ -316,6 +317,7 @@ static int vaapi_h264_decode_slice(AVCodecContext *avctx, uint32_t size) { H264Context * const h = avctx->priv_data; + H264SliceContext *sl = &h->slice_ctx[0]; VASliceParameterBufferH264 *slice_param; av_dlog(avctx, "vaapi_h264_decode_slice(): buffer %p, size %d\n", @@ -336,8 +338,8 @@ static int vaapi_h264_decode_slice(AVCodecContext *avctx, slice_param->disable_deblocking_filter_idc = h->deblocking_filter < 2 ? 
!h->deblocking_filter : h->deblocking_filter; slice_param->slice_alpha_c0_offset_div2 = h->slice_alpha_c0_offset / 2; slice_param->slice_beta_offset_div2 = h->slice_beta_offset / 2; - slice_param->luma_log2_weight_denom = h->luma_log2_weight_denom; - slice_param->chroma_log2_weight_denom = h->chroma_log2_weight_denom; + slice_param->luma_log2_weight_denom = sl->luma_log2_weight_denom; + slice_param->chroma_log2_weight_denom = sl->chroma_log2_weight_denom; fill_vaapi_RefPicList(slice_param->RefPicList0, h->ref_list[0], h->list_count > 0 ? h->ref_count[0] : 0); fill_vaapi_RefPicList(slice_param->RefPicList1, h->ref_list[1], h->list_count > 1 ? h->ref_count[1] : 0);