h264: split weighted pred-related vars into per-slice context

pull/125/head
Anton Khirnov 10 years ago
parent d8a45d2d49
commit 92c6c2a605
  1. 21
      libavcodec/dxva2_h264.c
  2. 83
      libavcodec/h264.c
  3. 38
      libavcodec/h264.h
  4. 69
      libavcodec/h264_mb.c
  5. 10
      libavcodec/h264_mb_template.c
  6. 36
      libavcodec/h264_mc_template.c
  7. 9
      libavcodec/h264_parser.c
  8. 10
      libavcodec/h264_refs.c
  9. 69
      libavcodec/h264_slice.c
  10. 2
      libavcodec/svq3.c
  11. 26
      libavcodec/vaapi_h264.c

@ -211,6 +211,7 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
const DXVA_PicParams_H264 *pp, unsigned position, unsigned size)
{
const H264Context *h = avctx->priv_data;
H264SliceContext *sl = &h->slice_ctx[0];
struct dxva_context *ctx = avctx->hwaccel_context;
unsigned list;
@ -225,8 +226,8 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
slice->slice_type = ff_h264_get_slice_type(h);
if (h->slice_type_fixed)
slice->slice_type += 5;
slice->luma_log2_weight_denom = h->luma_log2_weight_denom;
slice->chroma_log2_weight_denom = h->chroma_log2_weight_denom;
slice->luma_log2_weight_denom = sl->luma_log2_weight_denom;
slice->chroma_log2_weight_denom = sl->chroma_log2_weight_denom;
if (h->list_count > 0)
slice->num_ref_idx_l0_active_minus1 = h->ref_count[0] - 1;
if (h->list_count > 1)
@ -250,15 +251,15 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
r->reference == PICT_BOTTOM_FIELD);
for (plane = 0; plane < 3; plane++) {
int w, o;
if (plane == 0 && h->luma_weight_flag[list]) {
w = h->luma_weight[i][list][0];
o = h->luma_weight[i][list][1];
} else if (plane >= 1 && h->chroma_weight_flag[list]) {
w = h->chroma_weight[i][list][plane-1][0];
o = h->chroma_weight[i][list][plane-1][1];
if (plane == 0 && sl->luma_weight_flag[list]) {
w = sl->luma_weight[i][list][0];
o = sl->luma_weight[i][list][1];
} else if (plane >= 1 && sl->chroma_weight_flag[list]) {
w = sl->chroma_weight[i][list][plane-1][0];
o = sl->chroma_weight[i][list][plane-1][1];
} else {
w = 1 << (plane == 0 ? h->luma_log2_weight_denom :
h->chroma_log2_weight_denom);
w = 1 << (plane == 0 ? sl->luma_log2_weight_denom :
sl->chroma_log2_weight_denom);
o = 0;
}
slice->Weights[list][i][plane][0] = w;

@ -74,7 +74,7 @@ static void h264_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
fill_rectangle(h->mv_cache[0][scan8[0]], 4, 4, 8,
pack16to32((*mv)[0][0][0], (*mv)[0][0][1]), 4);
assert(!FRAME_MBAFF(h));
ff_h264_hl_decode_mb(h);
ff_h264_hl_decode_mb(h, &h->slice_ctx[0]);
}
void ff_h264_draw_horiz_band(H264Context *h, int y, int height)
@ -642,7 +642,17 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx)
h->pixel_shift = 0;
h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;
h->nb_slice_ctx = (avctx->active_thread_type & FF_THREAD_SLICE) ? H264_MAX_THREADS : 1;
h->slice_ctx = av_mallocz_array(h->nb_slice_ctx, sizeof(*h->slice_ctx));
if (!h->slice_ctx) {
h->nb_slice_ctx = 0;
return AVERROR(ENOMEM);
}
h->thread_context[0] = h;
for (i = 0; i < h->nb_slice_ctx; i++)
h->slice_ctx[i].h264 = h->thread_context[0];
h->outputed_poc = h->next_outputed_poc = INT_MIN;
for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
h->last_pocs[i] = INT_MIN;
@ -679,12 +689,23 @@ av_cold int ff_h264_decode_init(AVCodecContext *avctx)
static int decode_init_thread_copy(AVCodecContext *avctx)
{
H264Context *h = avctx->priv_data;
int i;
if (!avctx->internal->is_copy)
return 0;
memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
h->nb_slice_ctx = (avctx->active_thread_type & FF_THREAD_SLICE) ? H264_MAX_THREADS : 1;
h->slice_ctx = av_mallocz_array(h->nb_slice_ctx, sizeof(*h->slice_ctx));
if (!h->slice_ctx) {
h->nb_slice_ctx = 0;
return AVERROR(ENOMEM);
}
for (i = 0; i < h->nb_slice_ctx; i++)
h->slice_ctx[i].h264 = h;
h->avctx = avctx;
h->rbsp_buffer[0] = NULL;
h->rbsp_buffer[1] = NULL;
@ -976,37 +997,37 @@ static void decode_postinit(H264Context *h, int setup_finished)
ff_thread_finish_setup(h->avctx);
}
int ff_pred_weight_table(H264Context *h)
int ff_pred_weight_table(H264Context *h, H264SliceContext *sl)
{
int list, i;
int luma_def, chroma_def;
h->use_weight = 0;
h->use_weight_chroma = 0;
h->luma_log2_weight_denom = get_ue_golomb(&h->gb);
sl->use_weight = 0;
sl->use_weight_chroma = 0;
sl->luma_log2_weight_denom = get_ue_golomb(&h->gb);
if (h->sps.chroma_format_idc)
h->chroma_log2_weight_denom = get_ue_golomb(&h->gb);
luma_def = 1 << h->luma_log2_weight_denom;
chroma_def = 1 << h->chroma_log2_weight_denom;
sl->chroma_log2_weight_denom = get_ue_golomb(&h->gb);
luma_def = 1 << sl->luma_log2_weight_denom;
chroma_def = 1 << sl->chroma_log2_weight_denom;
for (list = 0; list < 2; list++) {
h->luma_weight_flag[list] = 0;
h->chroma_weight_flag[list] = 0;
sl->luma_weight_flag[list] = 0;
sl->chroma_weight_flag[list] = 0;
for (i = 0; i < h->ref_count[list]; i++) {
int luma_weight_flag, chroma_weight_flag;
luma_weight_flag = get_bits1(&h->gb);
if (luma_weight_flag) {
h->luma_weight[i][list][0] = get_se_golomb(&h->gb);
h->luma_weight[i][list][1] = get_se_golomb(&h->gb);
if (h->luma_weight[i][list][0] != luma_def ||
h->luma_weight[i][list][1] != 0) {
h->use_weight = 1;
h->luma_weight_flag[list] = 1;
sl->luma_weight[i][list][0] = get_se_golomb(&h->gb);
sl->luma_weight[i][list][1] = get_se_golomb(&h->gb);
if (sl->luma_weight[i][list][0] != luma_def ||
sl->luma_weight[i][list][1] != 0) {
sl->use_weight = 1;
sl->luma_weight_flag[list] = 1;
}
} else {
h->luma_weight[i][list][0] = luma_def;
h->luma_weight[i][list][1] = 0;
sl->luma_weight[i][list][0] = luma_def;
sl->luma_weight[i][list][1] = 0;
}
if (h->sps.chroma_format_idc) {
@ -1014,19 +1035,19 @@ int ff_pred_weight_table(H264Context *h)
if (chroma_weight_flag) {
int j;
for (j = 0; j < 2; j++) {
h->chroma_weight[i][list][j][0] = get_se_golomb(&h->gb);
h->chroma_weight[i][list][j][1] = get_se_golomb(&h->gb);
if (h->chroma_weight[i][list][j][0] != chroma_def ||
h->chroma_weight[i][list][j][1] != 0) {
h->use_weight_chroma = 1;
h->chroma_weight_flag[list] = 1;
sl->chroma_weight[i][list][j][0] = get_se_golomb(&h->gb);
sl->chroma_weight[i][list][j][1] = get_se_golomb(&h->gb);
if (sl->chroma_weight[i][list][j][0] != chroma_def ||
sl->chroma_weight[i][list][j][1] != 0) {
sl->use_weight_chroma = 1;
sl->chroma_weight_flag[list] = 1;
}
}
} else {
int j;
for (j = 0; j < 2; j++) {
h->chroma_weight[i][list][j][0] = chroma_def;
h->chroma_weight[i][list][j][1] = 0;
sl->chroma_weight[i][list][j][0] = chroma_def;
sl->chroma_weight[i][list][j][1] = 0;
}
}
}
@ -1034,7 +1055,7 @@ int ff_pred_weight_table(H264Context *h)
if (h->slice_type_nos != AV_PICTURE_TYPE_B)
break;
}
h->use_weight = h->use_weight || h->use_weight_chroma;
sl->use_weight = sl->use_weight || sl->use_weight_chroma;
return 0;
}
@ -1401,6 +1422,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
{
AVCodecContext *const avctx = h->avctx;
H264Context *hx; ///< thread context
H264SliceContext *sl;
int buf_index;
unsigned context_count;
int next_avc;
@ -1446,6 +1468,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
}
hx = h->thread_context[context_count];
sl = &h->slice_ctx[context_count];
ptr = ff_h264_decode_nal(hx, buf + buf_index, &dst_length,
&consumed, next_avc - buf_index);
@ -1505,7 +1528,7 @@ again:
hx->intra_gb_ptr =
hx->inter_gb_ptr = &hx->gb;
if ((err = ff_h264_decode_slice_header(hx, h)))
if ((err = ff_h264_decode_slice_header(hx, sl, h)))
break;
if (h->sei_recovery_frame_cnt >= 0 && h->recovery_frame < 0) {
@ -1622,6 +1645,7 @@ again:
h->nal_unit_type = hx->nal_unit_type;
h->nal_ref_idc = hx->nal_ref_idc;
hx = h;
sl = &h->slice_ctx[0];
goto again;
}
}
@ -1769,6 +1793,9 @@ av_cold void ff_h264_free_context(H264Context *h)
ff_h264_free_tables(h, 1); // FIXME cleanup init stuff perhaps
av_freep(&h->slice_ctx);
h->nb_slice_ctx = 0;
for (i = 0; i < MAX_SPS_COUNT; i++)
av_freep(h->sps_buffers + i);

@ -296,6 +296,22 @@ typedef struct H264Picture {
int recovered; ///< picture at IDR or recovery point + recovery count
} H264Picture;
/**
 * Per-slice decoding context.
 *
 * Holds the weighted prediction variables of one slice. Instances are
 * allocated as the H264Context.slice_ctx array (nb_slice_ctx entries) and
 * one of them is passed to the slice decoding functions alongside the
 * H264Context.
 */
typedef struct H264SliceContext {
/// H264Context this slice context is attached to; decode_slice() takes its
/// decoder state from here.
struct H264Context *h264;
// Weighted pred stuff
/// 0: no weighted prediction, 1: explicit weights parsed by
/// ff_pred_weight_table(), 2: implicit weights from implicit_weight_table()
int use_weight;
/// nonzero when chroma weights are in use (set together with use_weight)
int use_weight_chroma;
/// log2 of the luma weight denominator; read from the bitstream for explicit
/// weights, set to 5 for implicit weights
int luma_log2_weight_denom;
/// log2 of the chroma weight denominator; read from the bitstream for explicit
/// weights when chroma is present, set to 5 for implicit weights
int chroma_log2_weight_denom;
int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag
int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag
// The following 2 can be changed to int8_t but that causes a 10 CPU cycle speed loss
/// indexed [ref][list][0 = weight, 1 = offset]; entries 16 + 2*i and
/// 16 + 2*i + 1 are copies of entry i made by ff_h264_fill_mbaff_ref_list()
int luma_weight[48][2][2];
/// indexed [ref][list][chroma plane (0 or 1)][0 = weight, 1 = offset]
int chroma_weight[48][2][2][2];
/// indexed [list 0 ref][list 1 ref][mb_y & 1]; both entries of the last
/// dimension get the same value when filled with field < 0
int implicit_weight[48][48][2];
} H264SliceContext;
/**
* H264Context
*/
@ -312,6 +328,9 @@ typedef struct H264Context {
H264Picture *cur_pic_ptr;
H264Picture cur_pic;
H264SliceContext *slice_ctx;
int nb_slice_ctx;
int pixel_shift; ///< 0 for 8-bit H264, 1 for high-bit-depth H264
int chroma_qp[2]; // QPc
@ -417,15 +436,6 @@ typedef struct H264Context {
DECLARE_ALIGNED(8, uint16_t, sub_mb_type)[4];
// Weighted pred stuff
int use_weight;
int use_weight_chroma;
int luma_log2_weight_denom;
int chroma_log2_weight_denom;
// The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss
int luma_weight[48][2][2];
int chroma_weight[48][2][2][2];
int implicit_weight[48][48][2];
int direct_spatial_mv_pred;
int col_parity;
@ -683,8 +693,6 @@ typedef struct H264Context {
int frame_recovered; ///< Initial frame has been completely recovered
int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag
int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag
// Timestamp stuff
int sei_buffering_period_present; ///< Buffering period SEI flag
@ -762,7 +770,7 @@ int ff_h264_alloc_tables(H264Context *h);
int ff_h264_fill_default_ref_list(H264Context *h);
int ff_h264_decode_ref_pic_list_reordering(H264Context *h);
void ff_h264_fill_mbaff_ref_list(H264Context *h);
void ff_h264_fill_mbaff_ref_list(H264Context *h, H264SliceContext *sl);
void ff_h264_remove_all_refs(H264Context *h);
/**
@ -787,7 +795,7 @@ int ff_h264_check_intra4x4_pred_mode(H264Context *h);
*/
int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma);
void ff_h264_hl_decode_mb(H264Context *h);
void ff_h264_hl_decode_mb(H264Context *h, H264SliceContext *sl);
int ff_h264_decode_extradata(H264Context *h);
int ff_h264_decode_init(AVCodecContext *avctx);
void ff_h264_decode_init_vlc(void);
@ -1036,10 +1044,10 @@ int ff_h264_set_parameter_from_sps(H264Context *h);
void ff_h264_draw_horiz_band(H264Context *h, int y, int height);
int ff_init_poc(H264Context *h, int pic_field_poc[2], int *pic_poc);
int ff_pred_weight_table(H264Context *h);
int ff_pred_weight_table(H264Context *h, H264SliceContext *sl);
int ff_set_ref_count(H264Context *h);
int ff_h264_decode_slice_header(H264Context *h, H264Context *h0);
int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl, H264Context *h0);
int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count);
int ff_h264_update_thread_context(AVCodecContext *dst,
const AVCodecContext *src);

@ -362,7 +362,8 @@ static av_always_inline void mc_part_std(H264Context *h, int n, int square,
}
}
static av_always_inline void mc_part_weighted(H264Context *h, int n, int square,
static av_always_inline void mc_part_weighted(H264Context *h, H264SliceContext *sl,
int n, int square,
int height, int delta,
uint8_t *dest_y, uint8_t *dest_cb,
uint8_t *dest_cr,
@ -415,8 +416,8 @@ static av_always_inline void mc_part_weighted(H264Context *h, int n, int square,
x_offset, y_offset, qpix_put, chroma_put,
pixel_shift, chroma_idc);
if (h->use_weight == 2) {
int weight0 = h->implicit_weight[refn0][refn1][h->mb_y & 1];
if (sl->use_weight == 2) {
int weight0 = sl->implicit_weight[refn0][refn1][h->mb_y & 1];
int weight1 = 64 - weight0;
luma_weight_avg(dest_y, tmp_y, h->mb_linesize,
height, 5, weight0, weight1, 0);
@ -426,23 +427,23 @@ static av_always_inline void mc_part_weighted(H264Context *h, int n, int square,
chroma_height, 5, weight0, weight1, 0);
} else {
luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height,
h->luma_log2_weight_denom,
h->luma_weight[refn0][0][0],
h->luma_weight[refn1][1][0],
h->luma_weight[refn0][0][1] +
h->luma_weight[refn1][1][1]);
sl->luma_log2_weight_denom,
sl->luma_weight[refn0][0][0],
sl->luma_weight[refn1][1][0],
sl->luma_weight[refn0][0][1] +
sl->luma_weight[refn1][1][1]);
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height,
h->chroma_log2_weight_denom,
h->chroma_weight[refn0][0][0][0],
h->chroma_weight[refn1][1][0][0],
h->chroma_weight[refn0][0][0][1] +
h->chroma_weight[refn1][1][0][1]);
sl->chroma_log2_weight_denom,
sl->chroma_weight[refn0][0][0][0],
sl->chroma_weight[refn1][1][0][0],
sl->chroma_weight[refn0][0][0][1] +
sl->chroma_weight[refn1][1][0][1]);
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height,
h->chroma_log2_weight_denom,
h->chroma_weight[refn0][0][1][0],
h->chroma_weight[refn1][1][1][0],
h->chroma_weight[refn0][0][1][1] +
h->chroma_weight[refn1][1][1][1]);
sl->chroma_log2_weight_denom,
sl->chroma_weight[refn0][0][1][0],
sl->chroma_weight[refn1][1][1][0],
sl->chroma_weight[refn0][0][1][1] +
sl->chroma_weight[refn1][1][1][1]);
}
} else {
int list = list1 ? 1 : 0;
@ -453,18 +454,18 @@ static av_always_inline void mc_part_weighted(H264Context *h, int n, int square,
qpix_put, chroma_put, pixel_shift, chroma_idc);
luma_weight_op(dest_y, h->mb_linesize, height,
h->luma_log2_weight_denom,
h->luma_weight[refn][list][0],
h->luma_weight[refn][list][1]);
if (h->use_weight_chroma) {
sl->luma_log2_weight_denom,
sl->luma_weight[refn][list][0],
sl->luma_weight[refn][list][1]);
if (sl->use_weight_chroma) {
chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height,
h->chroma_log2_weight_denom,
h->chroma_weight[refn][list][0][0],
h->chroma_weight[refn][list][0][1]);
sl->chroma_log2_weight_denom,
sl->chroma_weight[refn][list][0][0],
sl->chroma_weight[refn][list][0][1]);
chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height,
h->chroma_log2_weight_denom,
h->chroma_weight[refn][list][1][0],
h->chroma_weight[refn][list][1][1]);
sl->chroma_log2_weight_denom,
sl->chroma_weight[refn][list][1][0],
sl->chroma_weight[refn][list][1][1]);
}
}
}
@ -801,7 +802,7 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
#define SIMPLE 0
#include "h264_mb_template.c"
void ff_h264_hl_decode_mb(H264Context *h)
void ff_h264_hl_decode_mb(H264Context *h, H264SliceContext *sl)
{
const int mb_xy = h->mb_xy;
const int mb_type = h->cur_pic.mb_type[mb_xy];
@ -810,13 +811,13 @@ void ff_h264_hl_decode_mb(H264Context *h)
if (CHROMA444(h)) {
if (is_complex || h->pixel_shift)
hl_decode_mb_444_complex(h);
hl_decode_mb_444_complex(h, sl);
else
hl_decode_mb_444_simple_8(h);
hl_decode_mb_444_simple_8(h, sl);
} else if (is_complex) {
hl_decode_mb_complex(h);
hl_decode_mb_complex(h, sl);
} else if (h->pixel_shift) {
hl_decode_mb_simple_16(h);
hl_decode_mb_simple_16(h, sl);
} else
hl_decode_mb_simple_8(h);
hl_decode_mb_simple_8(h, sl);
}

@ -40,7 +40,7 @@
#define CHROMA_IDC 2
#include "h264_mc_template.c"
static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
static av_noinline void FUNC(hl_decode_mb)(H264Context *h, H264SliceContext *sl)
{
const int mb_x = h->mb_x;
const int mb_y = h->mb_y;
@ -176,13 +176,13 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
uvlinesize, 0, 0, SIMPLE, PIXEL_SHIFT);
} else if (is_h264) {
if (chroma422) {
FUNC(hl_motion_422)(h, dest_y, dest_cb, dest_cr,
FUNC(hl_motion_422)(h, sl, dest_y, dest_cb, dest_cr,
h->qpel_put, h->h264chroma.put_h264_chroma_pixels_tab,
h->qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab);
} else {
FUNC(hl_motion_420)(h, dest_y, dest_cb, dest_cr,
FUNC(hl_motion_420)(h, sl, dest_y, dest_cb, dest_cr,
h->qpel_put, h->h264chroma.put_h264_chroma_pixels_tab,
h->qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
@ -272,7 +272,7 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
#define CHROMA_IDC 3
#include "h264_mc_template.c"
static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h)
static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h, H264SliceContext *sl)
{
const int mb_x = h->mb_x;
const int mb_y = h->mb_y;
@ -355,7 +355,7 @@ static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h)
xchg_mb_border(h, dest[0], dest[1], dest[2], linesize,
linesize, 0, 1, SIMPLE, PIXEL_SHIFT);
} else {
FUNC(hl_motion_444)(h, dest[0], dest[1], dest[2],
FUNC(hl_motion_444)(h, sl, dest[0], dest[1], dest[2],
h->qpel_put, h->h264chroma.put_h264_chroma_pixels_tab,
h->qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,

@ -34,7 +34,8 @@
#undef mc_part
#define mc_part MCFUNC(mc_part)
static void mc_part(H264Context *h, int n, int square,
static void mc_part(H264Context *h, H264SliceContext *sl,
int n, int square,
int height, int delta,
uint8_t *dest_y, uint8_t *dest_cb,
uint8_t *dest_cr,
@ -47,10 +48,10 @@ static void mc_part(H264Context *h, int n, int square,
h264_biweight_func *weight_avg,
int list0, int list1)
{
if ((h->use_weight == 2 && list0 && list1 &&
(h->implicit_weight[h->ref_cache[0][scan8[n]]][h->ref_cache[1][scan8[n]]][h->mb_y & 1] != 32)) ||
h->use_weight == 1)
mc_part_weighted(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
if ((sl->use_weight == 2 && list0 && list1 &&
(sl->implicit_weight[h->ref_cache[0][scan8[n]]][h->ref_cache[1][scan8[n]]][h->mb_y & 1] != 32)) ||
sl->use_weight == 1)
mc_part_weighted(h, sl, n, square, height, delta, dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put,
weight_op[0], weight_op[1], weight_avg[0],
weight_avg[1], list0, list1, PIXEL_SHIFT, CHROMA_IDC);
@ -60,7 +61,8 @@ static void mc_part(H264Context *h, int n, int square,
chroma_avg, list0, list1, PIXEL_SHIFT, CHROMA_IDC);
}
static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y,
static void MCFUNC(hl_motion)(H264Context *h, H264SliceContext *sl,
uint8_t *dest_y,
uint8_t *dest_cb, uint8_t *dest_cr,
qpel_mc_func(*qpix_put)[16],
h264_chroma_mc_func(*chroma_put),
@ -79,25 +81,25 @@ static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y,
prefetch_motion(h, 0, PIXEL_SHIFT, CHROMA_IDC);
if (IS_16X16(mb_type)) {
mc_part(h, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
mc_part(h, sl, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
weight_op, weight_avg,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
} else if (IS_16X8(mb_type)) {
mc_part(h, 0, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 0,
mc_part(h, sl, 0, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
weight_op, weight_avg,
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
mc_part(h, 8, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 4,
mc_part(h, sl, 8, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 4,
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
weight_op, weight_avg,
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
} else if (IS_8X16(mb_type)) {
mc_part(h, 0, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
mc_part(h, sl, 0, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[1], &weight_avg[1],
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
mc_part(h, 4, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
mc_part(h, sl, 4, 0, 16, 8 * h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[1], &weight_avg[1],
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
@ -113,29 +115,29 @@ static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y,
int y_offset = (i & 2) << 1;
if (IS_SUB_8X8(sub_mb_type)) {
mc_part(h, n, 1, 8, 0, dest_y, dest_cb, dest_cr,
mc_part(h, sl, n, 1, 8, 0, dest_y, dest_cb, dest_cr,
x_offset, y_offset,
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
&weight_op[1], &weight_avg[1],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
} else if (IS_SUB_8X4(sub_mb_type)) {
mc_part(h, n, 0, 4, 4 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr,
mc_part(h, sl, n, 0, 4, 4 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr,
x_offset, y_offset,
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
&weight_op[1], &weight_avg[1],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
mc_part(h, n + 2, 0, 4, 4 << PIXEL_SHIFT,
mc_part(h, sl, n + 2, 0, 4, 4 << PIXEL_SHIFT,
dest_y, dest_cb, dest_cr, x_offset, y_offset + 2,
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
&weight_op[1], &weight_avg[1],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
} else if (IS_SUB_4X8(sub_mb_type)) {
mc_part(h, n, 0, 8, 4 * h->mb_linesize,
mc_part(h, sl, n, 0, 8, 4 * h->mb_linesize,
dest_y, dest_cb, dest_cr, x_offset, y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[2], &weight_avg[2],
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
mc_part(h, n + 1, 0, 8, 4 * h->mb_linesize,
mc_part(h, sl, n + 1, 0, 8, 4 * h->mb_linesize,
dest_y, dest_cb, dest_cr, x_offset + 2, y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[2], &weight_avg[2],
@ -146,7 +148,7 @@ static void MCFUNC(hl_motion)(H264Context *h, uint8_t *dest_y,
for (j = 0; j < 4; j++) {
int sub_x_offset = x_offset + 2 * (j & 1);
int sub_y_offset = y_offset + (j & 2);
mc_part(h, n + j, 1, 4, 0,
mc_part(h, sl, n + j, 1, 4, 0,
dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
&weight_op[2], &weight_avg[2],

@ -102,6 +102,7 @@ static int scan_mmco_reset(AVCodecParserContext *s)
{
H264ParseContext *p = s->priv_data;
H264Context *h = &p->h;
H264SliceContext *sl = &h->slice_ctx[0];
h->slice_type_nos = s->pict_type & 3;
@ -141,7 +142,7 @@ static int scan_mmco_reset(AVCodecParserContext *s)
if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) ||
(h->pps.weighted_bipred_idc == 1 && h->slice_type_nos == AV_PICTURE_TYPE_B))
ff_pred_weight_table(h);
ff_pred_weight_table(h, sl);
if (get_bits1(&h->gb)) { // adaptive_ref_pic_marking_mode_flag
int i;
@ -543,6 +544,12 @@ static av_cold int init(AVCodecParserContext *s)
{
H264ParseContext *p = s->priv_data;
H264Context *h = &p->h;
h->slice_ctx = av_mallocz(sizeof(*h->slice_ctx));
if (!h->slice_ctx)
return 0;
h->nb_slice_ctx = 1;
h->thread_context[0] = h;
h->slice_context_count = 1;
ff_h264dsp_init(&h->h264dsp, 8, 1);

@ -337,7 +337,7 @@ int ff_h264_decode_ref_pic_list_reordering(H264Context *h)
return 0;
}
void ff_h264_fill_mbaff_ref_list(H264Context *h)
void ff_h264_fill_mbaff_ref_list(H264Context *h, H264SliceContext *sl)
{
int list, i, j;
for (list = 0; list < 2; list++) { //FIXME try list_count
@ -355,11 +355,11 @@ void ff_h264_fill_mbaff_ref_list(H264Context *h)
field[1].reference = PICT_BOTTOM_FIELD;
field[1].poc = field[1].field_poc[1];
h->luma_weight[16 + 2 * i][list][0] = h->luma_weight[16 + 2 * i + 1][list][0] = h->luma_weight[i][list][0];
h->luma_weight[16 + 2 * i][list][1] = h->luma_weight[16 + 2 * i + 1][list][1] = h->luma_weight[i][list][1];
sl->luma_weight[16 + 2 * i][list][0] = sl->luma_weight[16 + 2 * i + 1][list][0] = sl->luma_weight[i][list][0];
sl->luma_weight[16 + 2 * i][list][1] = sl->luma_weight[16 + 2 * i + 1][list][1] = sl->luma_weight[i][list][1];
for (j = 0; j < 2; j++) {
h->chroma_weight[16 + 2 * i][list][j][0] = h->chroma_weight[16 + 2 * i + 1][list][j][0] = h->chroma_weight[i][list][j][0];
h->chroma_weight[16 + 2 * i][list][j][1] = h->chroma_weight[16 + 2 * i + 1][list][j][1] = h->chroma_weight[i][list][j][1];
sl->chroma_weight[16 + 2 * i][list][j][0] = sl->chroma_weight[16 + 2 * i + 1][list][j][0] = sl->chroma_weight[i][list][j][0];
sl->chroma_weight[16 + 2 * i][list][j][1] = sl->chroma_weight[16 + 2 * i + 1][list][j][1] = sl->chroma_weight[i][list][j][1];
}
}
}

@ -484,6 +484,8 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
}
if (!inited) {
H264SliceContext *orig_slice_ctx = h->slice_ctx;
for (i = 0; i < MAX_SPS_COUNT; i++)
av_freep(h->sps_buffers + i);
@ -503,6 +505,8 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
av_frame_unref(&h->cur_pic.f);
h->cur_pic.tf.f = &h->cur_pic.f;
h->slice_ctx = orig_slice_ctx;
h->avctx = dst;
h->DPB = NULL;
h->qscale_table_pool = NULL;
@ -814,13 +818,13 @@ static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y,
* @param field 0/1 initialize the weight for interlaced MBAFF
* -1 initializes the rest
*/
static void implicit_weight_table(H264Context *h, int field)
static void implicit_weight_table(H264Context *h, H264SliceContext *sl, int field)
{
int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;
for (i = 0; i < 2; i++) {
h->luma_weight_flag[i] = 0;
h->chroma_weight_flag[i] = 0;
sl->luma_weight_flag[i] = 0;
sl->chroma_weight_flag[i] = 0;
}
if (field < 0) {
@ -831,8 +835,8 @@ static void implicit_weight_table(H264Context *h, int field)
}
if (h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF(h) &&
h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2 * cur_poc) {
h->use_weight = 0;
h->use_weight_chroma = 0;
sl->use_weight = 0;
sl->use_weight_chroma = 0;
return;
}
ref_start = 0;
@ -845,10 +849,10 @@ static void implicit_weight_table(H264Context *h, int field)
ref_count1 = 16 + 2 * h->ref_count[1];
}
h->use_weight = 2;
h->use_weight_chroma = 2;
h->luma_log2_weight_denom = 5;
h->chroma_log2_weight_denom = 5;
sl->use_weight = 2;
sl->use_weight_chroma = 2;
sl->luma_log2_weight_denom = 5;
sl->chroma_log2_weight_denom = 5;
for (ref0 = ref_start; ref0 < ref_count0; ref0++) {
int poc0 = h->ref_list[0][ref0].poc;
@ -866,10 +870,10 @@ static void implicit_weight_table(H264Context *h, int field)
}
}
if (field < 0) {
h->implicit_weight[ref0][ref1][0] =
h->implicit_weight[ref0][ref1][1] = w;
sl->implicit_weight[ref0][ref1][0] =
sl->implicit_weight[ref0][ref1][1] = w;
} else {
h->implicit_weight[ref0][ref1][field] = w;
sl->implicit_weight[ref0][ref1][field] = w;
}
}
}
@ -1139,6 +1143,8 @@ static int h264_slice_header_init(H264Context *h, int reinit)
c->workaround_bugs = h->workaround_bugs;
c->pict_type = h->pict_type;
h->slice_ctx[i].h264 = c;
init_scan_tables(c);
clone_tables(c, h, i);
c->context_initialized = 1;
@ -1166,7 +1172,7 @@ static int h264_slice_header_init(H264Context *h, int reinit)
*
* @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
*/
int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl, H264Context *h0)
{
unsigned int first_mb_in_slice;
unsigned int pps_id;
@ -1606,15 +1612,15 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
if ((h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P) ||
(h->pps.weighted_bipred_idc == 1 &&
h->slice_type_nos == AV_PICTURE_TYPE_B))
ff_pred_weight_table(h);
ff_pred_weight_table(h, sl);
else if (h->pps.weighted_bipred_idc == 2 &&
h->slice_type_nos == AV_PICTURE_TYPE_B) {
implicit_weight_table(h, -1);
implicit_weight_table(h, sl, -1);
} else {
h->use_weight = 0;
sl->use_weight = 0;
for (i = 0; i < 2; i++) {
h->luma_weight_flag[i] = 0;
h->chroma_weight_flag[i] = 0;
sl->luma_weight_flag[i] = 0;
sl->chroma_weight_flag[i] = 0;
}
}
@ -1632,11 +1638,11 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
}
if (FRAME_MBAFF(h)) {
ff_h264_fill_mbaff_ref_list(h);
ff_h264_fill_mbaff_ref_list(h, sl);
if (h->pps.weighted_bipred_idc == 2 && h->slice_type_nos == AV_PICTURE_TYPE_B) {
implicit_weight_table(h, 0);
implicit_weight_table(h, 1);
implicit_weight_table(h, sl, 0);
implicit_weight_table(h, sl, 1);
}
}
@ -1789,8 +1795,8 @@ int ff_h264_decode_slice_header(H264Context *h, H264Context *h0)
h->qscale,
h->deblocking_filter,
h->slice_alpha_c0_offset, h->slice_beta_offset,
h->use_weight,
h->use_weight == 1 && h->use_weight_chroma ? "c" : "",
sl->use_weight,
sl->use_weight == 1 && sl->use_weight_chroma ? "c" : "",
h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : "");
}
@ -2170,7 +2176,8 @@ static void er_add_slice(H264Context *h, int startx, int starty,
static int decode_slice(struct AVCodecContext *avctx, void *arg)
{
H264Context *h = *(void **)arg;
H264SliceContext *sl = arg;
H264Context *h = sl->h264;
int lf_x_start = h->mb_x;
h->mb_skip_run = -1;
@ -2197,7 +2204,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
// STOP_TIMER("decode_mb_cabac")
if (ret >= 0)
ff_h264_hl_decode_mb(h);
ff_h264_hl_decode_mb(h, sl);
// FIXME optimal? or let mb_decode decode 16x32 ?
if (ret >= 0 && FRAME_MBAFF(h)) {
@ -2206,7 +2213,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
ret = ff_h264_decode_mb_cabac(h);
if (ret >= 0)
ff_h264_hl_decode_mb(h);
ff_h264_hl_decode_mb(h, sl);
h->mb_y--;
}
eos = get_cabac_terminate(&h->cabac);
@ -2256,7 +2263,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
int ret = ff_h264_decode_mb_cavlc(h);
if (ret >= 0)
ff_h264_hl_decode_mb(h);
ff_h264_hl_decode_mb(h, sl);
// FIXME optimal? or let mb_decode decode 16x32 ?
if (ret >= 0 && FRAME_MBAFF(h)) {
@ -2264,7 +2271,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg)
ret = ff_h264_decode_mb_cavlc(h);
if (ret >= 0)
ff_h264_hl_decode_mb(h);
ff_h264_hl_decode_mb(h, sl);
h->mb_y--;
}
@ -2341,15 +2348,15 @@ int ff_h264_execute_decode_slices(H264Context *h, unsigned context_count)
if (h->avctx->hwaccel)
return 0;
if (context_count == 1) {
return decode_slice(avctx, &h);
return decode_slice(avctx, &h->slice_ctx[0]);
} else {
for (i = 1; i < context_count; i++) {
hx = h->thread_context[i];
hx->er.error_count = 0;
}
avctx->execute(avctx, decode_slice, h->thread_context,
NULL, context_count, sizeof(void *));
avctx->execute(avctx, decode_slice, h->slice_ctx,
NULL, context_count, sizeof(h->slice_ctx[0]));
/* pull back stuff from slices to master context */
hx = h->thread_context[context_count - 1];

@ -1268,7 +1268,7 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
}
if (mb_type != 0)
ff_h264_hl_decode_mb(h);
ff_h264_hl_decode_mb(h, &h->slice_ctx[0]);
if (h->pict_type != AV_PICTURE_TYPE_B && !h->low_delay)
h->cur_pic.mb_type[h->mb_x + h->mb_y * h->mb_stride] =

@ -192,27 +192,28 @@ static void fill_vaapi_plain_pred_weight_table(H264Context *h,
short chroma_weight[32][2],
short chroma_offset[32][2])
{
H264SliceContext *sl = &h->slice_ctx[0];
unsigned int i, j;
*luma_weight_flag = h->luma_weight_flag[list];
*chroma_weight_flag = h->chroma_weight_flag[list];
*luma_weight_flag = sl->luma_weight_flag[list];
*chroma_weight_flag = sl->chroma_weight_flag[list];
for (i = 0; i < h->ref_count[list]; i++) {
/* VA API also wants the inferred (default) values, not
only what is available in the bitstream (7.4.3.2). */
if (h->luma_weight_flag[list]) {
luma_weight[i] = h->luma_weight[i][list][0];
luma_offset[i] = h->luma_weight[i][list][1];
if (sl->luma_weight_flag[list]) {
luma_weight[i] = sl->luma_weight[i][list][0];
luma_offset[i] = sl->luma_weight[i][list][1];
} else {
luma_weight[i] = 1 << h->luma_log2_weight_denom;
luma_weight[i] = 1 << sl->luma_log2_weight_denom;
luma_offset[i] = 0;
}
for (j = 0; j < 2; j++) {
if (h->chroma_weight_flag[list]) {
chroma_weight[i][j] = h->chroma_weight[i][list][j][0];
chroma_offset[i][j] = h->chroma_weight[i][list][j][1];
if (sl->chroma_weight_flag[list]) {
chroma_weight[i][j] = sl->chroma_weight[i][list][j][0];
chroma_offset[i][j] = sl->chroma_weight[i][list][j][1];
} else {
chroma_weight[i][j] = 1 << h->chroma_log2_weight_denom;
chroma_weight[i][j] = 1 << sl->chroma_log2_weight_denom;
chroma_offset[i][j] = 0;
}
}
@ -316,6 +317,7 @@ static int vaapi_h264_decode_slice(AVCodecContext *avctx,
uint32_t size)
{
H264Context * const h = avctx->priv_data;
H264SliceContext *sl = &h->slice_ctx[0];
VASliceParameterBufferH264 *slice_param;
av_dlog(avctx, "vaapi_h264_decode_slice(): buffer %p, size %d\n",
@ -336,8 +338,8 @@ static int vaapi_h264_decode_slice(AVCodecContext *avctx,
slice_param->disable_deblocking_filter_idc = h->deblocking_filter < 2 ? !h->deblocking_filter : h->deblocking_filter;
slice_param->slice_alpha_c0_offset_div2 = h->slice_alpha_c0_offset / 2;
slice_param->slice_beta_offset_div2 = h->slice_beta_offset / 2;
slice_param->luma_log2_weight_denom = h->luma_log2_weight_denom;
slice_param->chroma_log2_weight_denom = h->chroma_log2_weight_denom;
slice_param->luma_log2_weight_denom = sl->luma_log2_weight_denom;
slice_param->chroma_log2_weight_denom = sl->chroma_log2_weight_denom;
fill_vaapi_RefPicList(slice_param->RefPicList0, h->ref_list[0], h->list_count > 0 ? h->ref_count[0] : 0);
fill_vaapi_RefPicList(slice_param->RefPicList1, h->ref_list[1], h->list_count > 1 ? h->ref_count[1] : 0);

Loading…
Cancel
Save