vp8: refactor decoding a single mb_row

This is in preparation for sliced threading.

Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
pull/59/head
Daniel Kang 13 years ago committed by Luca Barbato
parent 45ff9e5b57
commit 337ade52de
  1. 164
      libavcodec/vp8.c

@ -1574,11 +1574,95 @@ static void release_queued_segmaps(VP8Context *s, int is_close)
s->maps_are_invalid = 0; s->maps_are_invalid = 0;
} }
/* Slack added on both sides of the motion-vector clamp window. Its value (64)
 * equals the amount by which the window is shifted per macroblock below. */
#define MARGIN (16 << 2)

/**
 * Decode, reconstruct and optionally loop-filter one row of macroblocks.
 *
 * For every macroblock of row mb_y this parses the mode, reads the
 * coefficients (unless the macroblock is skipped), runs intra or inter
 * prediction, adds the residual and records the per-macroblock filter
 * strength. Once the whole row is reconstructed the row loop filter is run
 * if s->deblock_filter is set.
 *
 * s->mv_min.x / s->mv_max.x are reset at the start of the row and moved by 64
 * after each macroblock. s->mv_min.y / s->mv_max.y are only decremented by 64
 * on exit; they are not initialised here, so the caller has to set them up
 * before the first row.
 *
 * @param avctx      codec context; priv_data holds the VP8Context
 * @param curframe   frame being written
 * @param prev_frame previous frame, may be NULL; only its ref_index[0] plane
 *                   is read here and forwarded to decode_mb_mode()
 * @param mb_y       index of the macroblock row to decode
 */
static void vp8_decode_mb_row(AVCodecContext *avctx, AVFrame *curframe,
                              AVFrame *prev_frame, int mb_y)
{
    VP8Context *s = avctx->priv_data;
    // rows are spread over the coefficient partitions by masking the row index
    // (presumably num_coeff_partitions is a power of two -- confirm at the parser)
    VP56RangeCoder *coder = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
    VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
    int mb_xy = mb_y * s->mb_width;
    int mb_x;
    uint8_t *dst[3] = {
        curframe->data[0] + 16 * mb_y * s->linesize,
        curframe->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->data[2] +  8 * mb_y * s->uvlinesize
    };

    // the row starts with a cleared left neighbour: macroblock, nnz flags
    // and intra 4x4 prediction modes
    memset(&mb[-1], 0, sizeof(*mb));
    memset(s->left_nnz, 0, sizeof(s->left_nnz));
    AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);

    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        int plane, row;

        // intra prediction expects a column of 129 to the left of the row;
        // plane 0 has 16 rows per macroblock, the other two planes have 8
        for (plane = 0; plane < 3; plane++) {
            const int rows = plane ? 8 : 16;

            for (row = 0; row < rows; row++)
                dst[plane][row * curframe->linesize[plane] - 1] = 129;
        }

        // the top-left edge is 129 as well
        if (mb_y == 1) {
            s->top_border[0][31] = 129;
            s->top_border[0][23] = 129;
            s->top_border[0][15] = 129;
        }
    }

    // horizontal motion-vector limits for the first macroblock of the row
    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    mb_x = 0;
    while (mb_x < s->mb_width) {
        /* Prefetch the current frame, 4 MBs ahead */
        s->dsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64, s->linesize, 4);
        s->dsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64, dst[2] - dst[1], 2);

        decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
                       prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL);

        // the reference prefetches are interleaved with the decoding stages
        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, coder, mb, s->top_nnz[mb_x], s->left_nnz);

        if (mb->mode > MODE_I4x4)
            inter_predict(s, dst, mb, mb_x, mb_y);
        else
            intra_predict(s, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (mb->skip) {
            // no coefficients were read: clear the nnz context instead
            AV_ZERO64(s->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned

            // Reset DC block predictors if they would exist if the mb had coefficients
            if (!(mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT)) {
                s->left_nnz[8]      = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        } else {
            idct_mb(s, dst, mb);
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        // step one macroblock to the right: 16 bytes in plane 0, 8 in the others
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        mb_x++;
        mb_xy++;
        mb++;
    }

    // loop-filter the finished row using the strengths collected above
    if (s->deblock_filter) {
        if (!s->filter.simple)
            filter_mb_row(s, curframe, mb_y);
        else
            filter_mb_row_simple(s, curframe, mb_y);
    }

    // move the vertical motion-vector limits on for the next row
    s->mv_min.y -= 64;
    s->mv_max.y -= 64;
}
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
AVPacket *avpkt) AVPacket *avpkt)
{ {
VP8Context *s = avctx->priv_data; VP8Context *s = avctx->priv_data;
int ret, mb_x, mb_y, i, y, referenced; int ret, mb_y, i, referenced;
enum AVDiscard skip_thresh; enum AVDiscard skip_thresh;
AVFrame *av_uninit(curframe), *prev_frame; AVFrame *av_uninit(curframe), *prev_frame;
@ -1686,90 +1770,14 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
if (s->keyframe) if (s->keyframe)
memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4); memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
#define MARGIN (16 << 2)
s->mv_min.y = -MARGIN; s->mv_min.y = -MARGIN;
s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
for (mb_y = 0; mb_y < s->mb_height; mb_y++) { for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
int mb_xy = mb_y*s->mb_width;
uint8_t *dst[3] = {
curframe->data[0] + 16*mb_y*s->linesize,
curframe->data[1] + 8*mb_y*s->uvlinesize,
curframe->data[2] + 8*mb_y*s->uvlinesize
};
memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
memset(s->left_nnz, 0, sizeof(s->left_nnz));
AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
// left edge of 129 for intra prediction
if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
for (i = 0; i < 3; i++)
for (y = 0; y < 16>>!!i; y++)
dst[i][y*curframe->linesize[i]-1] = 129;
if (mb_y == 1) // top left edge is also 129
s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
}
s->mv_min.x = -MARGIN;
s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map) if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
ff_thread_await_progress(prev_frame, mb_y, 0); ff_thread_await_progress(prev_frame, mb_y, 0);
for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { vp8_decode_mb_row(avctx, curframe, prev_frame, mb_y);
/* Prefetch the current frame, 4 MBs ahead */
s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL);
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
if (!mb->skip)
decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz);
if (mb->mode <= MODE_I4x4)
intra_predict(s, dst, mb, mb_x, mb_y);
else
inter_predict(s, dst, mb, mb_x, mb_y);
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
if (!mb->skip) {
idct_mb(s, dst, mb);
} else {
AV_ZERO64(s->left_nnz);
AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
// Reset DC block predictors if they would exist if the mb had coefficients
if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
s->left_nnz[8] = 0;
s->top_nnz[mb_x][8] = 0;
}
}
if (s->deblock_filter)
filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
dst[0] += 16;
dst[1] += 8;
dst[2] += 8;
s->mv_min.x -= 64;
s->mv_max.x -= 64;
}
if (s->deblock_filter) {
if (s->filter.simple)
filter_mb_row_simple(s, curframe, mb_y);
else
filter_mb_row(s, curframe, mb_y);
}
s->mv_min.y -= 64;
s->mv_max.y -= 64;
ff_thread_report_progress(curframe, mb_y, 0); ff_thread_report_progress(curframe, mb_y, 0);
} }

Loading…
Cancel
Save