svq3: eliminate remaining H264SliceContext usage

pull/208/merge
Anton Khirnov 9 years ago
parent 5a5db90edf
commit 939b388383
  1. 149
      libavcodec/svq3.c

@ -90,6 +90,9 @@ typedef struct SVQ3Context {
int h_edge_pos;
int v_edge_pos;
int last_frame_output;
int slice_num;
int qscale;
int cbp;
enum AVPictureType pict_type;
@ -115,6 +118,9 @@ typedef struct SVQ3Context {
DECLARE_ALIGNED(16, int16_t, mv_cache)[2][5 * 8][2];
DECLARE_ALIGNED(8, int8_t, ref_cache)[2][5 * 8];
DECLARE_ALIGNED(16, int16_t, mb)[16 * 48 * 2];
DECLARE_ALIGNED(16, int16_t, mb_luma_dc)[3][16 * 2];
DECLARE_ALIGNED(8, uint8_t, non_zero_count_cache)[15 * 8];
uint32_t dequant4_coeff[QP_MAX_NUM + 1][16];
} SVQ3Context;
@ -399,11 +405,12 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
int thirdpel, int dir, int avg)
{
H264Context *h = &s->h;
H264SliceContext *sl = &h->slice_ctx[0];
const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic;
uint8_t *src, *dest;
int i, emu = 0;
int blocksize = 2 - (width >> 3); // 16->0, 8->1, 4->2
int linesize = s->cur_pic->f->linesize[0];
int uvlinesize = s->cur_pic->f->linesize[1];
mx += x;
my += y;
@ -416,23 +423,23 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
}
/* form component predictions */
dest = h->cur_pic.f->data[0] + x + y * sl->linesize;
src = pic->f->data[0] + mx + my * sl->linesize;
dest = h->cur_pic.f->data[0] + x + y * linesize;
src = pic->f->data[0] + mx + my * linesize;
if (emu) {
s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
sl->linesize, sl->linesize,
linesize, linesize,
width + 1, height + 1,
mx, my, s->h_edge_pos, s->v_edge_pos);
src = s->edge_emu_buffer;
}
if (thirdpel)
(avg ? s->tdsp.avg_tpel_pixels_tab
: s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, sl->linesize,
: s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, linesize,
width, height);
else
(avg ? s->hdsp.avg_pixels_tab
: s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, sl->linesize,
: s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, linesize,
height);
if (!(h->flags & AV_CODEC_FLAG_GRAY)) {
@ -443,12 +450,12 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
blocksize++;
for (i = 1; i < 3; i++) {
dest = h->cur_pic.f->data[i] + (x >> 1) + (y >> 1) * sl->uvlinesize;
src = pic->f->data[i] + mx + my * sl->uvlinesize;
dest = h->cur_pic.f->data[i] + (x >> 1) + (y >> 1) * uvlinesize;
src = pic->f->data[i] + mx + my * uvlinesize;
if (emu) {
s->vdsp.emulated_edge_mc(s->edge_emu_buffer, src,
sl->uvlinesize, sl->uvlinesize,
uvlinesize, uvlinesize,
width + 1, height + 1,
mx, my, (s->h_edge_pos >> 1),
s->v_edge_pos >> 1);
@ -457,12 +464,12 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
if (thirdpel)
(avg ? s->tdsp.avg_tpel_pixels_tab
: s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
sl->uvlinesize,
uvlinesize,
width, height);
else
(avg ? s->hdsp.avg_pixels_tab
: s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
sl->uvlinesize,
uvlinesize,
height);
}
}
@ -582,17 +589,17 @@ static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
return 0;
}
static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264SliceContext *sl,
static av_always_inline void hl_decode_mb_idct_luma(SVQ3Context *s,
int mb_type, const int *block_offset,
int linesize, uint8_t *dest_y)
{
int i;
if (!IS_INTRA4x4(mb_type)) {
for (i = 0; i < 16; i++)
if (sl->non_zero_count_cache[scan8[i]] || sl->mb[i * 16]) {
if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
uint8_t *const ptr = dest_y + block_offset[i];
svq3_add_idct_c(ptr, sl->mb + i * 16, linesize,
sl->qscale, IS_INTRA(mb_type) ? 1 : 0);
svq3_add_idct_c(ptr, s->mb + i * 16, linesize,
s->qscale, IS_INTRA(mb_type) ? 1 : 0);
}
}
}
@ -604,14 +611,13 @@ static av_always_inline int dctcoef_get(int16_t *mb, int index)
static av_always_inline void hl_decode_mb_predict_luma(SVQ3Context *s,
const H264Context *h,
H264SliceContext *sl,
int mb_type,
const int *block_offset,
int linesize,
uint8_t *dest_y)
{
int i;
int qscale = sl->qscale;
int qscale = s->qscale;
if (IS_INTRA4x4(mb_type)) {
for (i = 0; i < 16; i++) {
@ -632,18 +638,18 @@ static av_always_inline void hl_decode_mb_predict_luma(SVQ3Context *s,
topright = NULL;
s->hpc.pred4x4[dir](ptr, topright, linesize);
nnz = sl->non_zero_count_cache[scan8[i]];
nnz = s->non_zero_count_cache[scan8[i]];
if (nnz) {
svq3_add_idct_c(ptr, sl->mb + i * 16, linesize, qscale, 0);
svq3_add_idct_c(ptr, s->mb + i * 16, linesize, qscale, 0);
}
}
} else {
s->hpc.pred16x16[s->intra16x16_pred_mode](dest_y, linesize);
svq3_luma_dc_dequant_idct_c(sl->mb, sl->mb_luma_dc[0], qscale);
svq3_luma_dc_dequant_idct_c(s->mb, s->mb_luma_dc[0], qscale);
}
}
static void hl_decode_mb(SVQ3Context *s, const H264Context *h, H264SliceContext *sl)
static void hl_decode_mb(SVQ3Context *s, const H264Context *h)
{
const int mb_x = s->mb_x;
const int mb_y = s->mb_y;
@ -655,37 +661,37 @@ static void hl_decode_mb(SVQ3Context *s, const H264Context *h, H264SliceContext
const int *block_offset = &h->block_offset[0];
const int block_h = 16 >> h->chroma_y_shift;
dest_y = h->cur_pic.f->data[0] + (mb_x + mb_y * sl->linesize) * 16;
dest_cb = h->cur_pic.f->data[1] + mb_x * 8 + mb_y * sl->uvlinesize * block_h;
dest_cr = h->cur_pic.f->data[2] + mb_x * 8 + mb_y * sl->uvlinesize * block_h;
linesize = s->cur_pic->f->linesize[0];
uvlinesize = s->cur_pic->f->linesize[1];
s->vdsp.prefetch(dest_y + (s->mb_x & 3) * 4 * sl->linesize + 64, sl->linesize, 4);
s->vdsp.prefetch(dest_cb + (s->mb_x & 7) * sl->uvlinesize + 64, dest_cr - dest_cb, 2);
dest_y = h->cur_pic.f->data[0] + (mb_x + mb_y * linesize) * 16;
dest_cb = h->cur_pic.f->data[1] + mb_x * 8 + mb_y * uvlinesize * block_h;
dest_cr = h->cur_pic.f->data[2] + mb_x * 8 + mb_y * uvlinesize * block_h;
linesize = sl->mb_linesize = sl->linesize;
uvlinesize = sl->mb_uvlinesize = sl->uvlinesize;
s->vdsp.prefetch(dest_y + (s->mb_x & 3) * 4 * linesize + 64, linesize, 4);
s->vdsp.prefetch(dest_cb + (s->mb_x & 7) * uvlinesize + 64, dest_cr - dest_cb, 2);
if (IS_INTRA(mb_type)) {
s->hpc.pred8x8[s->chroma_pred_mode](dest_cb, uvlinesize);
s->hpc.pred8x8[s->chroma_pred_mode](dest_cr, uvlinesize);
hl_decode_mb_predict_luma(s, h, sl, mb_type, block_offset, linesize, dest_y);
hl_decode_mb_predict_luma(s, h, mb_type, block_offset, linesize, dest_y);
}
hl_decode_mb_idct_luma(h, sl, mb_type, block_offset, linesize, dest_y);
hl_decode_mb_idct_luma(s, mb_type, block_offset, linesize, dest_y);
if (sl->cbp & 0x30) {
if (s->cbp & 0x30) {
uint8_t *dest[2] = { dest_cb, dest_cr };
s->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + 16 * 16 * 1,
s->dequant4_coeff[sl->chroma_qp[0]][0]);
s->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + 16 * 16 * 2,
s->dequant4_coeff[sl->chroma_qp[1]][0]);
s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 1,
s->dequant4_coeff[4][0]);
s->h264dsp.h264_chroma_dc_dequant_idct(s->mb + 16 * 16 * 2,
s->dequant4_coeff[4][0]);
for (j = 1; j < 3; j++) {
for (i = j * 16; i < j * 16 + 4; i++)
if (sl->non_zero_count_cache[scan8[i]] || sl->mb[i * 16]) {
if (s->non_zero_count_cache[scan8[i]] || s->mb[i * 16]) {
uint8_t *const ptr = dest[j - 1] + block_offset[i];
svq3_add_idct_c(ptr, sl->mb + i * 16,
uvlinesize, ff_h264_chroma_qp[0][sl->qscale + 12] - 12, 2);
svq3_add_idct_c(ptr, s->mb + i * 16,
uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
}
}
}
@ -694,7 +700,6 @@ static void hl_decode_mb(SVQ3Context *s, const H264Context *h, H264SliceContext
static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
{
H264Context *h = &s->h;
H264SliceContext *sl = &h->slice_ctx[0];
int i, j, k, m, dir, mode;
int cbp = 0;
uint32_t vlc;
@ -905,7 +910,7 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
memset(s->intra4x4_pred_mode + s->mb2br_xy[mb_xy], DC_PRED, 8);
}
if (!IS_SKIP(mb_type) || s->pict_type == AV_PICTURE_TYPE_B) {
memset(sl->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
memset(s->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
}
if (!IS_INTRA16x16(mb_type) &&
@ -920,17 +925,17 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
}
if (IS_INTRA16x16(mb_type) ||
(s->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
sl->qscale += svq3_get_se_golomb(&h->gb);
s->qscale += svq3_get_se_golomb(&h->gb);
if (sl->qscale > 31u) {
av_log(h->avctx, AV_LOG_ERROR, "qscale:%d\n", sl->qscale);
if (s->qscale > 31u) {
av_log(h->avctx, AV_LOG_ERROR, "qscale:%d\n", s->qscale);
return -1;
}
}
if (IS_INTRA16x16(mb_type)) {
AV_ZERO128(sl->mb_luma_dc[0] + 0);
AV_ZERO128(sl->mb_luma_dc[0] + 8);
if (svq3_decode_block(&h->gb, sl->mb_luma_dc[0], 0, 1)) {
AV_ZERO128(s->mb_luma_dc[0] + 0);
AV_ZERO128(s->mb_luma_dc[0] + 8);
if (svq3_decode_block(&h->gb, s->mb_luma_dc[0], 0, 1)) {
av_log(h->avctx, AV_LOG_ERROR,
"error while decoding intra luma dc\n");
return -1;
@ -939,7 +944,7 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
if (cbp) {
const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
const int type = ((sl->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
const int type = ((s->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
for (i = 0; i < 4; i++)
if ((cbp & (1 << i))) {
@ -947,9 +952,9 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
k = index ? (1 * (j & 1) + 2 * (i & 1) +
2 * (j & 2) + 4 * (i & 2))
: (4 * i + j);
sl->non_zero_count_cache[scan8[k]] = 1;
s->non_zero_count_cache[scan8[k]] = 1;
if (svq3_decode_block(&h->gb, &sl->mb[16 * k], index, type)) {
if (svq3_decode_block(&h->gb, &s->mb[16 * k], index, type)) {
av_log(h->avctx, AV_LOG_ERROR,
"error while decoding block\n");
return -1;
@ -959,7 +964,7 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
if ((cbp & 0x30)) {
for (i = 1; i < 3; ++i)
if (svq3_decode_block(&h->gb, &sl->mb[16 * 16 * i], 0, 3)) {
if (svq3_decode_block(&h->gb, &s->mb[16 * 16 * i], 0, 3)) {
av_log(h->avctx, AV_LOG_ERROR,
"error while decoding chroma dc block\n");
return -1;
@ -969,9 +974,9 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
for (i = 1; i < 3; i++) {
for (j = 0; j < 4; j++) {
k = 16 * i + j;
sl->non_zero_count_cache[scan8[k]] = 1;
s->non_zero_count_cache[scan8[k]] = 1;
if (svq3_decode_block(&h->gb, &sl->mb[16 * k], 1, 1)) {
if (svq3_decode_block(&h->gb, &s->mb[16 * k], 1, 1)) {
av_log(h->avctx, AV_LOG_ERROR,
"error while decoding chroma ac block\n");
return -1;
@ -982,7 +987,7 @@ static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
}
}
sl->cbp = cbp;
s->cbp = cbp;
h->cur_pic.mb_type[mb_xy] = mb_type;
if (IS_INTRA(mb_type))
@ -996,7 +1001,6 @@ static int svq3_decode_slice_header(AVCodecContext *avctx)
{
SVQ3Context *s = avctx->priv_data;
H264Context *h = &s->h;
H264SliceContext *sl = &h->slice_ctx[0];
const int mb_xy = s->mb_xy;
int i, header;
unsigned slice_id;
@ -1048,16 +1052,14 @@ static int svq3_decode_slice_header(AVCodecContext *avctx)
s->pict_type = ff_h264_golomb_to_pict_type[slice_id];
if ((header & 0x9F) == 2) {
i = (s->mb_num < 64) ? 6 : (1 + av_log2(s->mb_num - 1));
sl->mb_skip_run = get_bits(&h->gb, i) -
(s->mb_y * s->mb_width + s->mb_x);
i = (s->mb_num < 64) ? 6 : (1 + av_log2(s->mb_num - 1));
get_bits(&h->gb, i);
} else {
skip_bits1(&h->gb);
sl->mb_skip_run = 0;
}
sl->slice_num = get_bits(&h->gb, 8);
sl->qscale = get_bits(&h->gb, 5);
s->slice_num = get_bits(&h->gb, 8);
s->qscale = get_bits(&h->gb, 5);
s->adaptive_quant = get_bits1(&h->gb);
/* unknown fields */
@ -1108,7 +1110,6 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
{
SVQ3Context *s = avctx->priv_data;
H264Context *h = &s->h;
H264SliceContext *sl;
int m, x, y;
unsigned char *extradata;
unsigned char *extradata_end;
@ -1147,15 +1148,11 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
ff_hpeldsp_init(&s->hdsp, avctx->flags);
ff_tpeldsp_init(&s->tdsp);
sl = h->slice_ctx;
h->flags = avctx->flags;
sl->is_complex = 1;
h->picture_structure = PICT_FRAME;
avctx->pix_fmt = AV_PIX_FMT_YUVJ420P;
avctx->color_range = AVCOL_RANGE_JPEG;
h->slice_ctx[0].chroma_qp[0] = h->slice_ctx[0].chroma_qp[1] = 4;
h->chroma_x_shift = h->chroma_y_shift = 1;
s->halfpel_flag = 1;
@ -1328,8 +1325,6 @@ static void free_picture(AVCodecContext *avctx, H264Picture *pic)
static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
{
SVQ3Context *s = avctx->priv_data;
H264Context *h = &s->h;
H264SliceContext *sl = &h->slice_ctx[0];
const int big_mb_num = s->mb_stride * (s->mb_height + 1) + 1;
const int mb_array_size = s->mb_stride * s->mb_height;
const int b4_stride = s->mb_width * 4 + 1;
@ -1369,9 +1364,6 @@ static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
return AVERROR(ENOMEM);
}
sl->linesize = pic->f->linesize[0];
sl->uvlinesize = pic->f->linesize[1];
return 0;
fail:
free_picture(avctx, pic);
@ -1384,7 +1376,6 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
const uint8_t *buf = avpkt->data;
SVQ3Context *s = avctx->priv_data;
H264Context *h = &s->h;
H264SliceContext *sl = &h->slice_ctx[0];
int buf_size = avpkt->size;
int ret, m, i;
@ -1426,14 +1417,14 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
h->cur_pic = *s->cur_pic;
for (i = 0; i < 16; i++) {
h->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * sl->linesize * ((scan8[i] - scan8[0]) >> 3);
h->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * sl->linesize * ((scan8[i] - scan8[0]) >> 3);
h->block_offset[i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
h->block_offset[48 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[0] * ((scan8[i] - scan8[0]) >> 3);
}
for (i = 0; i < 16; i++) {
h->block_offset[16 + i] =
h->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * sl->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
h->block_offset[32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
h->block_offset[48 + 16 + i] =
h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * sl->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * s->cur_pic->f->linesize[1] * ((scan8[i] - scan8[0]) >> 3);
}
if (s->pict_type != AV_PICTURE_TYPE_I) {
@ -1467,7 +1458,7 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
"%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
av_get_picture_type_char(s->pict_type),
s->halfpel_flag, s->thirdpel_flag,
s->adaptive_quant, h->slice_ctx[0].qscale, sl->slice_num);
s->adaptive_quant, s->qscale, s->slice_num);
if (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type == AV_PICTURE_TYPE_B ||
avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type != AV_PICTURE_TYPE_I ||
@ -1482,7 +1473,7 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
}
if (s->pict_type == AV_PICTURE_TYPE_B) {
h->frame_num_offset = sl->slice_num - h->prev_frame_num;
h->frame_num_offset = s->slice_num - h->prev_frame_num;
if (h->frame_num_offset < 0)
h->frame_num_offset += 256;
@ -1493,7 +1484,7 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
}
} else {
h->prev_frame_num = h->frame_num;
h->frame_num = sl->slice_num;
h->frame_num = s->slice_num;
h->prev_frame_num_offset = h->frame_num - h->prev_frame_num;
if (h->prev_frame_num_offset < 0)
@ -1539,7 +1530,7 @@ static int svq3_decode_frame(AVCodecContext *avctx, void *data,
}
if (mb_type != 0)
hl_decode_mb(s, h, &h->slice_ctx[0]);
hl_decode_mb(s, h);
if (s->pict_type != AV_PICTURE_TYPE_B && !h->low_delay)
h->cur_pic.mb_type[s->mb_x + s->mb_y * s->mb_stride] =

Loading…
Cancel
Save