avcodec/mpegvideo_enc: Only keep what is used from MECmpContext

A MECmpContext is quite big (792 bytes here), and because of how
ff_update_duplicate_context() works, it is (unfortunately)
copied quite frequently when using slice threading.
Therefore keep only the members of MECmpContext that are actually
used (the sad and sse comparison functions and sum_abs_dctelem)
directly in MpegEncContext and remove the embedded MECmpContext.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
release/7.1
Andreas Rheinhardt 8 months ago
parent eb3415912b
commit 3b67ab85ee
  1. 4
      libavcodec/me_cmp.c
  2. 2
      libavcodec/mpeg4videoenc.c
  3. 4
      libavcodec/mpegvideo.h
  4. 45
      libavcodec/mpegvideo_enc.c
  5. 2
      libavcodec/x86/me_cmp_init.c

@ -653,7 +653,7 @@ static int dct_sad8x8_c(MpegEncContext *s, const uint8_t *src1,
s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);
s->fdsp.fdct(temp); s->fdsp.fdct(temp);
return s->mecc.sum_abs_dctelem(temp); return s->sum_abs_dctelem(temp);
} }
#if CONFIG_GPL #if CONFIG_GPL
@ -819,7 +819,7 @@ static int rd8x8_c(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2,
s->idsp.idct_add(lsrc2, 8, temp); s->idsp.idct_add(lsrc2, 8, temp);
distortion = s->mecc.sse[1](NULL, lsrc2, lsrc1, 8, 8); distortion = s->sse_cmp[1](NULL, lsrc2, lsrc1, 8, 8);
return distortion + ((bits * s->qscale * s->qscale * 109 + 64) >> 7); return distortion + ((bits * s->qscale * s->qscale * 109 + 64) >> 7);
} }

@ -673,7 +673,7 @@ void ff_mpeg4_encode_mb(MpegEncContext *s, int16_t block[6][64],
} }
diff = diff * 256 / (xe * ye); diff = diff * 256 / (xe * ye);
} else { } else {
diff = s->mecc.sad[0](NULL, p_pic, b_pic, s->linesize, 16); diff = s->sad_cmp[0](NULL, p_pic, b_pic, s->linesize, 16);
} }
if (diff > s->qscale * 70) { // FIXME check that 70 is optimal if (diff > s->qscale * 70) { // FIXME check that 70 is optimal
s->mb_skipped = 0; s->mb_skipped = 0;

@ -220,7 +220,6 @@ typedef struct MpegEncContext {
H264ChromaContext h264chroma; H264ChromaContext h264chroma;
HpelDSPContext hdsp; HpelDSPContext hdsp;
IDCTDSPContext idsp; IDCTDSPContext idsp;
MECmpContext mecc;
MpegvideoEncDSPContext mpvencdsp; MpegvideoEncDSPContext mpvencdsp;
PixblockDSPContext pdsp; PixblockDSPContext pdsp;
QpelDSPContext qdsp; QpelDSPContext qdsp;
@ -508,6 +507,9 @@ typedef struct MpegEncContext {
me_cmp_func ildct_cmp[2]; ///< 0 = intra, 1 = non-intra me_cmp_func ildct_cmp[2]; ///< 0 = intra, 1 = non-intra
me_cmp_func n_sse_cmp[2]; ///< either SSE or NSSE cmp func me_cmp_func n_sse_cmp[2]; ///< either SSE or NSSE cmp func
me_cmp_func sad_cmp[2];
me_cmp_func sse_cmp[2];
int (*sum_abs_dctelem)(const int16_t *block);
/** /**
* ratecontrol qmin qmax limiting method * ratecontrol qmin qmax limiting method

@ -308,19 +308,20 @@ av_cold void ff_dct_encode_init(MpegEncContext *s)
static av_cold int me_cmp_init(MpegEncContext *s, AVCodecContext *avctx) static av_cold int me_cmp_init(MpegEncContext *s, AVCodecContext *avctx)
{ {
MECmpContext mecc;
me_cmp_func me_cmp[6]; me_cmp_func me_cmp[6];
int ret; int ret;
ff_me_cmp_init(&s->mecc, avctx); ff_me_cmp_init(&mecc, avctx);
ret = ff_me_init(&s->me, avctx, &s->mecc, 1); ret = ff_me_init(&s->me, avctx, &mecc, 1);
if (ret < 0) if (ret < 0)
return ret; return ret;
ret = ff_set_cmp(&s->mecc, me_cmp, s->frame_skip_cmp, 1); ret = ff_set_cmp(&mecc, me_cmp, s->frame_skip_cmp, 1);
if (ret < 0) if (ret < 0)
return ret; return ret;
s->frame_skip_cmp_fn = me_cmp[1]; s->frame_skip_cmp_fn = me_cmp[1];
if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) { if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
ret = ff_set_cmp(&s->mecc, me_cmp, avctx->ildct_cmp, 1); ret = ff_set_cmp(&mecc, me_cmp, avctx->ildct_cmp, 1);
if (ret < 0) if (ret < 0)
return ret; return ret;
if (!me_cmp[0] || !me_cmp[4]) if (!me_cmp[0] || !me_cmp[4])
@ -329,12 +330,18 @@ static av_cold int me_cmp_init(MpegEncContext *s, AVCodecContext *avctx)
s->ildct_cmp[1] = me_cmp[4]; s->ildct_cmp[1] = me_cmp[4];
} }
s->sum_abs_dctelem = mecc.sum_abs_dctelem;
s->sse_cmp[0] = mecc.sse[0];
s->sse_cmp[1] = mecc.sse[1];
s->sad_cmp[0] = mecc.sad[0];
s->sad_cmp[1] = mecc.sad[1];
if (avctx->mb_cmp == FF_CMP_NSSE) { if (avctx->mb_cmp == FF_CMP_NSSE) {
s->n_sse_cmp[0] = s->mecc.nsse[0]; s->n_sse_cmp[0] = mecc.nsse[0];
s->n_sse_cmp[1] = s->mecc.nsse[1]; s->n_sse_cmp[1] = mecc.nsse[1];
} else { } else {
s->n_sse_cmp[0] = s->mecc.sse[0]; s->n_sse_cmp[0] = mecc.sse[0];
s->n_sse_cmp[1] = s->mecc.sse[1]; s->n_sse_cmp[1] = mecc.sse[1];
} }
return 0; return 0;
@ -1123,7 +1130,7 @@ static int get_intra_count(MpegEncContext *s, const uint8_t *src,
for (y = 0; y < h; y += 16) { for (y = 0; y < h; y += 16) {
for (x = 0; x < w; x += 16) { for (x = 0; x < w; x += 16) {
int offset = x + y * stride; int offset = x + y * stride;
int sad = s->mecc.sad[0](NULL, src + offset, ref + offset, int sad = s->sad_cmp[0](NULL, src + offset, ref + offset,
stride, 16); stride, 16);
int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8; int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
int sae = get_sae(src + offset, mean, stride); int sae = get_sae(src + offset, mean, stride);
@ -2347,26 +2354,26 @@ static av_always_inline void encode_mb_internal(MpegEncContext *s,
/* pre quantization */ /* pre quantization */
if (s->mc_mb_var[s->mb_stride * mb_y + mb_x] < 2 * s->qscale * s->qscale) { if (s->mc_mb_var[s->mb_stride * mb_y + mb_x] < 2 * s->qscale * s->qscale) {
// FIXME optimize // FIXME optimize
if (s->mecc.sad[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale) if (s->sad_cmp[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
skip_dct[0] = 1; skip_dct[0] = 1;
if (s->mecc.sad[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale) if (s->sad_cmp[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
skip_dct[1] = 1; skip_dct[1] = 1;
if (s->mecc.sad[1](NULL, ptr_y + dct_offset, dest_y + dct_offset, if (s->sad_cmp[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
wrap_y, 8) < 20 * s->qscale) wrap_y, 8) < 20 * s->qscale)
skip_dct[2] = 1; skip_dct[2] = 1;
if (s->mecc.sad[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8, if (s->sad_cmp[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
wrap_y, 8) < 20 * s->qscale) wrap_y, 8) < 20 * s->qscale)
skip_dct[3] = 1; skip_dct[3] = 1;
if (s->mecc.sad[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale) if (s->sad_cmp[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
skip_dct[4] = 1; skip_dct[4] = 1;
if (s->mecc.sad[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale) if (s->sad_cmp[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
skip_dct[5] = 1; skip_dct[5] = 1;
if (!chroma_y_shift) { /* 422 */ if (!chroma_y_shift) { /* 422 */
if (s->mecc.sad[1](NULL, ptr_cb + uv_dct_offset, if (s->sad_cmp[1](NULL, ptr_cb + uv_dct_offset,
dest_cb + uv_dct_offset, dest_cb + uv_dct_offset,
wrap_c, 8) < 20 * s->qscale) wrap_c, 8) < 20 * s->qscale)
skip_dct[6] = 1; skip_dct[6] = 1;
if (s->mecc.sad[1](NULL, ptr_cr + uv_dct_offset, if (s->sad_cmp[1](NULL, ptr_cr + uv_dct_offset,
dest_cr + uv_dct_offset, dest_cr + uv_dct_offset,
wrap_c, 8) < 20 * s->qscale) wrap_c, 8) < 20 * s->qscale)
skip_dct[7] = 1; skip_dct[7] = 1;
@ -2647,9 +2654,9 @@ static int sse(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2, int
int x,y; int x,y;
if(w==16 && h==16) if(w==16 && h==16)
return s->mecc.sse[0](NULL, src1, src2, stride, 16); return s->sse_cmp[0](NULL, src1, src2, stride, 16);
else if(w==8 && h==8) else if(w==8 && h==8)
return s->mecc.sse[1](NULL, src1, src2, stride, 8); return s->sse_cmp[1](NULL, src1, src2, stride, 8);
for(y=0; y<h; y++){ for(y=0; y<h; y++){
for(x=0; x<w; x++){ for(x=0; x<w; x++){

@ -94,7 +94,7 @@ static int nsse16_mmx(MpegEncContext *c, const uint8_t *pix1, const uint8_t *pix
int score1, score2; int score1, score2;
if (c) if (c)
score1 = c->mecc.sse[0](c, pix1, pix2, stride, h); score1 = c->sse_cmp[0](c, pix1, pix2, stride, h);
else else
score1 = ff_sse16_mmx(c, pix1, pix2, stride, h); score1 = ff_sse16_mmx(c, pix1, pix2, stride, h);
score2 = ff_hf_noise16_mmx(pix1, stride, h) + ff_hf_noise8_mmx(pix1+8, stride, h) score2 = ff_hf_noise16_mmx(pix1, stride, h) + ff_hf_noise8_mmx(pix1+8, stride, h)

Loading…
Cancel
Save