avcodec/vp9: Switch to ProgressFrames

This already fixes a race in the vp9-encparams test. In this test,
side data is added to the current frame after it has been decoded
(and therefore after ff_thread_finish_setup() has been called).
Yet the update_thread_context callback called ff_thread_ref_frame(),
and therefore av_frame_ref(), with this frame as source frame, and
the ensuing read was unsynchronised with adding the side data,
i.e. there was a data race.

By switching to the ProgressFrame API, the implicit av_frame_ref()
is removed and the race is fixed, except if this frame is later reused
by a show-existing-frame, which uses an explicit av_frame_ref().
The vp9-encparams test does not cover this case, so this commit
already fixes all the races in this test.

This decoder kept multiple references to the same ThreadFrames
in the same context and therefore performed lots of implicit
av_frame_ref() calls even when decoding single-threaded. This incurred
lots of small allocations: when decoding an ordinary 10s video in
single-threaded mode, the number of allocations reported by Valgrind
went down from 57,814 to 20,908; for 10 threads it went down from
84,223 to 21,901.

Reviewed-by: Anton Khirnov <anton@khirnov.net>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
release/7.1
Andreas Rheinhardt 2 years ago
parent 444bd353e9
commit 7bd3b73716
  1. 4
      libavcodec/dxva2_vp9.c
  2. 2
      libavcodec/vaapi_vp9.c
  3. 137
      libavcodec/vp9.c
  4. 2
      libavcodec/vp9_mc_template.c
  5. 5
      libavcodec/vp9block.c
  6. 6
      libavcodec/vp9dec.h
  7. 1
      libavcodec/vp9lpf.c
  8. 4
      libavcodec/vp9mvs.c
  9. 19
      libavcodec/vp9recon.c
  10. 6
      libavcodec/vp9shared.h

@ -79,7 +79,7 @@ int ff_dxva2_vp9_fill_picture_parameters(const AVCodecContext *avctx, AVDXVACont
pp->Reserved8Bits = 0;
for (i = 0; i < 8; i++) {
if (h->refs[i].f->buf[0]) {
if (h->refs[i].f) {
fill_picture_entry(&pp->ref_frame_map[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[i].f, 0), 0);
pp->ref_frame_coded_width[i] = h->refs[i].f->width;
pp->ref_frame_coded_height[i] = h->refs[i].f->height;
@ -89,7 +89,7 @@ int ff_dxva2_vp9_fill_picture_parameters(const AVCodecContext *avctx, AVDXVACont
for (i = 0; i < 3; i++) {
uint8_t refidx = h->h.refidx[i];
if (h->refs[refidx].f->buf[0])
if (h->refs[refidx].f)
fill_picture_entry(&pp->frame_refs[i], ff_dxva2_get_surface_index(avctx, ctx, h->refs[refidx].f, 0), 0);
else
pp->frame_refs[i].bPicEntry = 0xFF;

@ -100,7 +100,7 @@ static int vaapi_vp9_start_frame(AVCodecContext *avctx,
}
for (i = 0; i < 8; i++) {
if (h->refs[i].f->buf[0])
if (h->refs[i].f)
pic_param.reference_frames[i] = ff_vaapi_get_surface_id(h->refs[i].f);
else
pic_param.reference_frames[i] = VA_INVALID_ID;

@ -30,9 +30,9 @@
#include "hwaccel_internal.h"
#include "hwconfig.h"
#include "profiles.h"
#include "progressframe.h"
#include "refstruct.h"
#include "thread.h"
#include "threadframe.h"
#include "pthread_internal.h"
#include "videodsp.h"
@ -100,7 +100,7 @@ static void vp9_tile_data_free(VP9TileData *td)
static void vp9_frame_unref(VP9Frame *f)
{
ff_thread_release_ext_buffer(&f->tf);
ff_progress_frame_unref(&f->tf);
ff_refstruct_unref(&f->extradata);
ff_refstruct_unref(&f->hwaccel_picture_private);
f->segmentation_map = NULL;
@ -111,7 +111,7 @@ static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
VP9Context *s = avctx->priv_data;
int ret, sz;
ret = ff_thread_get_ext_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
ret = ff_progress_frame_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
if (ret < 0)
return ret;
@ -147,13 +147,9 @@ fail:
return ret;
}
static int vp9_frame_ref(VP9Frame *dst, VP9Frame *src)
static void vp9_frame_ref(VP9Frame *dst, const VP9Frame *src)
{
int ret;
ret = ff_thread_ref_frame(&dst->tf, &src->tf);
if (ret < 0)
return ret;
ff_progress_frame_ref(&dst->tf, &src->tf);
dst->extradata = ff_refstruct_ref(src->extradata);
@ -163,8 +159,13 @@ static int vp9_frame_ref(VP9Frame *dst, VP9Frame *src)
ff_refstruct_replace(&dst->hwaccel_picture_private,
src->hwaccel_picture_private);
}
return 0;
static void vp9_frame_replace(VP9Frame *dst, const VP9Frame *src)
{
vp9_frame_unref(dst);
if (src && src->tf.f)
vp9_frame_ref(dst, src);
}
static int update_size(AVCodecContext *avctx, int w, int h)
@ -589,9 +590,9 @@ static int decode_frame_header(AVCodecContext *avctx,
s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
s->s.h.refidx[2] = get_bits(&s->gb, 3);
s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
!s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
!s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
if (!s->s.refs[s->s.h.refidx[0]].f ||
!s->s.refs[s->s.h.refidx[1]].f ||
!s->s.refs[s->s.h.refidx[2]].f) {
av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
return AVERROR_INVALIDDATA;
}
@ -611,7 +612,8 @@ static int decode_frame_header(AVCodecContext *avctx,
// Note that in this code, "CUR_FRAME" is actually before we
// have formally allocated a frame, and thus actually represents
// the _last_ frame
s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f &&
s->s.frames[CUR_FRAME].tf.f->width == w &&
s->s.frames[CUR_FRAME].tf.f->height == h;
if (get_bits1(&s->gb)) // display size
skip_bits(&s->gb, 32);
@ -1240,16 +1242,12 @@ static av_cold int vp9_decode_free(AVCodecContext *avctx)
VP9Context *s = avctx->priv_data;
int i;
for (i = 0; i < 3; i++) {
for (int i = 0; i < 3; i++)
vp9_frame_unref(&s->s.frames[i]);
av_frame_free(&s->s.frames[i].tf.f);
}
ff_refstruct_pool_uninit(&s->frame_extradata_pool);
for (i = 0; i < 8; i++) {
ff_thread_release_ext_buffer(&s->s.refs[i]);
av_frame_free(&s->s.refs[i].f);
ff_thread_release_ext_buffer(&s->next_refs[i]);
av_frame_free(&s->next_refs[i].f);
ff_progress_frame_unref(&s->s.refs[i]);
ff_progress_frame_unref(&s->next_refs[i]);
}
free_buffers(s);
@ -1384,7 +1382,7 @@ static int decode_tiles(AVCodecContext *avctx,
// FIXME maybe we can make this more finegrained by running the
// loopfilter per-block instead of after each sbrow
// In fact that would also make intra pred left preparation easier?
ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
ff_progress_frame_report(&s->s.frames[CUR_FRAME].tf, row >> 3);
}
}
return 0;
@ -1561,12 +1559,13 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
int ret, i, j, ref;
int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
(!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
const VP9Frame *src;
AVFrame *f;
if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
return ret;
} else if (ret == 0) {
if (!s->s.refs[ref].f->buf[0]) {
if (!s->s.refs[ref].f) {
av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
return AVERROR_INVALIDDATA;
}
@ -1574,33 +1573,19 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
return ret;
frame->pts = pkt->pts;
frame->pkt_dts = pkt->dts;
for (i = 0; i < 8; i++) {
if (s->next_refs[i].f->buf[0])
ff_thread_release_ext_buffer(&s->next_refs[i]);
if (s->s.refs[i].f->buf[0] &&
(ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
return ret;
}
for (int i = 0; i < 8; i++)
ff_progress_frame_replace(&s->next_refs[i], &s->s.refs[i]);
*got_frame = 1;
return pkt->size;
}
data += ret;
size -= ret;
if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
vp9_frame_unref(&s->s.frames[REF_FRAME_SEGMAP]);
if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
(ret = vp9_frame_ref(&s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
return ret;
}
if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
vp9_frame_unref(&s->s.frames[REF_FRAME_MVPAIR]);
if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
(ret = vp9_frame_ref(&s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
return ret;
if (s->s.frames[CUR_FRAME].tf.f->buf[0])
vp9_frame_unref(&s->s.frames[CUR_FRAME]);
src = !s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres ? &s->s.frames[CUR_FRAME] : NULL;
if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly)
vp9_frame_replace(&s->s.frames[REF_FRAME_SEGMAP], src);
vp9_frame_replace(&s->s.frames[REF_FRAME_MVPAIR], src);
vp9_frame_unref(&s->s.frames[CUR_FRAME]);
if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
return ret;
f = s->s.frames[CUR_FRAME].tf.f;
@ -1610,7 +1595,8 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
f->flags &= ~AV_FRAME_FLAG_KEY;
f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
// Non-existent frames have the implicit dimension 0x0 != CUR_FRAME
if (!s->s.frames[REF_FRAME_MVPAIR].tf.f ||
(s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
vp9_frame_unref(&s->s.frames[REF_FRAME_SEGMAP]);
@ -1618,15 +1604,9 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
// ref frame setup
for (i = 0; i < 8; i++) {
if (s->next_refs[i].f->buf[0])
ff_thread_release_ext_buffer(&s->next_refs[i]);
if (s->s.h.refreshrefmask & (1 << i)) {
ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
} else if (s->s.refs[i].f->buf[0]) {
ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
}
if (ret < 0)
return ret;
ff_progress_frame_replace(&s->next_refs[i],
s->s.h.refreshrefmask & (1 << i) ?
&s->s.frames[CUR_FRAME].tf : &s->s.refs[i]);
}
if (avctx->hwaccel) {
@ -1736,7 +1716,7 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
{
ret = decode_tiles(avctx, data, size);
if (ret < 0) {
ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
ff_progress_frame_report(&s->s.frames[CUR_FRAME].tf, INT_MAX);
return ret;
}
}
@ -1752,7 +1732,7 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
ff_thread_finish_setup(avctx);
}
} while (s->pass++ == 1);
ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
ff_progress_frame_report(&s->s.frames[CUR_FRAME].tf, INT_MAX);
if (s->td->error_info < 0) {
av_log(avctx, AV_LOG_ERROR, "Failed to decode tile data\n");
@ -1767,13 +1747,8 @@ static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
finish:
// ref frame setup
for (i = 0; i < 8; i++) {
if (s->s.refs[i].f->buf[0])
ff_thread_release_ext_buffer(&s->s.refs[i]);
if (s->next_refs[i].f->buf[0] &&
(ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
return ret;
}
for (int i = 0; i < 8; i++)
ff_progress_frame_replace(&s->s.refs[i], &s->next_refs[i]);
if (!s->s.h.invisible) {
if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
@ -1792,7 +1767,7 @@ static void vp9_decode_flush(AVCodecContext *avctx)
for (i = 0; i < 3; i++)
vp9_frame_unref(&s->s.frames[i]);
for (i = 0; i < 8; i++)
ff_thread_release_ext_buffer(&s->s.refs[i]);
ff_progress_frame_unref(&s->s.refs[i]);
if (FF_HW_HAS_CB(avctx, flush))
FF_HW_SIMPLE_CALL(avctx, flush);
@ -1814,42 +1789,18 @@ static av_cold int vp9_decode_init(AVCodecContext *avctx)
}
#endif
for (int i = 0; i < 3; i++) {
s->s.frames[i].tf.f = av_frame_alloc();
if (!s->s.frames[i].tf.f)
return AVERROR(ENOMEM);
}
for (int i = 0; i < 8; i++) {
s->s.refs[i].f = av_frame_alloc();
s->next_refs[i].f = av_frame_alloc();
if (!s->s.refs[i].f || !s->next_refs[i].f)
return AVERROR(ENOMEM);
}
return 0;
}
#if HAVE_THREADS
static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
int i, ret;
VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
for (i = 0; i < 3; i++) {
if (s->s.frames[i].tf.f->buf[0])
vp9_frame_unref(&s->s.frames[i]);
if (ssrc->s.frames[i].tf.f->buf[0]) {
if ((ret = vp9_frame_ref(&s->s.frames[i], &ssrc->s.frames[i])) < 0)
return ret;
}
}
for (i = 0; i < 8; i++) {
if (s->s.refs[i].f->buf[0])
ff_thread_release_ext_buffer(&s->s.refs[i]);
if (ssrc->next_refs[i].f->buf[0]) {
if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
return ret;
}
}
for (int i = 0; i < 3; i++)
vp9_frame_replace(&s->s.frames[i], &ssrc->s.frames[i]);
for (int i = 0; i < 8; i++)
ff_progress_frame_replace(&s->s.refs[i], &ssrc->next_refs[i]);
ff_refstruct_replace(&s->frame_extradata_pool, ssrc->frame_extradata_pool);
s->frame_extradata_pool_size = ssrc->frame_extradata_pool_size;
@ -1889,7 +1840,7 @@ const FFCodec ff_vp9_decoder = {
.p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP |
FF_CODEC_CAP_SLICE_THREAD_HAS_MF |
FF_CODEC_CAP_ALLOCATE_PROGRESS,
FF_CODEC_CAP_USES_PROGRESSFRAMES,
.flush = vp9_decode_flush,
UPDATE_THREAD_CONTEXT(vp9_decode_update_thread_context),
.p.profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),

@ -36,7 +36,7 @@ static void FN(inter_pred)(VP9TileData *td)
const VP9Context *s = td->s;
VP9Block *b = td->b;
int row = td->row, col = td->col;
const ThreadFrame *tref1 = &s->s.refs[s->s.h.refidx[b->ref[0]]], *tref2;
const ProgressFrame *tref1 = &s->s.refs[s->s.h.refidx[b->ref[0]]], *tref2;
const AVFrame *ref1 = tref1->f, *ref2;
int w1 = ref1->width, h1 = ref1->height, w2, h2;
ptrdiff_t ls_y = td->y_stride, ls_uv = td->uv_stride;

@ -22,8 +22,9 @@
*/
#include "libavutil/avassert.h"
#include "libavutil/frame.h"
#include "threadframe.h"
#include "progressframe.h"
#include "vp89_rac.h"
#include "vp9.h"
#include "vp9data.h"
@ -113,7 +114,7 @@ static void decode_mode(VP9TileData *td)
uint8_t *refsegmap = s->s.frames[REF_FRAME_SEGMAP].segmentation_map;
if (!s->s.frames[REF_FRAME_SEGMAP].uses_2pass)
ff_thread_await_progress(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
ff_progress_frame_await(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3);
for (y = 0; y < h4; y++) {
int idx_base = (y + row) * 8 * s->sb_cols + col;
for (x = 0; x < w4; x++)

@ -29,8 +29,8 @@
#include <stdatomic.h>
#include "libavutil/mem_internal.h"
#include "libavutil/pixfmt.h"
#include "libavutil/thread.h"
#include "libavutil/internal.h"
#include "get_bits.h"
#include "videodsp.h"
@ -120,7 +120,7 @@ typedef struct VP9Context {
int w, h;
enum AVPixelFormat pix_fmt, last_fmt, gf_fmt;
unsigned sb_cols, sb_rows, rows, cols;
ThreadFrame next_refs[8];
ProgressFrame next_refs[8];
struct {
uint8_t lim_lut[64];
@ -245,7 +245,7 @@ void ff_vp9_decode_block(VP9TileData *td, int row, int col,
VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
enum BlockLevel bl, enum BlockPartition bp);
void ff_vp9_loopfilter_sb(AVCodecContext *avctx, VP9Filter *lflvl,
void ff_vp9_loopfilter_sb(struct AVCodecContext *avctx, VP9Filter *lflvl,
int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff);
void ff_vp9_intra_recon_8bpp(VP9TileData *td,

@ -21,6 +21,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "avcodec.h"
#include "vp9dec.h"
static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,

@ -21,7 +21,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "threadframe.h"
#include "progressframe.h"
#include "vp89_rac.h"
#include "vp9data.h"
#include "vp9dec.h"
@ -175,7 +175,7 @@ static void find_ref_mvs(VP9TileData *td,
VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
if (!s->s.frames[REF_FRAME_MVPAIR].uses_2pass)
ff_thread_await_progress(&s->s.frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
ff_progress_frame_await(&s->s.frames[REF_FRAME_MVPAIR].tf, row >> 3);
if (mv->ref[0] == ref)
RETURN_MV(mv->mv[0]);
else if (mv->ref[1] == ref)

@ -22,9 +22,10 @@
*/
#include "libavutil/avassert.h"
#include "libavutil/frame.h"
#include "libavutil/mem_internal.h"
#include "threadframe.h"
#include "progressframe.h"
#include "videodsp.h"
#include "vp9data.h"
#include "vp9dec.h"
@ -298,7 +299,7 @@ void ff_vp9_intra_recon_16bpp(VP9TileData *td, ptrdiff_t y_off, ptrdiff_t uv_off
static av_always_inline void mc_luma_unscaled(VP9TileData *td, const vp9_mc_func (*mc)[2],
uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
const ThreadFrame *ref_frame,
const ProgressFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP9mv *mv,
int bw, int bh, int w, int h, int bytesperpixel)
{
@ -314,7 +315,7 @@ static av_always_inline void mc_luma_unscaled(VP9TileData *td, const vp9_mc_func
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th = (y + bh + 4 * !!my + 7) >> 6;
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
ff_progress_frame_await(ref_frame, FFMAX(th, 0));
// The arm/aarch64 _hv filters read one more row than what actually is
// needed, so switch to emulated edge one pixel sooner vertically
// (!!my * 5) than horizontally (!!mx * 4).
@ -336,7 +337,7 @@ static av_always_inline void mc_chroma_unscaled(VP9TileData *td, const vp9_mc_fu
ptrdiff_t dst_stride,
const uint8_t *ref_u, ptrdiff_t src_stride_u,
const uint8_t *ref_v, ptrdiff_t src_stride_v,
const ThreadFrame *ref_frame,
const ProgressFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP9mv *mv,
int bw, int bh, int w, int h, int bytesperpixel)
{
@ -353,7 +354,7 @@ static av_always_inline void mc_chroma_unscaled(VP9TileData *td, const vp9_mc_fu
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
ff_progress_frame_await(ref_frame, FFMAX(th, 0));
// The arm/aarch64 _hv filters read one more row than what actually is
// needed, so switch to emulated edge one pixel sooner vertically
// (!!my * 5) than horizontally (!!mx * 4).
@ -407,7 +408,7 @@ static av_always_inline void mc_luma_scaled(VP9TileData *td, vp9_scaled_mc_func
const vp9_mc_func (*mc)[2],
uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
const ThreadFrame *ref_frame,
const ProgressFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP9mv *in_mv,
int px, int py, int pw, int ph,
int bw, int bh, int w, int h, int bytesperpixel,
@ -444,7 +445,7 @@ static av_always_inline void mc_luma_scaled(VP9TileData *td, vp9_scaled_mc_func
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th = (y + refbh_m1 + 4 + 7) >> 6;
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
ff_progress_frame_await(ref_frame, FFMAX(th, 0));
// The arm/aarch64 _hv filters read one more row than what actually is
// needed, so switch to emulated edge one pixel sooner vertically
// (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
@ -467,7 +468,7 @@ static av_always_inline void mc_chroma_scaled(VP9TileData *td, vp9_scaled_mc_fun
ptrdiff_t dst_stride,
const uint8_t *ref_u, ptrdiff_t src_stride_u,
const uint8_t *ref_v, ptrdiff_t src_stride_v,
const ThreadFrame *ref_frame,
const ProgressFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP9mv *in_mv,
int px, int py, int pw, int ph,
int bw, int bh, int w, int h, int bytesperpixel,
@ -514,7 +515,7 @@ static av_always_inline void mc_chroma_scaled(VP9TileData *td, vp9_scaled_mc_fun
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
ff_progress_frame_await(ref_frame, FFMAX(th, 0));
// The arm/aarch64 _hv filters read one more row than what actually is
// needed, so switch to emulated edge one pixel sooner vertically
// (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).

@ -29,8 +29,8 @@
#include "libavutil/mem_internal.h"
#include "progressframe.h"
#include "vp9.h"
#include "threadframe.h"
enum BlockPartition {
PARTITION_NONE, // [ ] <-.
@ -63,7 +63,7 @@ typedef struct VP9mvrefPair {
} VP9mvrefPair;
typedef struct VP9Frame {
ThreadFrame tf;
ProgressFrame tf;
void *extradata; ///< RefStruct reference
uint8_t *segmentation_map;
VP9mvrefPair *mv;
@ -164,7 +164,7 @@ typedef struct VP9BitstreamHeader {
typedef struct VP9SharedContext {
VP9BitstreamHeader h;
ThreadFrame refs[8];
ProgressFrame refs[8];
#define CUR_FRAME 0
#define REF_FRAME_MVPAIR 1
#define REF_FRAME_SEGMAP 2

Loading…
Cancel
Save