diff --git a/libavfilter/vf_zscale.c b/libavfilter/vf_zscale.c index 952efaf41b..2c390de308 100644 --- a/libavfilter/vf_zscale.c +++ b/libavfilter/vf_zscale.c @@ -119,6 +119,10 @@ typedef struct ZScaleContext { void *tmp[MAX_THREADS]; //separate for each thread; int nb_threads; int jobs_ret[MAX_THREADS]; + int in_slice_start[MAX_THREADS]; + int in_slice_end[MAX_THREADS]; + int out_slice_start[MAX_THREADS]; + int out_slice_end[MAX_THREADS]; zimg_image_format src_format, dst_format; zimg_image_format alpha_src_format, alpha_dst_format; @@ -226,6 +230,31 @@ static int query_formats(AVFilterContext *ctx) return ff_formats_ref(ff_make_format_list(pixel_fmts), &ctx->outputs[0]->incfg.formats); } +static void slice_params(ZScaleContext *s, int out_h, int in_h) +{ + int slice_size; + + slice_size = (out_h + s->nb_threads - 1) / s->nb_threads; + if (slice_size % 2) + slice_size += 1; + s->out_slice_start[0] = 0; + s->out_slice_end[0] = FFMIN(out_h, slice_size); + for (int i = 1; i < s->nb_threads - 1; i++) { + s->out_slice_start[i] = s->out_slice_end[i-1]; + s->out_slice_end[i] = s->out_slice_start[i] + slice_size; + } + + if (s->nb_threads > 1) { + s->out_slice_start[s->nb_threads - 1] = s->out_slice_end[s->nb_threads - 2]; + s->out_slice_end[s->nb_threads - 1] = out_h; + } + + for (int i = 0; i < s->nb_threads; i++) { + s->in_slice_start[i] = av_rescale_rnd(s->out_slice_start[i], in_h * 2, out_h * 2, AV_ROUND_NEAR_INF); + s->in_slice_end[i] = av_rescale_rnd(s->out_slice_end[i], in_h * 2, out_h * 2, AV_ROUND_NEAR_INF); + } +} + static int config_props(AVFilterLink *outlink) { AVFilterContext *ctx = outlink->src; @@ -570,10 +599,10 @@ static int graphs_build(AVFrame *in, AVFrame *out, const AVPixFmtDescriptor *des zimg_image_format dst_format; zimg_image_format alpha_src_format; zimg_image_format alpha_dst_format; - const int in_slice_start = 4 * ((((in->height + 2) / 4) * job_nr) / n_jobs); - const int in_slice_end = (job_nr == n_jobs-1) ? in->height : 4 * (((in->height + 2) / 4) * (job_nr+1) / n_jobs); - const int out_slice_start = 4 * ((((out->height + 2) / 4) * job_nr) / n_jobs); - const int out_slice_end = (job_nr == n_jobs-1) ? out->height : 4 * (((out->height + 2) / 4) * (job_nr+1) / n_jobs); + const int in_slice_start = s->in_slice_start[job_nr]; + const int in_slice_end = s->in_slice_end[job_nr]; + const int out_slice_start = s->out_slice_start[job_nr]; + const int out_slice_end = s->out_slice_end[job_nr]; src_format = s->src_format; dst_format = s->dst_format; @@ -694,7 +723,7 @@ static int filter_slice(AVFilterContext *ctx, void *data, int job_nr, int n_jobs ZScaleContext *s = ctx->priv; zimg_image_buffer_const src_buf = { ZIMG_API_VERSION }; zimg_image_buffer dst_buf = { ZIMG_API_VERSION }; - const int out_slice_start = 4 * ((((td->out->height + 2) / 4) * job_nr) / n_jobs); + const int out_slice_start = s->out_slice_start[job_nr]; /* create zimg filter graphs for each thread only if not created earlier or there is some change in frame parameters */ @@ -810,6 +839,8 @@ static int filter_frame(AVFilterLink *link, AVFrame *in) s->out_primaries = out->color_primaries; s->out_range = out->color_range; + slice_params(s, out->height, in->height); + zimg_image_format_default(&s->src_format, ZIMG_API_VERSION); zimg_image_format_default(&s->dst_format, ZIMG_API_VERSION); zimg_graph_builder_params_default(&s->params, ZIMG_API_VERSION);