From b80e43bf07ac890486e01a195abc9118ab403eb0 Mon Sep 17 00:00:00 2001 From: Niklas Haas Date: Mon, 1 May 2023 16:35:22 +0200 Subject: [PATCH] avfilter/vf_libplacebo: add flexible crop exprs Motivated by a desire to use vf_libplacebo as a GPU-accelerated cropping/padding/zooming filter. This commit adds support for setting the `input/target.crop` fields as dynamic expressions. Re-use the same generic variables available to other scale and crop type filters, and also add some more that we can afford as a result of being able to set these properties dynamically. It's worth pointing out that `out_t/ot` is currently redundant with `in_t/t` since it will always contain the same PTS values, but I plan on changing this in the near future. I decided to also expose `crop_w/crop_h` and `pos_w/pos_h` as variables in the expression parser itself, since this enables the fairly common use case of determining dimensions first and then placing the image appropriately, such as is done in the default behavior (which centers the cropped/placed region by default). --- doc/filters.texi | 55 ++++++++++++-- libavfilter/vf_libplacebo.c | 145 +++++++++++++++++++++++++++++++++++- 2 files changed, 192 insertions(+), 8 deletions(-) diff --git a/doc/filters.texi b/doc/filters.texi index 34212b513d..839a1c6a9f 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -15997,10 +15997,31 @@ in source frames. @table @option @item w @item h -Set the output video dimension expression. Default value is the input dimension. +Set the output video dimension expression. Default values are @code{iw} and +@code{ih}. Allows for the same expressions as the @ref{scale} filter. +@item crop_x +@item crop_y +Set the input crop x/y expressions, default values are @code{(iw-cw)/2} and +@code{(ih-ch)/2}. + +@item crop_w +@item crop_h +Set the input crop width/height expressions, default values are @code{iw} and +@code{ih}. 
+ +@item pos_x +@item pos_y +Set the output placement x/y expressions, default values are @code{(ow-pw)/2} +and @code{(oh-ph)/2}. + +@item pos_w +@item pos_h +Set the output placement width/height expressions, default values are @code{ow} +and @code{oh}. + @item format Set the output format override. If unset (the default), frames will be output in the same format as the respective input frames. Otherwise, format conversion @@ -16012,9 +16033,9 @@ Work the same as the identical @ref{scale} filter options. @item normalize_sar If enabled, output frames will always have a pixel aspect ratio of 1:1. This -will introduce padding/cropping as necessary. If disabled (the default), any -aspect ratio mismatches, including those from e.g. anamorphic video sources, -are forwarded to the output pixel aspect ratio. +will introduce additional padding/cropping as necessary. If disabled (the +default), any aspect ratio mismatches, including those from e.g. anamorphic +video sources, are forwarded to the output pixel aspect ratio. @item pad_crop_ratio Specifies a ratio (between @code{0.0} and @code{1.0}) between padding and @@ -16026,7 +16047,7 @@ approaches. @item fillcolor Set the color used to fill the output area not covered by the output image, for -example as a result of @ref{normalize_sar}. For the general syntax of this +example as a result of @option{normalize_sar}. For the general syntax of this option, check the @ref{color syntax,,"Color" section in the ffmpeg-utils manual,ffmpeg-utils}. Defaults to @code{black}. @@ -16051,6 +16072,30 @@ BT.2020+PQ, overriding the usual input frame metadata. These will also be picked as the values of @code{auto} for the respective frame output options. 
@end table +In addition to the expression constants documented for the @ref{scale} filter, +the @option{crop_w}, @option{crop_h}, @option{crop_x}, @option{crop_y}, +@option{pos_w}, @option{pos_h}, @option{pos_x} and @option{pos_y} options can +also contain the following constants: + +@table @option +@item crop_w, cw +@item crop_h, ch +The computed values of @option{crop_w} and @option{crop_h}. + +@item pos_w, pw +@item pos_h, ph +The computed values of @option{pos_w} and @option{pos_h}. + +@item in_t, t +The input frame timestamp, in seconds. NAN if input timestamp is unknown. + +@item out_t, ot +The output frame timestamp, in seconds. NAN if output timestamp is unknown. + +@item n +The input frame number, starting with 0. +@end table + @subsubsection Scaling The options in this section control how libplacebo performs upscaling and (if necessary) downscaling. Note that libplacebo will always internally operate on diff --git a/libavfilter/vf_libplacebo.c b/libavfilter/vf_libplacebo.c index fcdc97e48e..6fe3e0ea88 100644 --- a/libavfilter/vf_libplacebo.c +++ b/libavfilter/vf_libplacebo.c @@ -16,6 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/eval.h" #include "libavutil/file.h" #include "libavutil/opt.h" #include "libavutil/parseutils.h" @@ -60,6 +61,50 @@ static const struct pl_tone_map_function * const tonemapping_funcs[TONE_MAP_COUN [TONE_MAP_LINEAR] = &pl_tone_map_linear, }; +static const char *const var_names[] = { + "in_w", "iw", ///< width of the input video frame + "in_h", "ih", ///< height of the input video frame + "out_w", "ow", ///< width of the output video frame + "out_h", "oh", ///< height of the output video frame + "crop_w", "cw", ///< evaluated input crop width + "crop_h", "ch", ///< evaluated input crop height + "pos_w", "pw", ///< evaluated output placement width + "pos_h", "ph", ///< evaluated output placement height + "a", ///< iw/ih + "sar", ///< input pixel aspect ratio + "dar", 
///< output pixel aspect ratio + "hsub", ///< input horizontal subsampling factor + "vsub", ///< input vertical subsampling factor + "ohsub", ///< output horizontal subsampling factor + "ovsub", ///< output vertical subsampling factor + "in_t", "t", ///< input frame pts + "out_t", "ot", ///< output frame pts + "n", ///< number of frame + NULL, +}; + +enum var_name { + VAR_IN_W, VAR_IW, + VAR_IN_H, VAR_IH, + VAR_OUT_W, VAR_OW, + VAR_OUT_H, VAR_OH, + VAR_CROP_W, VAR_CW, + VAR_CROP_H, VAR_CH, + VAR_POS_W, VAR_PW, + VAR_POS_H, VAR_PH, + VAR_A, + VAR_SAR, + VAR_DAR, + VAR_HSUB, + VAR_VSUB, + VAR_OHSUB, + VAR_OVSUB, + VAR_IN_T, VAR_T, + VAR_OUT_T, VAR_OT, + VAR_N, + VAR_VARS_NB +}; + typedef struct LibplaceboContext { /* lavfi vulkan*/ FFVulkanContext vkctx; @@ -75,8 +120,16 @@ typedef struct LibplaceboContext { char *out_format_string; enum AVPixelFormat out_format; char *fillcolor; + double var_values[VAR_VARS_NB]; char *w_expr; char *h_expr; + char *crop_x_expr, *crop_y_expr; + char *crop_w_expr, *crop_h_expr; + char *pos_x_expr, *pos_y_expr; + char *pos_w_expr, *pos_h_expr; + // Parsed expressions for input/output crop + AVExpr *crop_x_pexpr, *crop_y_pexpr, *crop_w_pexpr, *crop_h_pexpr; + AVExpr *pos_x_pexpr, *pos_y_pexpr, *pos_w_pexpr, *pos_h_pexpr; AVRational target_sar; float pad_crop_ratio; int force_original_aspect_ratio; @@ -249,6 +302,7 @@ static void libplacebo_uninit(AVFilterContext *avctx); static int libplacebo_init(AVFilterContext *avctx) { + int err = 0; LibplaceboContext *s = avctx->priv; /* Create libplacebo log context */ @@ -273,8 +327,28 @@ static int libplacebo_init(AVFilterContext *avctx) s->out_format = AV_PIX_FMT_NONE; } + RET(av_expr_parse(&s->crop_x_pexpr, s->crop_x_expr, var_names, + NULL, NULL, NULL, NULL, 0, s)); + RET(av_expr_parse(&s->crop_y_pexpr, s->crop_y_expr, var_names, + NULL, NULL, NULL, NULL, 0, s)); + RET(av_expr_parse(&s->crop_w_pexpr, s->crop_w_expr, var_names, + NULL, NULL, NULL, NULL, 0, s)); + 
RET(av_expr_parse(&s->crop_h_pexpr, s->crop_h_expr, var_names, + NULL, NULL, NULL, NULL, 0, s)); + RET(av_expr_parse(&s->pos_x_pexpr, s->pos_x_expr, var_names, + NULL, NULL, NULL, NULL, 0, s)); + RET(av_expr_parse(&s->pos_y_pexpr, s->pos_y_expr, var_names, + NULL, NULL, NULL, NULL, 0, s)); + RET(av_expr_parse(&s->pos_w_pexpr, s->pos_w_expr, var_names, + NULL, NULL, NULL, NULL, 0, s)); + RET(av_expr_parse(&s->pos_h_pexpr, s->pos_h_expr, var_names, + NULL, NULL, NULL, NULL, 0, s)); + /* Note: s->vulkan etc. are initialized later, when hwctx is available */ return 0; + +fail: + return err; } static int init_vulkan(AVFilterContext *avctx) @@ -364,6 +438,15 @@ static void libplacebo_uninit(AVFilterContext *avctx) pl_log_destroy(&s->log); ff_vk_uninit(&s->vkctx); s->gpu = NULL; + + av_expr_free(s->crop_x_pexpr); + av_expr_free(s->crop_y_pexpr); + av_expr_free(s->crop_w_pexpr); + av_expr_free(s->crop_h_pexpr); + av_expr_free(s->pos_x_pexpr); + av_expr_free(s->pos_y_pexpr); + av_expr_free(s->pos_w_pexpr); + av_expr_free(s->pos_h_pexpr); } static int process_frames(AVFilterContext *avctx, AVFrame *out, AVFrame *in) @@ -398,6 +481,25 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out, AVFrame *in) if (!s->apply_filmgrain) image.film_grain.type = PL_FILM_GRAIN_NONE; + s->var_values[VAR_CROP_W] = s->var_values[VAR_CW] = + av_expr_eval(s->crop_w_pexpr, s->var_values, NULL); + s->var_values[VAR_CROP_H] = s->var_values[VAR_CH] = + av_expr_eval(s->crop_h_pexpr, s->var_values, NULL); + s->var_values[VAR_POS_W] = s->var_values[VAR_PW] = + av_expr_eval(s->pos_w_pexpr, s->var_values, NULL); + s->var_values[VAR_POS_H] = s->var_values[VAR_PH] = + av_expr_eval(s->pos_h_pexpr, s->var_values, NULL); + + image.crop.x0 = av_expr_eval(s->crop_x_pexpr, s->var_values, NULL); + image.crop.y0 = av_expr_eval(s->crop_y_pexpr, s->var_values, NULL); + image.crop.x1 = image.crop.x0 + s->var_values[VAR_CROP_W]; + image.crop.y1 = image.crop.y0 + s->var_values[VAR_CROP_H]; + + 
target.crop.x0 = av_expr_eval(s->pos_x_pexpr, s->var_values, NULL); + target.crop.y0 = av_expr_eval(s->pos_y_pexpr, s->var_values, NULL); + target.crop.x1 = target.crop.x0 + s->var_values[VAR_POS_W]; + target.crop.y1 = target.crop.y0 + s->var_values[VAR_POS_H]; + if (s->target_sar.num) { float aspect = pl_rect2df_aspect(&target.crop) * av_q2d(s->target_sar); pl_rect2df_aspect_set(&target.crop, aspect, s->pad_crop_ratio); @@ -530,6 +632,18 @@ static int filter_frame(AVFilterLink *link, AVFrame *in) out->width = outlink->w; out->height = outlink->h; + /* Dynamic variables */ + s->var_values[VAR_IN_T] = s->var_values[VAR_T] = + in->pts == AV_NOPTS_VALUE ? NAN : in->pts * av_q2d(link->time_base); + s->var_values[VAR_OUT_T] = s->var_values[VAR_OT] = + out->pts == AV_NOPTS_VALUE ? NAN : out->pts * av_q2d(outlink->time_base); + s->var_values[VAR_N] = link->frame_count_out; + /* Will be evaluated/set by `process_frames` */ + s->var_values[VAR_CROP_W] = s->var_values[VAR_CW] = NAN; + s->var_values[VAR_CROP_H] = s->var_values[VAR_CH] = NAN; + s->var_values[VAR_POS_W] = s->var_values[VAR_PW] = NAN; + s->var_values[VAR_POS_H] = s->var_values[VAR_PH] = NAN; + if (s->apply_dovi && av_frame_get_side_data(in, AV_FRAME_DATA_DOVI_METADATA)) { /* Output of dovi reshaping is always BT.2020+PQ, so infer the correct * output colorspace defaults */ @@ -660,6 +774,8 @@ static int libplacebo_config_output(AVFilterLink *outlink) AVFilterContext *avctx = outlink->src; LibplaceboContext *s = avctx->priv; AVFilterLink *inlink = outlink->src->inputs[0]; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); + const AVPixFmtDescriptor *out_desc = av_pix_fmt_desc_get(outlink->format); AVHWFramesContext *hwfc; AVVulkanFramesContext *vkfc; AVRational scale_sar; @@ -687,6 +803,21 @@ static int libplacebo_config_output(AVFilterLink *outlink) outlink->sample_aspect_ratio = scale_sar; } + /* Static variables */ + s->var_values[VAR_IN_W] = s->var_values[VAR_IW] = inlink->w; + 
s->var_values[VAR_IN_H] = s->var_values[VAR_IH] = inlink->h; + s->var_values[VAR_OUT_W] = s->var_values[VAR_OW] = outlink->w; + s->var_values[VAR_OUT_H] = s->var_values[VAR_OH] = outlink->h; + s->var_values[VAR_A] = (double) inlink->w / inlink->h; + s->var_values[VAR_SAR] = inlink->sample_aspect_ratio.num ? + av_q2d(inlink->sample_aspect_ratio) : 1.0; + s->var_values[VAR_DAR] = outlink->sample_aspect_ratio.num ? + av_q2d(outlink->sample_aspect_ratio) : 1.0; + s->var_values[VAR_HSUB] = 1 << desc->log2_chroma_w; + s->var_values[VAR_VSUB] = 1 << desc->log2_chroma_h; + s->var_values[VAR_OHSUB] = 1 << out_desc->log2_chroma_w; + s->var_values[VAR_OVSUB] = 1 << out_desc->log2_chroma_h; + if (outlink->format != AV_PIX_FMT_VULKAN) return 0; @@ -714,15 +845,23 @@ fail: #define DYNAMIC (STATIC | AV_OPT_FLAG_RUNTIME_PARAM) static const AVOption libplacebo_options[] = { - { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = STATIC }, - { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = STATIC }, + { "w", "Output video frame width", OFFSET(w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = STATIC }, + { "h", "Output video frame height", OFFSET(h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = STATIC }, + { "crop_x", "Input video crop x", OFFSET(crop_x_expr), AV_OPT_TYPE_STRING, {.str = "(iw-cw)/2"}, .flags = DYNAMIC }, + { "crop_y", "Input video crop y", OFFSET(crop_y_expr), AV_OPT_TYPE_STRING, {.str = "(ih-ch)/2"}, .flags = DYNAMIC }, + { "crop_w", "Input video crop w", OFFSET(crop_w_expr), AV_OPT_TYPE_STRING, {.str = "iw"}, .flags = DYNAMIC }, + { "crop_h", "Input video crop h", OFFSET(crop_h_expr), AV_OPT_TYPE_STRING, {.str = "ih"}, .flags = DYNAMIC }, + { "pos_x", "Output video placement x", OFFSET(pos_x_expr), AV_OPT_TYPE_STRING, {.str = "(ow-pw)/2"}, .flags = DYNAMIC }, + { "pos_y", "Output video placement y", OFFSET(pos_y_expr), AV_OPT_TYPE_STRING, {.str = "(oh-ph)/2"}, .flags = DYNAMIC }, + { 
"pos_w", "Output video placement w", OFFSET(pos_w_expr), AV_OPT_TYPE_STRING, {.str = "ow"}, .flags = DYNAMIC }, + { "pos_h", "Output video placement h", OFFSET(pos_h_expr), AV_OPT_TYPE_STRING, {.str = "oh"}, .flags = DYNAMIC }, { "format", "Output video format", OFFSET(out_format_string), AV_OPT_TYPE_STRING, .flags = STATIC }, { "force_original_aspect_ratio", "decrease or increase w/h if necessary to keep the original AR", OFFSET(force_original_aspect_ratio), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, STATIC, "force_oar" }, { "disable", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, 0, 0, STATIC, "force_oar" }, { "decrease", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, 0, 0, STATIC, "force_oar" }, { "increase", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = 2 }, 0, 0, STATIC, "force_oar" }, { "force_divisible_by", "enforce that the output resolution is divisible by a defined integer when force_original_aspect_ratio is used", OFFSET(force_divisible_by), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 256, STATIC }, - { "normalize_sar", "force SAR normalization to 1:1", OFFSET(normalize_sar), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, STATIC }, + { "normalize_sar", "force SAR normalization to 1:1 by adjusting pos_x/y/w/h", OFFSET(normalize_sar), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, STATIC }, { "pad_crop_ratio", "ratio between padding and cropping when normalizing SAR (0=pad, 1=crop)", OFFSET(pad_crop_ratio), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, 1.0, DYNAMIC }, { "fillcolor", "Background fill color", OFFSET(fillcolor), AV_OPT_TYPE_STRING, {.str = "black"}, .flags = DYNAMIC },