mirror of https://github.com/FFmpeg/FFmpeg.git
Calculate Spatial Info (SI) and Temporal Info (TI) scores for a video, as defined in ITU-T P.910: Subjective video quality assessment methods for multimedia applications.release/5.1
parent
451300d0e8
commit
80831e742b
9 changed files with 403 additions and 1 deletions
@ -0,0 +1,349 @@ |
||||
/*
|
||||
* Copyright (c) 2021 Boris Baracaldo |
||||
* Copyright (c) 2022 Thilo Borgmann |
||||
* |
||||
* This file is part of FFmpeg. |
||||
* |
||||
* FFmpeg is free software; you can redistribute it and/or modify |
||||
* it under the terms of the GNU General Public License as published by |
||||
* the Free Software Foundation; either version 2 of the License, or |
||||
* (at your option) any later version. |
||||
* |
||||
* FFmpeg is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
* GNU General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU General Public License along |
||||
* with FFmpeg; if not, write to the Free Software Foundation, Inc., |
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
||||
*/ |
||||
|
||||
/**
|
||||
* @file |
||||
* Calculate Spatial Info (SI) and Temporal Info (TI) scores |
||||
*/ |
||||
|
||||
#include <math.h> |
||||
|
||||
#include "libavutil/imgutils.h" |
||||
#include "libavutil/internal.h" |
||||
#include "libavutil/opt.h" |
||||
|
||||
#include "avfilter.h" |
||||
#include "formats.h" |
||||
#include "internal.h" |
||||
#include "video.h" |
||||
|
||||
static const int X_FILTER[9] = { |
||||
1, 0, -1, |
||||
2, 0, -2, |
||||
1, 0, -1 |
||||
}; |
||||
|
||||
static const int Y_FILTER[9] = { |
||||
1, 2, 1, |
||||
0, 0, 0, |
||||
-1, -2, -1 |
||||
}; |
||||
|
||||
typedef struct SiTiContext { |
||||
const AVClass *class; |
||||
int pixel_depth; |
||||
int width, height; |
||||
uint64_t nb_frames; |
||||
uint8_t *prev_frame; |
||||
float max_si; |
||||
float max_ti; |
||||
float min_si; |
||||
float min_ti; |
||||
float sum_si; |
||||
float sum_ti; |
||||
float *gradient_matrix; |
||||
float *motion_matrix; |
||||
int full_range; |
||||
int print_summary; |
||||
} SiTiContext; |
||||
|
||||
static const enum AVPixelFormat pix_fmts[] = { |
||||
AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, |
||||
AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, |
||||
AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, |
||||
AV_PIX_FMT_NONE |
||||
}; |
||||
|
||||
static av_cold int init(AVFilterContext *ctx) |
||||
{ |
||||
// User options but no input data
|
||||
SiTiContext *s = ctx->priv; |
||||
s->max_si = 0; |
||||
s->max_ti = 0; |
||||
return 0; |
||||
} |
||||
|
||||
static av_cold void uninit(AVFilterContext *ctx) |
||||
{ |
||||
SiTiContext *s = ctx->priv; |
||||
|
||||
if (s->print_summary) { |
||||
float avg_si = s->sum_si / s->nb_frames; |
||||
float avg_ti = s->sum_ti / s->nb_frames; |
||||
av_log(ctx, AV_LOG_INFO, |
||||
"SITI Summary:\nTotal frames: %"PRId64"\n\n" |
||||
"Spatial Information:\nAverage: %f\nMax: %f\nMin: %f\n\n" |
||||
"Temporal Information:\nAverage: %f\nMax: %f\nMin: %f\n", |
||||
s->nb_frames, avg_si, s->max_si, s->min_si, avg_ti, s->max_ti, s->min_ti |
||||
); |
||||
} |
||||
|
||||
av_freep(&s->prev_frame); |
||||
av_freep(&s->gradient_matrix); |
||||
av_freep(&s->motion_matrix); |
||||
} |
||||
|
||||
static int config_input(AVFilterLink *inlink) |
||||
{ |
||||
// Video input data avilable
|
||||
AVFilterContext *ctx = inlink->dst; |
||||
SiTiContext *s = ctx->priv; |
||||
int max_pixsteps[4]; |
||||
size_t pixel_sz; |
||||
size_t data_sz; |
||||
size_t gradient_sz; |
||||
size_t motion_sz; |
||||
|
||||
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); |
||||
av_image_fill_max_pixsteps(max_pixsteps, NULL, desc); |
||||
|
||||
// free previous buffers in case they are allocated already
|
||||
av_freep(&s->prev_frame); |
||||
av_freep(&s->gradient_matrix); |
||||
av_freep(&s->motion_matrix); |
||||
|
||||
s->pixel_depth = max_pixsteps[0]; |
||||
s->width = inlink->w; |
||||
s->height = inlink->h; |
||||
pixel_sz = s->pixel_depth == 1 ? sizeof(uint8_t) : sizeof(uint16_t); |
||||
data_sz = s->width * pixel_sz * s->height; |
||||
|
||||
s->prev_frame = av_malloc(data_sz); |
||||
|
||||
gradient_sz = (s->width - 2) * sizeof(float) * (s->height - 2); |
||||
s->gradient_matrix = av_malloc(gradient_sz); |
||||
|
||||
motion_sz = s->width * sizeof(float) * s->height; |
||||
s->motion_matrix = av_malloc(motion_sz); |
||||
|
||||
if (!s->prev_frame || ! s->gradient_matrix || !s->motion_matrix) { |
||||
return AVERROR(ENOMEM); |
||||
} |
||||
|
||||
return 0; |
||||
} |
||||
|
||||
// Determine whether the video is in full or limited range. If not defined, assume limited.
|
||||
static int is_full_range(AVFrame* frame) |
||||
{ |
||||
// If color range not specified, fallback to pixel format
|
||||
if (frame->color_range == AVCOL_RANGE_UNSPECIFIED || frame->color_range == AVCOL_RANGE_NB) |
||||
return frame->format == AV_PIX_FMT_YUVJ420P || frame->format == AV_PIX_FMT_YUVJ422P; |
||||
return frame->color_range == AVCOL_RANGE_JPEG; |
||||
} |
||||
|
||||
// Check frame's color range and convert to full range if needed
|
||||
static uint16_t convert_full_range(int factor, uint16_t y) |
||||
{ |
||||
int shift; |
||||
int limit_upper; |
||||
int full_upper; |
||||
int limit_y; |
||||
|
||||
// For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
|
||||
shift = 16 * factor; |
||||
limit_upper = 235 * factor - shift; |
||||
full_upper = 256 * factor - 1; |
||||
limit_y = fminf(fmaxf(y - shift, 0), limit_upper); |
||||
return (full_upper * limit_y / limit_upper); |
||||
} |
||||
|
||||
// Applies sobel convolution
|
||||
static void convolve_sobel(SiTiContext *s, const uint8_t *src, float *dst, int linesize) |
||||
{ |
||||
double x_conv_sum; |
||||
double y_conv_sum; |
||||
float gradient; |
||||
int ki; |
||||
int kj; |
||||
int index; |
||||
uint16_t data; |
||||
int filter_width = 3; |
||||
int filter_size = filter_width * filter_width; |
||||
int stride = linesize / s->pixel_depth; |
||||
// For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
|
||||
int factor = s->pixel_depth == 1 ? 1 : 4; |
||||
|
||||
// Dst matrix is smaller than src since we ignore edges that can't be convolved
|
||||
#define CONVOLVE(bps) \ |
||||
{ \
|
||||
uint##bps##_t *vsrc = (uint##bps##_t*)src; \
|
||||
for (int j = 1; j < s->height - 1; j++) { \
|
||||
for (int i = 1; i < s->width - 1; i++) { \
|
||||
x_conv_sum = 0.0; \
|
||||
y_conv_sum = 0.0; \
|
||||
for (int k = 0; k < filter_size; k++) { \
|
||||
ki = k % filter_width - 1; \
|
||||
kj = floor(k / filter_width) - 1; \
|
||||
index = (j + kj) * stride + (i + ki); \
|
||||
data = s->full_range ? vsrc[index] : convert_full_range(factor, vsrc[index]); \
|
||||
x_conv_sum += data * X_FILTER[k]; \
|
||||
y_conv_sum += data * Y_FILTER[k]; \
|
||||
} \
|
||||
gradient = sqrt(x_conv_sum * x_conv_sum + y_conv_sum * y_conv_sum); \
|
||||
dst[(j - 1) * (s->width - 2) + (i - 1)] = gradient; \
|
||||
} \
|
||||
} \
|
||||
} |
||||
|
||||
if (s->pixel_depth == 2) { |
||||
CONVOLVE(16); |
||||
} else { |
||||
CONVOLVE(8); |
||||
} |
||||
} |
||||
|
||||
// Calculate pixel difference between current and previous frame, and update previous
|
||||
static void calculate_motion(SiTiContext *s, const uint8_t *curr, |
||||
float *motion_matrix, int linesize) |
||||
{ |
||||
int stride = linesize / s->pixel_depth; |
||||
float motion; |
||||
int curr_index; |
||||
int prev_index; |
||||
uint16_t curr_data; |
||||
// For 8 bits, limited range goes from 16 to 235, for 10 bits the range is multiplied by 4
|
||||
int factor = s->pixel_depth == 1 ? 1 : 4; |
||||
|
||||
// Previous frame is already converted to full range
|
||||
#define CALCULATE(bps) \ |
||||
{ \
|
||||
uint##bps##_t *vsrc = (uint##bps##_t*)curr; \
|
||||
uint##bps##_t *vdst = (uint##bps##_t*)s->prev_frame; \
|
||||
for (int j = 0; j < s->height; j++) { \
|
||||
for (int i = 0; i < s->width; i++) { \
|
||||
motion = 0; \
|
||||
curr_index = j * stride + i; \
|
||||
prev_index = j * s->width + i; \
|
||||
curr_data = s->full_range ? vsrc[curr_index] : convert_full_range(factor, vsrc[curr_index]); \
|
||||
if (s->nb_frames > 1) \
|
||||
motion = curr_data - vdst[prev_index]; \
|
||||
vdst[prev_index] = curr_data; \
|
||||
motion_matrix[j * s->width + i] = motion; \
|
||||
} \
|
||||
} \
|
||||
} |
||||
|
||||
if (s->pixel_depth == 2) { |
||||
CALCULATE(16); |
||||
} else { |
||||
CALCULATE(8); |
||||
} |
||||
} |
||||
|
||||
static float std_deviation(float *img_metrics, int width, int height) |
||||
{ |
||||
int size = height * width; |
||||
double mean = 0.0; |
||||
double sqr_diff = 0; |
||||
|
||||
for (int j = 0; j < height; j++) |
||||
for (int i = 0; i < width; i++) |
||||
mean += img_metrics[j * width + i]; |
||||
|
||||
mean /= size; |
||||
|
||||
for (int j = 0; j < height; j++) { |
||||
for (int i = 0; i < width; i++) { |
||||
float mean_diff = img_metrics[j * width + i] - mean; |
||||
sqr_diff += (mean_diff * mean_diff); |
||||
} |
||||
} |
||||
sqr_diff = sqr_diff / size; |
||||
return sqrt(sqr_diff); |
||||
} |
||||
|
||||
static void set_meta(AVDictionary **metadata, const char *key, float d) |
||||
{ |
||||
char value[128]; |
||||
snprintf(value, sizeof(value), "%0.2f", d); |
||||
av_dict_set(metadata, key, value, 0); |
||||
} |
||||
|
||||
static int filter_frame(AVFilterLink *inlink, AVFrame *frame) |
||||
{ |
||||
AVFilterContext *ctx = inlink->dst; |
||||
SiTiContext *s = ctx->priv; |
||||
float si; |
||||
float ti; |
||||
|
||||
s->full_range = is_full_range(frame); |
||||
s->nb_frames++; |
||||
|
||||
// Calculate si and ti
|
||||
convolve_sobel(s, frame->data[0], s->gradient_matrix, frame->linesize[0]); |
||||
calculate_motion(s, frame->data[0], s->motion_matrix, frame->linesize[0]); |
||||
si = std_deviation(s->gradient_matrix, s->width - 2, s->height - 2); |
||||
ti = std_deviation(s->motion_matrix, s->width, s->height); |
||||
|
||||
// Calculate statistics
|
||||
s->max_si = fmaxf(si, s->max_si); |
||||
s->max_ti = fmaxf(ti, s->max_ti); |
||||
s->sum_si += si; |
||||
s->sum_ti += ti; |
||||
s->min_si = s->nb_frames == 1 ? si : fminf(si, s->min_si); |
||||
s->min_ti = s->nb_frames == 1 ? ti : fminf(ti, s->min_ti); |
||||
|
||||
// Set si ti information in frame metadata
|
||||
set_meta(&frame->metadata, "lavfi.siti.si", si); |
||||
set_meta(&frame->metadata, "lavfi.siti.ti", ti); |
||||
|
||||
return ff_filter_frame(inlink->dst->outputs[0], frame); |
||||
} |
||||
|
||||
#define OFFSET(x) offsetof(SiTiContext, x) |
||||
#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM |
||||
|
||||
static const AVOption siti_options[] = { |
||||
{ "print_summary", "Print summary showing average values", OFFSET(print_summary), AV_OPT_TYPE_BOOL, { .i64=0 }, 0, 1, FLAGS }, |
||||
{ NULL } |
||||
}; |
||||
|
||||
AVFILTER_DEFINE_CLASS(siti); |
||||
|
||||
static const AVFilterPad avfilter_vf_siti_inputs[] = { |
||||
{ |
||||
.name = "default", |
||||
.type = AVMEDIA_TYPE_VIDEO, |
||||
.config_props = config_input, |
||||
.filter_frame = filter_frame, |
||||
}, |
||||
}; |
||||
|
||||
static const AVFilterPad avfilter_vf_siti_outputs[] = { |
||||
{ |
||||
.name = "default", |
||||
.type = AVMEDIA_TYPE_VIDEO |
||||
}, |
||||
}; |
||||
|
||||
AVFilter ff_vf_siti = { |
||||
.name = "siti", |
||||
.description = NULL_IF_CONFIG_SMALL("Calculate spatial information (SI) and temporal information (TI)."), |
||||
.priv_size = sizeof(SiTiContext), |
||||
.priv_class = &siti_class, |
||||
.init = init, |
||||
.uninit = uninit, |
||||
.flags = AVFILTER_FLAG_METADATA_ONLY, |
||||
FILTER_PIXFMTS_ARRAY(pix_fmts), |
||||
FILTER_INPUTS(avfilter_vf_siti_inputs), |
||||
FILTER_OUTPUTS(avfilter_vf_siti_outputs), |
||||
}; |
@ -0,0 +1,15 @@ |
||||
frame:0 pts:0 pts_time:0 |
||||
lavfi.siti.si=106.72 |
||||
lavfi.siti.ti=0.00 |
||||
frame:1 pts:1 pts_time:1 |
||||
lavfi.siti.si=109.40 |
||||
lavfi.siti.ti=28.00 |
||||
frame:2 pts:2 pts_time:2 |
||||
lavfi.siti.si=109.29 |
||||
lavfi.siti.ti=28.38 |
||||
frame:3 pts:3 pts_time:3 |
||||
lavfi.siti.si=113.27 |
||||
lavfi.siti.ti=33.42 |
||||
frame:4 pts:4 pts_time:4 |
||||
lavfi.siti.si=110.87 |
||||
lavfi.siti.ti=30.53 |
Loading…
Reference in new issue