|
|
|
@ -29,22 +29,40 @@ |
|
|
|
|
#include "framesync.h" |
|
|
|
|
#include "internal.h" |
|
|
|
|
|
|
|
|
|
typedef struct Sums { |
|
|
|
|
uint64_t s[2]; |
|
|
|
|
} Sums; |
|
|
|
|
|
|
|
|
|
typedef struct QSums { |
|
|
|
|
float s[3]; |
|
|
|
|
} QSums; |
|
|
|
|
|
|
|
|
|
typedef struct CorrContext { |
|
|
|
|
const AVClass *class; |
|
|
|
|
FFFrameSync fs; |
|
|
|
|
double score, min_score, max_score, score_comp[4]; |
|
|
|
|
uint64_t nb_frames; |
|
|
|
|
int nb_threads; |
|
|
|
|
int is_rgb; |
|
|
|
|
uint8_t rgba_map[4]; |
|
|
|
|
int max[4]; |
|
|
|
|
char comps[4]; |
|
|
|
|
float mean[4][2]; |
|
|
|
|
Sums *sums; |
|
|
|
|
QSums *qsums; |
|
|
|
|
int nb_components; |
|
|
|
|
int planewidth[4]; |
|
|
|
|
int planeheight[4]; |
|
|
|
|
int (*filter_slice)(AVFilterContext *ctx, void *arg, |
|
|
|
|
int jobnr, int nb_jobs); |
|
|
|
|
int (*sum_slice)(AVFilterContext *ctx, void *arg, |
|
|
|
|
int jobnr, int nb_jobs); |
|
|
|
|
int (*corr_slice)(AVFilterContext *ctx, void *arg, |
|
|
|
|
int jobnr, int nb_jobs); |
|
|
|
|
} CorrContext; |
|
|
|
|
|
|
|
|
|
typedef struct ThreadData { |
|
|
|
|
AVFrame *master, *ref; |
|
|
|
|
} ThreadData; |
|
|
|
|
|
|
|
|
|
#define OFFSET(x) offsetof(CorrContext, x) |
|
|
|
|
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM |
|
|
|
|
|
|
|
|
@ -66,27 +84,31 @@ static void set_meta(AVFilterContext *ctx, |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#define CORR(type, name) \ |
|
|
|
|
static void f##name(AVFilterContext *ctx, AVFrame *master, \
|
|
|
|
|
AVFrame *ref, double *comp_score) \
|
|
|
|
|
#define SUM(type, name) \ |
|
|
|
|
static int sum_##name(AVFilterContext *ctx, void *arg, \
|
|
|
|
|
int jobnr, int nb_jobs) \
|
|
|
|
|
{ \
|
|
|
|
|
CorrContext *s = ctx->priv; \
|
|
|
|
|
ThreadData *td = arg; \
|
|
|
|
|
AVFrame *master = td->master; \
|
|
|
|
|
AVFrame *ref = td->ref; \
|
|
|
|
|
\
|
|
|
|
|
for (int c = 0; c < s->nb_components; c++) { \
|
|
|
|
|
const ptrdiff_t linesize1 = master->linesize[c] / \
|
|
|
|
|
sizeof(type); \
|
|
|
|
|
const ptrdiff_t linesize2 = ref->linesize[c] / \
|
|
|
|
|
sizeof(type); \
|
|
|
|
|
const type *src1 = (const type *)master->data[c]; \
|
|
|
|
|
const type *src2 = (const type *)ref->data[c]; \
|
|
|
|
|
const int h = s->planeheight[c]; \
|
|
|
|
|
const int w = s->planewidth[c]; \
|
|
|
|
|
const float scale = 1.f / s->max[c]; \
|
|
|
|
|
const int slice_start = (h * jobnr) / nb_jobs; \
|
|
|
|
|
const int slice_end = (h * (jobnr+1)) / nb_jobs; \
|
|
|
|
|
const type *src1 = (const type *)master->data[c] + \
|
|
|
|
|
linesize1 * slice_start; \
|
|
|
|
|
const type *src2 = (const type *)ref->data[c] + \
|
|
|
|
|
linesize2 * slice_start; \
|
|
|
|
|
uint64_t sum1 = 0, sum2 = 0; \
|
|
|
|
|
float sum12, sum1q, sum2q; \
|
|
|
|
|
float sumq, mean1, mean2; \
|
|
|
|
|
\
|
|
|
|
|
for (int y = 0; y < h; y++) { \
|
|
|
|
|
for (int y = slice_start; y < slice_end; y++) { \
|
|
|
|
|
for (int x = 0; x < w; x++) { \
|
|
|
|
|
sum1 += src1[x]; \
|
|
|
|
|
sum2 += src2[x]; \
|
|
|
|
@ -96,17 +118,47 @@ static void f##name(AVFilterContext *ctx, AVFrame *master, \ |
|
|
|
|
src2 += linesize2; \
|
|
|
|
|
} \
|
|
|
|
|
\
|
|
|
|
|
mean1 = scale * (sum1 /(double)(w * h)); \
|
|
|
|
|
mean2 = scale * (sum2 /(double)(w * h)); \
|
|
|
|
|
s->sums[jobnr * s->nb_components + c].s[0] = sum1; \
|
|
|
|
|
s->sums[jobnr * s->nb_components + c].s[1] = sum2; \
|
|
|
|
|
} \
|
|
|
|
|
\
|
|
|
|
|
src1 = (const type *)master->data[c]; \
|
|
|
|
|
src2 = (const type *)ref->data[c]; \
|
|
|
|
|
return 0; \
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
SUM(uint8_t, slice8) |
|
|
|
|
SUM(uint16_t, slice16) |
|
|
|
|
|
|
|
|
|
#define CORR(type, name) \ |
|
|
|
|
static int corr_##name(AVFilterContext *ctx, void *arg, \
|
|
|
|
|
int jobnr, int nb_jobs) \
|
|
|
|
|
{ \
|
|
|
|
|
CorrContext *s = ctx->priv; \
|
|
|
|
|
ThreadData *td = arg; \
|
|
|
|
|
AVFrame *master = td->master; \
|
|
|
|
|
AVFrame *ref = td->ref; \
|
|
|
|
|
\
|
|
|
|
|
for (int c = 0; c < s->nb_components; c++) { \
|
|
|
|
|
const ptrdiff_t linesize1 = master->linesize[c] / \
|
|
|
|
|
sizeof(type); \
|
|
|
|
|
const ptrdiff_t linesize2 = ref->linesize[c] / \
|
|
|
|
|
sizeof(type); \
|
|
|
|
|
const type *src1 = (const type *)master->data[c]; \
|
|
|
|
|
const type *src2 = (const type *)ref->data[c]; \
|
|
|
|
|
const int h = s->planeheight[c]; \
|
|
|
|
|
const int w = s->planewidth[c]; \
|
|
|
|
|
const int slice_start = (h * jobnr) / nb_jobs; \
|
|
|
|
|
const int slice_end = (h * (jobnr+1)) / nb_jobs; \
|
|
|
|
|
const float scale = 1.f / s->max[c]; \
|
|
|
|
|
const float mean1 = s->mean[c][0]; \
|
|
|
|
|
const float mean2 = s->mean[c][1]; \
|
|
|
|
|
float sum12 = 0.f, sum1q = 0.f, sum2q = 0.f; \
|
|
|
|
|
\
|
|
|
|
|
sum12 = 0.f; \
|
|
|
|
|
sum1q = 0.f; \
|
|
|
|
|
sum2q = 0.f; \
|
|
|
|
|
src1 = (const type *)master->data[c] + \
|
|
|
|
|
slice_start * linesize1; \
|
|
|
|
|
src2 = (const type *)ref->data[c] + \
|
|
|
|
|
slice_start * linesize2; \
|
|
|
|
|
\
|
|
|
|
|
for (int y = 0; y < h; y++) { \
|
|
|
|
|
for (int y = slice_start; y < slice_end; y++) { \
|
|
|
|
|
for (int x = 0; x < w; x++) { \
|
|
|
|
|
const float f1 = scale * src1[x] - mean1; \
|
|
|
|
|
const float f2 = scale * src2[x] - mean2; \
|
|
|
|
@ -120,17 +172,16 @@ static void f##name(AVFilterContext *ctx, AVFrame *master, \ |
|
|
|
|
src2 += linesize2; \
|
|
|
|
|
} \
|
|
|
|
|
\
|
|
|
|
|
sumq = sqrtf(sum1q * sum2q); \
|
|
|
|
|
if (sumq > 0.f) { \
|
|
|
|
|
comp_score[c] = av_clipf(sum12 / sumq,-1.f,1.f); \
|
|
|
|
|
} else { \
|
|
|
|
|
comp_score[c] = sum1q == sum2q ? 1.f : 0.f; \
|
|
|
|
|
} \
|
|
|
|
|
s->qsums[jobnr * s->nb_components + c].s[0] = sum12; \
|
|
|
|
|
s->qsums[jobnr * s->nb_components + c].s[1] = sum1q; \
|
|
|
|
|
s->qsums[jobnr * s->nb_components + c].s[2] = sum2q; \
|
|
|
|
|
} \
|
|
|
|
|
\
|
|
|
|
|
return 0; \
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
CORR(uint8_t, corr8) |
|
|
|
|
CORR(uint16_t, corr16) |
|
|
|
|
CORR(uint8_t, slice8) |
|
|
|
|
CORR(uint16_t, slice16) |
|
|
|
|
|
|
|
|
|
static int do_corr(FFFrameSync *fs) |
|
|
|
|
{ |
|
|
|
@ -139,6 +190,7 @@ static int do_corr(FFFrameSync *fs) |
|
|
|
|
AVFrame *master, *ref; |
|
|
|
|
double comp_score[4], score = 0.; |
|
|
|
|
AVDictionary **metadata; |
|
|
|
|
ThreadData td; |
|
|
|
|
int ret; |
|
|
|
|
|
|
|
|
|
ret = ff_framesync_dualinput_get(fs, &master, &ref); |
|
|
|
@ -148,10 +200,42 @@ static int do_corr(FFFrameSync *fs) |
|
|
|
|
return ff_filter_frame(ctx->outputs[0], master); |
|
|
|
|
metadata = &master->metadata; |
|
|
|
|
|
|
|
|
|
if (s->max[0] > 255) { |
|
|
|
|
fcorr16(ctx, master, ref, comp_score); |
|
|
|
|
} else { |
|
|
|
|
fcorr8(ctx, master, ref, comp_score); |
|
|
|
|
td.master = master; |
|
|
|
|
td.ref = ref; |
|
|
|
|
ff_filter_execute(ctx, s->sum_slice, &td, NULL, |
|
|
|
|
FFMIN(s->planeheight[1], s->nb_threads)); |
|
|
|
|
|
|
|
|
|
for (int c = 0; c < s->nb_components; c++) { |
|
|
|
|
const double scale = 1.f / s->max[c]; |
|
|
|
|
uint64_t sum1 = 0, sum2 = 0; |
|
|
|
|
|
|
|
|
|
for (int n = 0; n < s->nb_threads; n++) { |
|
|
|
|
sum1 += s->sums[n * s->nb_components + c].s[0]; |
|
|
|
|
sum2 += s->sums[n * s->nb_components + c].s[1]; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
s->mean[c][0] = scale * (sum1 /(double)(s->planewidth[c] * s->planeheight[c])); |
|
|
|
|
s->mean[c][1] = scale * (sum2 /(double)(s->planewidth[c] * s->planeheight[c])); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
ff_filter_execute(ctx, s->corr_slice, &td, NULL, |
|
|
|
|
FFMIN(s->planeheight[1], s->nb_threads)); |
|
|
|
|
|
|
|
|
|
for (int c = 0; c < s->nb_components; c++) { |
|
|
|
|
double sumq, sum12 = 0.0, sum1q = 0.0, sum2q = 0.0; |
|
|
|
|
|
|
|
|
|
for (int n = 0; n < s->nb_threads; n++) { |
|
|
|
|
sum12 += s->qsums[n * s->nb_components + c].s[0]; |
|
|
|
|
sum1q += s->qsums[n * s->nb_components + c].s[1]; |
|
|
|
|
sum2q += s->qsums[n * s->nb_components + c].s[2]; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
sumq = sqrt(sum1q * sum2q); |
|
|
|
|
if (sumq > 0.0) { |
|
|
|
|
comp_score[c] = av_clipd(sum12 / sumq,-1.0,1.0); |
|
|
|
|
} else { |
|
|
|
|
comp_score[c] = sum1q == sum2q ? 1.f : 0.f; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for (int c = 0; c < s->nb_components; c++) |
|
|
|
@ -205,6 +289,7 @@ static int config_input_ref(AVFilterLink *inlink) |
|
|
|
|
AVFilterContext *ctx = inlink->dst; |
|
|
|
|
CorrContext *s = ctx->priv; |
|
|
|
|
|
|
|
|
|
s->nb_threads = ff_filter_get_nb_threads(ctx); |
|
|
|
|
s->nb_components = desc->nb_components; |
|
|
|
|
if (ctx->inputs[0]->w != ctx->inputs[1]->w || |
|
|
|
|
ctx->inputs[0]->h != ctx->inputs[1]->h) { |
|
|
|
@ -223,6 +308,11 @@ static int config_input_ref(AVFilterLink *inlink) |
|
|
|
|
s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w); |
|
|
|
|
s->planewidth[0] = s->planewidth[3] = inlink->w; |
|
|
|
|
|
|
|
|
|
s->sums = av_calloc(s->nb_threads * s->nb_components, sizeof(*s->sums)); |
|
|
|
|
s->qsums = av_calloc(s->nb_threads * s->nb_components, sizeof(*s->qsums)); |
|
|
|
|
if (!s->qsums || !s->sums) |
|
|
|
|
return AVERROR(ENOMEM); |
|
|
|
|
|
|
|
|
|
s->min_score = +INFINITY; |
|
|
|
|
s->max_score = -INFINITY; |
|
|
|
|
|
|
|
|
@ -231,6 +321,9 @@ static int config_input_ref(AVFilterLink *inlink) |
|
|
|
|
s->max[2] = (1 << desc->comp[2].depth) - 1; |
|
|
|
|
s->max[3] = (1 << desc->comp[3].depth) - 1; |
|
|
|
|
|
|
|
|
|
s->sum_slice = desc->comp[0].depth > 8 ? sum_slice16 : sum_slice8; |
|
|
|
|
s->corr_slice = desc->comp[0].depth > 8 ? corr_slice16 : corr_slice8; |
|
|
|
|
|
|
|
|
|
return 0; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -291,6 +384,8 @@ static av_cold void uninit(AVFilterContext *ctx) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
ff_framesync_uninit(&s->fs); |
|
|
|
|
av_freep(&s->qsums); |
|
|
|
|
av_freep(&s->sums); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static const AVFilterPad corr_inputs[] = { |
|
|
|
@ -332,5 +427,6 @@ const AVFilter ff_vf_corr = { |
|
|
|
|
FILTER_OUTPUTS(corr_outputs), |
|
|
|
|
FILTER_PIXFMTS_ARRAY(pix_fmts), |
|
|
|
|
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | |
|
|
|
|
AVFILTER_FLAG_SLICE_THREADS | |
|
|
|
|
AVFILTER_FLAG_METADATA_ONLY, |
|
|
|
|
}; |
|
|
|
|