Merge remote-tracking branch 'qatar/master'

* qatar/master:
  vf_fade: support slice threading
  vf_yadif: support slice threading

Conflicts:
	libavfilter/vf_fade.c
	libavfilter/vf_yadif.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
12 years ago · 558ce3e5b7
parent 2516b393d4 8a994b7406
commit 558ce3e5b7
2 changed files with 116 additions and 52 deletions
--- a/libavfilter/vf_fade.c
+++ b/libavfilter/vf_fade.c
@@ -158,11 +158,61 @@ static void fade_plane(int y, int h, int w,
    }
 }

-static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+static int filter_slice_luma(AVFilterContext *ctx, void *arg, int jobnr,
+                             int nb_jobs)
 {
-    FadeContext *s = inlink->dst->priv;
-    uint8_t *p;
+    FadeContext *s = ctx->priv;
+    AVFrame *frame = arg;
+    int slice_h     = frame->height / nb_jobs;
+    int slice_start = jobnr * slice_h;
+    int slice_end   = (jobnr == nb_jobs - 1) ? frame->height : (jobnr + 1) * slice_h;
+    int i, j;
+
+    for (i = slice_start; i < slice_end; i++) {
+        uint8_t *p = frame->data[0] + i * frame->linesize[0];
+        for (j = 0; j < frame->width * s->bpp; j++) {
+            /* s->factor is using 16 lower-order bits for decimal
+             * places. 32768 = 1 << 15, it is an integer representation
+             * of 0.5 and is for rounding. */
+            *p = ((*p - s->black_level) * s->factor + s->black_level_scaled) >> 16;
+            p++;
+        }
+    }
+
+    return 0;
+}
+
+static int filter_slice_chroma(AVFilterContext *ctx, void *arg, int jobnr,
+                               int nb_jobs)
+{
+    FadeContext *s = ctx->priv;
+    AVFrame *frame = arg;
+    int slice_h     = FFALIGN(frame->height / nb_jobs, 1 << s->vsub);
+    int slice_start = jobnr * slice_h;
+    int slice_end   = (jobnr == nb_jobs - 1) ? frame->height : (jobnr + 1) * slice_h;
    int i, j, plane;
+    const int width = FF_CEIL_RSHIFT(frame->width, s->hsub);
+
+    for (plane = 1; plane < 3; plane++) {
+        for (i = slice_start; i < slice_end; i++) {
+            uint8_t *p = frame->data[plane] + (i >> s->vsub) * frame->linesize[plane];
+            for (j = 0; j < width; j++) {
+                /* 8421367 = ((128 << 1) + 1) << 15. It is an integer
+                 * representation of 128.5. The .5 is for rounding
+                 * purposes. */
+                *p = ((*p - 128) * s->factor + 8421367) >> 16;
+                p++;
+            }
+        }
+    }
+
+    return 0;
+}
+
+static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
+{
+    AVFilterContext *ctx = inlink->dst;
+    FadeContext *s       = ctx->priv;
    double frame_timestamp = frame->pts == AV_NOPTS_VALUE ? -1 : frame->pts * av_q2d(inlink->time_base);

    // Calculate Fade assuming this is a Fade In
@@ -216,7 +266,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
    if (s->factor < UINT16_MAX) {
        if (s->alpha) {
            // alpha only
-            plane = s->is_packed_rgb ? 0 : A; // alpha is on plane 0 for packed formats
+            int plane = s->is_packed_rgb ? 0 : A; // alpha is on plane 0 for packed formats
                                                 // or plane 3 for planar formats
            fade_plane(0, frame->height, inlink->w,
                       s->factor, s->black_level, s->black_level_scaled,
@@ -225,25 +275,13 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
                       1, frame->data[plane], frame->linesize[plane]);
        } else {
            /* luma or rgb plane */
-            fade_plane(0, frame->height, inlink->w,
-                       s->factor, s->black_level, s->black_level_scaled,
-                       0, 1, // offset & pixstep for Y plane or RGB packed format
-                       s->bpp, frame->data[0], frame->linesize[0]);
+            ctx->internal->execute(ctx, filter_slice_luma, frame, NULL,
+                                FFMIN(frame->height, ctx->graph->nb_threads));
+
            if (frame->data[1] && frame->data[2]) {
                /* chroma planes */
-                for (plane = 1; plane < 3; plane++) {
-                    for (i = 0; i < frame->height; i++) {
-                        const int width = FF_CEIL_RSHIFT(inlink->w, s->hsub);
-                        p = frame->data[plane] + (i >> s->vsub) * frame->linesize[plane];
-                        for (j = 0; j < width; j++) {
-                            /* 8421367 = ((128 << 1) + 1) << 15. It is an integer
-                             * representation of 128.5. The .5 is for rounding
-                             * purposes. */
-                            *p = ((*p - 128) * s->factor + 8421367) >> 16;
-                            p++;
-                        }
-                    }
-                }
+                ctx->internal->execute(ctx, filter_slice_chroma, frame, NULL,
+                                    FFMIN(frame->height, ctx->graph->nb_threads));
            }
        }
    }
@@ -314,4 +352,5 @@ AVFilter avfilter_vf_fade = {

    .inputs    = avfilter_vf_fade_inputs,
    .outputs   = avfilter_vf_fade_outputs,
+    .flags     = AVFILTER_FLAG_SLICE_THREADS,
 };
--- a/libavfilter/vf_yadif.c
+++ b/libavfilter/vf_yadif.c
@@ -31,6 +31,14 @@
 #undef NDEBUG
 #include <assert.h>

+typedef struct ThreadData {
+    AVFrame *frame;
+    int plane;
+    int w, h;
+    int parity;
+    int tff;
+} ThreadData;
+
 #define CHECK(j)\
    {   int score = FFABS(cur[mrefs - 1 + (j)] - cur[prefs - 1 - (j)])\
                  + FFABS(cur[mrefs  +(j)] - cur[prefs  -(j)])\
@@ -172,50 +180,67 @@ static void filter_edges_16bit(void *dst1, void *prev1, void *cur1, void *next1,
    FILTER(w - 3, w, 0)
 }

+static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    YADIFContext *s = ctx->priv;
+    ThreadData *td  = arg;
+    int refs = s->cur->linesize[td->plane];
+    int df = (s->csp->comp[td->plane].depth_minus1 + 8) / 8;
+    int pix_3 = 3 * df;
+    int slice_h = td->h / nb_jobs;
+    int slice_start = jobnr * slice_h;
+    int slice_end   = (jobnr == nb_jobs - 1) ? td->h : (jobnr + 1) * slice_h;
+    int y;
+
+    /* filtering reads 3 pixels to the left/right; to avoid invalid reads,
+     * we need to call the c variant which avoids this for border pixels
+     */
+    for (y = slice_start; y < slice_end; y++) {
+        if ((y ^ td->parity) & 1) {
+            uint8_t *prev = &s->prev->data[td->plane][y * refs];
+            uint8_t *cur  = &s->cur ->data[td->plane][y * refs];
+            uint8_t *next = &s->next->data[td->plane][y * refs];
+            uint8_t *dst  = &td->frame->data[td->plane][y * td->frame->linesize[td->plane]];
+            int     mode  = y == 1 || y + 2 == td->h ? 2 : s->mode;
+            s->filter_line(dst + pix_3, prev + pix_3, cur + pix_3,
+                           next + pix_3, td->w - 6,
+                           y + 1 < td->h ? refs : -refs,
+                           y ? -refs : refs,
+                           td->parity ^ td->tff, mode);
+            s->filter_edges(dst, prev, cur, next, td->w,
+                            y + 1 < td->h ? refs : -refs,
+                            y ? -refs : refs,
+                            td->parity ^ td->tff, mode);
+        } else {
+            memcpy(&td->frame->data[td->plane][y * td->frame->linesize[td->plane]],
+                   &s->cur->data[td->plane][y * refs], td->w * df);
+        }
+    }
+    return 0;
+}
+
 static void filter(AVFilterContext *ctx, AVFrame *dstpic,
                   int parity, int tff)
 {
    YADIFContext *yadif = ctx->priv;
-    int y, i;
+    ThreadData td = { .frame = dstpic, .parity = parity, .tff = tff };
+    int i;

    for (i = 0; i < yadif->csp->nb_components; i++) {
        int w = dstpic->width;
        int h = dstpic->height;
-        int refs = yadif->cur->linesize[i];
-        int df = (yadif->csp->comp[i].depth_minus1 + 8) / 8;
-        int pix_3 = 3 * df;

        if (i == 1 || i == 2) {
-        /* Why is this not part of the per-plane description thing? */
            w = FF_CEIL_RSHIFT(w, yadif->csp->log2_chroma_w);
            h = FF_CEIL_RSHIFT(h, yadif->csp->log2_chroma_h);
        }

-        /* filtering reads 3 pixels to the left/right; to avoid invalid reads,
-         * we need to call the c variant which avoids this for border pixels
-         */
-
-        for (y = 0; y < h; y++) {
-            if ((y ^ parity) & 1) {
-                uint8_t *prev = &yadif->prev->data[i][y * refs];
-                uint8_t *cur  = &yadif->cur ->data[i][y * refs];
-                uint8_t *next = &yadif->next->data[i][y * refs];
-                uint8_t *dst  = &dstpic->data[i][y * dstpic->linesize[i]];
-                int     mode  = y == 1 || y + 2 == h ? 2 : yadif->mode;
-                yadif->filter_line(dst + pix_3, prev + pix_3, cur + pix_3,
-                                   next + pix_3, w - 6,
-                                   y + 1 < h ? refs : -refs,
-                                   y ? -refs : refs,
-                                   parity ^ tff, mode);
-                yadif->filter_edges(dst, prev, cur, next, w,
-                                    y + 1 < h ? refs : -refs,
-                                    y ? -refs : refs,
-                                    parity ^ tff, mode);
-            } else {
-                memcpy(&dstpic->data[i][y * dstpic->linesize[i]],
-                       &yadif->cur->data[i][y * refs], w * df);
-            }
-        }
+
+        td.w       = w;
+        td.h       = h;
+        td.plane   = i;
+
+        ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(h, ctx->graph->nb_threads));
    }

    emms_c();
@@ -484,5 +509,5 @@ AVFilter avfilter_vf_yadif = {

    .inputs    = avfilter_vf_yadif_inputs,
    .outputs   = avfilter_vf_yadif_outputs,
-    .flags     = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL,
+    .flags     = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
 };