avcodec/vvcdec: split ctu table into zero-init and non-zero-init parts

The cus table needs to be initialized to zero; the other parts do not.
5 months ago · e8ac761293
parent 4f60961498
commit e8ac761293
6 changed files with 20 additions and 28 deletions
--- a/libavcodec/vvc/ctu.c
+++ b/libavcodec/vvc/ctu.c
@ -1174,7 +1174,7 @@ static CodingUnit* alloc_cu(VVCLocalContext *lc, const int x0, const int y0)
    const VVCPPS *pps   = fc->ps.pps;
    const int rx        = x0 >> sps->ctb_log2_size_y;
    const int ry        = y0 >> sps->ctb_log2_size_y;
-    CTU *ctu            = fc->tab.ctus + ry * pps->ctb_width + rx;
+    CodingUnit **cus    = fc->tab.cus + ry * pps->ctb_width + rx;
    CodingUnit *cu      = ff_refstruct_pool_get(fc->cu_pool);

    if (!cu)
@ -1184,7 +1184,7 @@ static CodingUnit* alloc_cu(VVCLocalContext *lc, const int x0, const int y0)
    if (lc->cu)
        lc->cu->next = cu;
    else
-        ctu->cus = cu;
+        *cus = cu;
    lc->cu = cu;

    return cu;
@ -2429,7 +2429,9 @@ static void ctu_get_pred(VVCLocalContext *lc, const int rs)
    const VVCFrameContext *fc       = lc->fc;
    const H266RawSliceHeader *rsh   = lc->sc->sh.r;
    CTU *ctu                        = fc->tab.ctus + rs;
-    const CodingUnit *cu            = ctu->cus;
+    const CodingUnit *cu            = fc->tab.cus[rs];
+
+    ctu->has_dmvr = 0;

    if (IS_I(rsh))
        return;
@ -2526,9 +2528,8 @@ void ff_vvc_set_neighbour_available(VVCLocalContext *lc,
    lc->na.cand_up_right = lc->na.cand_up_right_sap && (x0 + w) < lc->end_of_tiles_x;
 }

-void ff_vvc_ctu_free_cus(CTU *ctu)
+void ff_vvc_ctu_free_cus(CodingUnit **cus)
 {
-    CodingUnit **cus  = &ctu->cus;
    while (*cus) {
        CodingUnit *cu          = *cus;
        TransformUnit **head    = &cu->tus.head;
--- a/libavcodec/vvc/ctu.h
+++ b/libavcodec/vvc/ctu.h
@ -329,7 +329,6 @@ typedef struct CodingUnit {
 } CodingUnit;

 typedef struct CTU {
-    CodingUnit *cus;
    int max_y[2][VVC_MAX_REF_ENTRIES];
    int max_y_idx[2];
    int has_dmvr;
@ -484,7 +483,7 @@ int ff_vvc_coding_tree_unit(VVCLocalContext *lc, int ctu_idx, int rs, int rx, in
 //utils
 void ff_vvc_set_neighbour_available(VVCLocalContext *lc, int x0, int y0, int w, int h);
 void ff_vvc_decode_neighbour(VVCLocalContext *lc, int x_ctb, int y_ctb, int rx, int ry, int rs);
-void ff_vvc_ctu_free_cus(CTU *ctu);
+void ff_vvc_ctu_free_cus(CodingUnit **cus);
 int ff_vvc_get_qPy(const VVCFrameContext *fc, int xc, int yc);
 void ff_vvc_ep_init_stat_coeff(EntryPoint *ep, int bit_depth, int persistent_rice_adaptation_enabled_flag);

--- a/libavcodec/vvc/dec.c
+++ b/libavcodec/vvc/dec.c
@ -91,17 +91,6 @@ static int tl_create(TabList *l)
    return 0;
 }

-static void ctu_tl_init(TabList *l, VVCFrameContext *fc)
-{
-    const VVCPPS *pps   = fc->ps.pps;
-    const int ctu_count = pps ? pps->ctb_count : 0;
-    const int changed   = fc->tab.sz.ctu_count != ctu_count;
-
-    tl_init(l, 1, changed);
-
-    TL_ADD(ctus,    ctu_count);
-}
-
 static void ctu_nz_tl_init(TabList *l, VVCFrameContext *fc)
 {
    const VVCSPS *sps   = fc->ps.sps;
@ -112,6 +101,8 @@ static void ctu_nz_tl_init(TabList *l, VVCFrameContext *fc)

    tl_init(l, 0, changed);

+    TL_ADD(cus,     ctu_count);
+    TL_ADD(ctus,    ctu_count);
    TL_ADD(deblock, ctu_count);
    TL_ADD(sao,     ctu_count);
    TL_ADD(alf,     ctu_count);
@ -307,7 +298,6 @@ typedef void (*tl_init_fn)(TabList *l, VVCFrameContext *fc);
 static int frame_context_for_each_tl(VVCFrameContext *fc, int (*unary_fn)(TabList *l))
 {
    const tl_init_fn init[] = {
-        ctu_tl_init,
        ctu_nz_tl_init,
        min_cb_tl_init,
        min_cb_nz_tl_init,
@ -334,9 +324,9 @@ static int frame_context_for_each_tl(VVCFrameContext *fc, int (*unary_fn)(TabLis

 static void free_cus(VVCFrameContext *fc)
 {
-    if (fc->tab.ctus) {
+    if (fc->tab.cus) {
        for (int i = 0; i < fc->tab.sz.ctu_count; i++)
-            ff_vvc_ctu_free_cus(fc->tab.ctus + i);
+            ff_vvc_ctu_free_cus(fc->tab.cus + i);
    }
 }

@ -364,6 +354,9 @@ static int pic_arrays_init(VVCContext *s, VVCFrameContext *fc)
    if (ret < 0)
        return ret;

+    // for error handling case, we may call free_cus before VVC_TASK_STAGE_INIT, so we need to set cus to 0 here
+    memset(fc->tab.cus, 0, sizeof(*fc->tab.cus) * ctu_count);
+
    memset(fc->tab.slice_idx, -1, sizeof(*fc->tab.slice_idx) * ctu_count);

    if (fc->tab.sz.ctu_count != ctu_count) {
--- a/libavcodec/vvc/dec.h
+++ b/libavcodec/vvc/dec.h
@ -187,8 +187,9 @@ typedef struct VVCFrameContext {
        uint8_t *alf_pixel_buffer_h[VVC_MAX_SAMPLE_ARRAYS][2];
        uint8_t *alf_pixel_buffer_v[VVC_MAX_SAMPLE_ARRAYS][2];

-        int         *coeffs;
-        struct CTU  *ctus;
+        int                 *coeffs;
+        struct CTU          *ctus;
+        struct CodingUnit  **cus;

        uint8_t *ibc_vir_buf[VVC_MAX_SAMPLE_ARRAYS];    ///< IbcVirBuf[]

--- a/libavcodec/vvc/inter.c
+++ b/libavcodec/vvc/inter.c
@ -1003,8 +1003,7 @@ static int has_inter_luma(const CodingUnit *cu)
 int ff_vvc_predict_inter(VVCLocalContext *lc, const int rs)
 {
    const VVCFrameContext *fc = lc->fc;
-    const CTU *ctu            = fc->tab.ctus + rs;
-    CodingUnit *cu            = ctu->cus;
+    CodingUnit *cu            = fc->tab.cus[rs];

    while (cu) {
        lc->cu = cu;
--- a/libavcodec/vvc/intra.c
+++ b/libavcodec/vvc/intra.c
@ -664,8 +664,7 @@ int ff_vvc_reconstruct(VVCLocalContext *lc, const int rs, const int rx, const in
    const VVCSPS *sps           = fc->ps.sps;
    const int x_ctb             = rx << sps->ctb_log2_size_y;
    const int y_ctb             = ry << sps->ctb_log2_size_y;
-    CTU *ctu                    = fc->tab.ctus + rs;
-    CodingUnit *cu              = ctu->cus;
+    CodingUnit *cu              = fc->tab.cus[rs];
    int ret                     = 0;

    lc->num_ras[0] = lc->num_ras[1] = 0;
@ -691,7 +690,7 @@ int ff_vvc_reconstruct(VVCLocalContext *lc, const int rs, const int rx, const in
            ibc_fill_vir_buf(lc, cu);
        cu = cu->next;
    }
-    ff_vvc_ctu_free_cus(ctu);
+    ff_vvc_ctu_free_cus(fc->tab.cus + rs);
    return ret;
 }