From c05ba94ce87a785924e81982be4476b978def3cc Mon Sep 17 00:00:00 2001 From: Nuo Mi Date: Tue, 5 Dec 2023 22:45:14 +0800 Subject: [PATCH] vvcdec: add intra prediction Co-authored-by: Xu Mu Co-authored-by: Frank Plowman Co-authored-by: Shaun Loo Co-authored-by: Wu Jianhua --- libavcodec/vvc/Makefile | 1 + libavcodec/vvc/vvc_ctu.c | 50 ++ libavcodec/vvc/vvc_ctu.h | 2 + libavcodec/vvc/vvc_intra.c | 797 +++++++++++++++++++++ libavcodec/vvc/vvc_intra.h | 49 ++ libavcodec/vvc/vvc_intra_template.c | 1015 +++++++++++++++++++++++++++ 6 files changed, 1914 insertions(+) create mode 100644 libavcodec/vvc/vvc_intra.c create mode 100644 libavcodec/vvc/vvc_intra.h create mode 100644 libavcodec/vvc/vvc_intra_template.c diff --git a/libavcodec/vvc/Makefile b/libavcodec/vvc/Makefile index f28a47b0ee..35bb565680 100644 --- a/libavcodec/vvc/Makefile +++ b/libavcodec/vvc/Makefile @@ -5,6 +5,7 @@ OBJS-$(CONFIG_VVC_DECODER) += vvc/vvc_cabac.o \ vvc/vvc_ctu.o \ vvc/vvc_data.o \ vvc/vvc_inter.o \ + vvc/vvc_intra.o \ vvc/vvc_itx_1d.o \ vvc/vvc_mvs.o \ vvc/vvc_ps.o \ diff --git a/libavcodec/vvc/vvc_ctu.c b/libavcodec/vvc/vvc_ctu.c index df027e1465..7bbb9a7e5c 100644 --- a/libavcodec/vvc/vvc_ctu.c +++ b/libavcodec/vvc/vvc_ctu.c @@ -20,10 +20,41 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavcodec/refstruct.h" + #include "vvc_cabac.h" #include "vvc_ctu.h" #include "vvc_mvs.h" +void ff_vvc_decode_neighbour(VVCLocalContext *lc, const int x_ctb, const int y_ctb, + const int rx, const int ry, const int rs) +{ + VVCFrameContext *fc = lc->fc; + const int ctb_size = fc->ps.sps->ctb_size_y; + + lc->end_of_tiles_x = fc->ps.sps->width; + lc->end_of_tiles_y = fc->ps.sps->height; + if (fc->ps.pps->ctb_to_col_bd[rx] != fc->ps.pps->ctb_to_col_bd[rx + 1]) + lc->end_of_tiles_x = FFMIN(x_ctb + ctb_size, lc->end_of_tiles_x); + if (fc->ps.pps->ctb_to_row_bd[ry] != fc->ps.pps->ctb_to_row_bd[ry + 1]) + lc->end_of_tiles_y = FFMIN(y_ctb + ctb_size, 
lc->end_of_tiles_y); + + lc->boundary_flags = 0; + if (rx > 0 && fc->ps.pps->ctb_to_col_bd[rx] != fc->ps.pps->ctb_to_col_bd[rx - 1]) + lc->boundary_flags |= BOUNDARY_LEFT_TILE; + if (rx > 0 && fc->tab.slice_idx[rs] != fc->tab.slice_idx[rs - 1]) + lc->boundary_flags |= BOUNDARY_LEFT_SLICE; + if (ry > 0 && fc->ps.pps->ctb_to_row_bd[ry] != fc->ps.pps->ctb_to_row_bd[ry - 1]) + lc->boundary_flags |= BOUNDARY_UPPER_TILE; + if (ry > 0 && fc->tab.slice_idx[rs] != fc->tab.slice_idx[rs - fc->ps.pps->ctb_width]) + lc->boundary_flags |= BOUNDARY_UPPER_SLICE; + lc->ctb_left_flag = rx > 0 && !(lc->boundary_flags & BOUNDARY_LEFT_TILE); + lc->ctb_up_flag = ry > 0 && !(lc->boundary_flags & BOUNDARY_UPPER_TILE) && !(lc->boundary_flags & BOUNDARY_UPPER_SLICE); + lc->ctb_up_right_flag = lc->ctb_up_flag && (fc->ps.pps->ctb_to_col_bd[rx] == fc->ps.pps->ctb_to_col_bd[rx + 1]) && + (fc->ps.pps->ctb_to_row_bd[ry] == fc->ps.pps->ctb_to_row_bd[ry - 1]); + lc->ctb_up_left_flag = lc->ctb_left_flag && lc->ctb_up_flag; +} + void ff_vvc_set_neighbour_available(VVCLocalContext *lc, const int x0, const int y0, const int w, const int h) { @@ -39,6 +70,25 @@ void ff_vvc_set_neighbour_available(VVCLocalContext *lc, lc->na.cand_up_right = lc->na.cand_up_right_sap && (x0 + w) < lc->end_of_tiles_x; } +void ff_vvc_ctu_free_cus(CTU *ctu) +{ + CodingUnit **cus = &ctu->cus; + while (*cus) { + CodingUnit *cu = *cus; + TransformUnit **head = &cu->tus.head; + + *cus = cu->next; + + while (*head) { + TransformUnit *tu = *head; + *head = tu->next; + ff_refstruct_unref(&tu); + } + cu->tus.tail = NULL; + + ff_refstruct_unref(&cu); + } +} void ff_vvc_ep_init_stat_coeff(EntryPoint *ep, const int bit_depth, const int persistent_rice_adaptation_enabled_flag) diff --git a/libavcodec/vvc/vvc_ctu.h b/libavcodec/vvc/vvc_ctu.h index d35b680aff..47c9f181bb 100644 --- a/libavcodec/vvc/vvc_ctu.h +++ b/libavcodec/vvc/vvc_ctu.h @@ -461,6 +461,8 @@ typedef struct ALFParams { //utils void 
ff_vvc_set_neighbour_available(VVCLocalContext *lc, int x0, int y0, int w, int h); +void ff_vvc_decode_neighbour(VVCLocalContext *lc, int x_ctb, int y_ctb, int rx, int ry, int rs); +void ff_vvc_ctu_free_cus(CTU *ctu); void ff_vvc_ep_init_stat_coeff(EntryPoint *ep, int bit_depth, int persistent_rice_adaptation_enabled_flag); #endif // AVCODEC_VVC_VVC_CTU_H diff --git a/libavcodec/vvc/vvc_intra.c b/libavcodec/vvc/vvc_intra.c new file mode 100644 index 0000000000..43de312a71 --- /dev/null +++ b/libavcodec/vvc/vvc_intra.c @@ -0,0 +1,797 @@ +/* + * VVC intra prediction + * + * Copyright (C) 2021 Nuo Mi + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "libavutil/frame.h" + +#include "vvc_data.h" +#include "vvc_inter.h" +#include "vvc_intra.h" +#include "vvc_itx_1d.h" + +static int is_cclm(enum IntraPredMode mode) +{ + return mode == INTRA_LT_CCLM || mode == INTRA_L_CCLM || mode == INTRA_T_CCLM; +} + +static int derive_ilfnst_pred_mode_intra(const VVCLocalContext *lc, const TransformBlock *tb) +{ + const VVCFrameContext *fc = lc->fc; + const VVCSPS *sps = fc->ps.sps; + const CodingUnit *cu = lc->cu; + const int x_tb = tb->x0 >> fc->ps.sps->min_cb_log2_size_y; + const int y_tb = tb->y0 >> fc->ps.sps->min_cb_log2_size_y; + const int x_c = (tb->x0 + (tb->tb_width << sps->hshift[1] >> 1) ) >> fc->ps.sps->min_cb_log2_size_y; + const int y_c = (tb->y0 + (tb->tb_height << sps->vshift[1] >> 1)) >> fc->ps.sps->min_cb_log2_size_y; + const int min_cb_width = fc->ps.pps->min_cb_width; + const int intra_mip_flag = SAMPLE_CTB(fc->tab.imf, x_tb, y_tb); + int pred_mode_intra = tb->c_idx == 0 ? 
cu->intra_pred_mode_y : cu->intra_pred_mode_c; + if (intra_mip_flag && !tb->c_idx) { + pred_mode_intra = INTRA_PLANAR; + } else if (is_cclm(pred_mode_intra)) { + int intra_mip_flag_c = SAMPLE_CTB(fc->tab.imf, x_c, y_c); + int cu_pred_mode = SAMPLE_CTB(fc->tab.cpm[0], x_c, y_c); + if (intra_mip_flag_c) { + pred_mode_intra = INTRA_PLANAR; + } else if (cu_pred_mode == MODE_IBC || cu_pred_mode == MODE_PLT) { + pred_mode_intra = INTRA_DC; + } else { + pred_mode_intra = SAMPLE_CTB(fc->tab.ipm, x_c, y_c); + } + } + pred_mode_intra = ff_vvc_wide_angle_mode_mapping(cu, tb->tb_width, tb->tb_height, tb->c_idx, pred_mode_intra); + + return pred_mode_intra; +} + +//8.7.4 Transformation process for scaled transform coefficients +static void ilfnst_transform(const VVCLocalContext *lc, TransformBlock *tb) +{ + const VVCSPS *sps = lc->fc->ps.sps; + const CodingUnit *cu = lc->cu; + const int w = tb->tb_width; + const int h = tb->tb_height; + const int n_lfnst_out_size = (w >= 8 && h >= 8) ? 48 : 16; ///< nLfnstOutSize + const int log2_lfnst_size = (w >= 8 && h >= 8) ? 3 : 2; ///< log2LfnstSize + const int n_lfnst_size = 1 << log2_lfnst_size; ///< nLfnstSize + const int non_zero_size = ((w == 8 && h == 8) || (w == 4 && h == 4)) ? 
//8.7.4 Transformation process for scaled transform coefficients
/**
 * Apply the inverse LFNST to the low-frequency corner of a transform block,
 * in place.
 *
 * Up to 16 coefficients are gathered from tb->coeffs through the
 * ff_vvc_diag_scan_x/y tables, transformed by ff_vvc_inv_lfnst_1d(), and the
 * 16 or 48 results are written back to the top-left 4x4 or 8x8 region of
 * tb->coeffs. tb->max_scan_x/y are then set to the last column/row of that
 * region.
 *
 * @param lc local context; supplies the SPS and the current CU
 * @param tb transform block whose coefficients are rewritten
 */
static void ilfnst_transform(const VVCLocalContext *lc, TransformBlock *tb)
{
    const VVCSPS *sps = lc->fc->ps.sps;
    const CodingUnit *cu = lc->cu;
    const int w = tb->tb_width;
    const int h = tb->tb_height;
    const int n_lfnst_out_size = (w >= 8 && h >= 8) ? 48 : 16; ///< nLfnstOutSize
    const int log2_lfnst_size = (w >= 8 && h >= 8) ? 3 : 2; ///< log2LfnstSize
    const int n_lfnst_size = 1 << log2_lfnst_size; ///< nLfnstSize
    const int non_zero_size = ((w == 8 && h == 8) || (w == 4 && h == 4)) ? 8 : 16; ///< nonZeroSize
    const int pred_mode_intra = derive_ilfnst_pred_mode_intra(lc, tb);
    const int transpose = pred_mode_intra > 34;
    int u[16], v[48];

    // gather the input coefficients; the scan tables are defined elsewhere
    // (presumably [2][2] selects the 4x4 diagonal scan - TODO confirm)
    for (int x = 0; x < non_zero_size; x++) {
        int xc = ff_vvc_diag_scan_x[2][2][x];
        int yc = ff_vvc_diag_scan_y[2][2][x];
        u[x] = tb->coeffs[w * yc + xc];
    }
    ff_vvc_inv_lfnst_1d(v, u, non_zero_size, n_lfnst_out_size, pred_mode_intra,
                        cu->lfnst_idx, sps->log2_transform_range);
    if (transpose) {
        // write v[] column by column: output row y takes every
        // n_lfnst_size-th element starting at v[y]
        int *dst = tb->coeffs;
        const int *src = v;
        if (n_lfnst_size == 4) {
            for (int y = 0; y < 4; y++) {
                dst[0] = src[0];
                dst[1] = src[4];
                dst[2] = src[8];
                dst[3] = src[12];
                src++;
                dst += w;
            }
        } else {
            for (int y = 0; y < 8; y++) {
                dst[0] = src[0];
                dst[1] = src[8];
                dst[2] = src[16];
                dst[3] = src[24];
                // only 48 outputs exist: columns 4..7 are written for the
                // first four rows only, from the 4-wide tail v[32..47]
                if (y < 4) {
                    dst[4] = src[32];
                    dst[5] = src[36];
                    dst[6] = src[40];
                    dst[7] = src[44];
                }
                src++;
                dst += w;
            }
        }

    } else {
        // write v[] row by row: the first four rows take n_lfnst_size values,
        // any further rows (8x8 case) take four values each
        int *dst = tb->coeffs;
        const int *src = v;
        for (int y = 0; y < n_lfnst_size; y++) {
            int size = (y < 4) ? n_lfnst_size : 4;
            memcpy(dst, src, size * sizeof(int));
            src += size;
            dst += w;
        }
    }
    // later stages must now treat the whole LFNST region as possibly non-zero
    tb->max_scan_x = n_lfnst_size - 1;
    tb->max_scan_y = n_lfnst_size - 1;
}
DST7 : DCT2; + } + return; + } + *trh = mts_to_trh[cu->mts_idx]; + *trv = mts_to_trv[cu->mts_idx]; +} + +static void add_residual_for_joint_coding_chroma(VVCLocalContext *lc, + const TransformUnit *tu, TransformBlock *tb, const int chroma_scale) +{ + const VVCFrameContext *fc = lc->fc; + const CodingUnit *cu = lc->cu; + const int c_sign = 1 - 2 * fc->ps.ph.r->ph_joint_cbcr_sign_flag; + const int shift = tu->coded_flag[1] ^ tu->coded_flag[2]; + const int c_idx = 1 + tu->coded_flag[1]; + const ptrdiff_t stride = fc->frame->linesize[c_idx]; + const int hs = fc->ps.sps->hshift[c_idx]; + const int vs = fc->ps.sps->vshift[c_idx]; + uint8_t *dst = &fc->frame->data[c_idx][(tb->y0 >> vs) * stride + + ((tb->x0 >> hs) << fc->ps.sps->pixel_shift)]; + if (chroma_scale) { + fc->vvcdsp.itx.pred_residual_joint(tb->coeffs, tb->tb_width, tb->tb_height, c_sign, shift); + fc->vvcdsp.intra.lmcs_scale_chroma(lc, tb->coeffs, tb->coeffs, tb->tb_width, tb->tb_height, cu->x0, cu->y0); + fc->vvcdsp.itx.add_residual(dst, tb->coeffs, tb->tb_width, tb->tb_height, stride); + } else { + fc->vvcdsp.itx.add_residual_joint(dst, tb->coeffs, tb->tb_width, tb->tb_height, stride, c_sign, shift); + } +} + +static int add_reconstructed_area(VVCLocalContext *lc, const int ch_type, const int x0, const int y0, const int w, const int h) +{ + const VVCSPS *sps = lc->fc->ps.sps; + const int hs = sps->hshift[ch_type]; + const int vs = sps->vshift[ch_type]; + ReconstructedArea *a; + + if (lc->num_ras[ch_type] >= FF_ARRAY_ELEMS(lc->ras[ch_type])) + return AVERROR_INVALIDDATA; + + a = &lc->ras[ch_type][lc->num_ras[ch_type]]; + a->x = x0 >> hs; + a->y = y0 >> vs; + a->w = w >> hs; + a->h = h >> vs; + lc->num_ras[ch_type]++; + + return 0; +} + +static void add_tu_area(const TransformUnit *tu, int *x0, int *y0, int *w, int *h) +{ + *x0 = tu->x0; + *y0 = tu->y0; + *w = tu->width; + *h = tu->height; +} + +#define MIN_ISP_PRED_WIDTH 4 +static int get_luma_predict_unit(const CodingUnit *cu, const TransformUnit *tu, const 
int idx, int *x0, int *y0, int *w, int *h) +{ + int has_luma = 1; + add_tu_area(tu, x0, y0, w, h); + if (cu->isp_split_type == ISP_VER_SPLIT && tu->width < MIN_ISP_PRED_WIDTH) { + *w = MIN_ISP_PRED_WIDTH; + has_luma = !(idx % (MIN_ISP_PRED_WIDTH / tu->width)); + } + return has_luma; +} + +static int get_chroma_predict_unit(const CodingUnit *cu, const TransformUnit *tu, const int idx, int *x0, int *y0, int *w, int *h) +{ + if (cu->isp_split_type == ISP_NO_SPLIT) { + add_tu_area(tu, x0, y0, w, h); + return 1; + } + if (idx == cu->num_intra_subpartitions - 1) { + *x0 = cu->x0; + *y0 = cu->y0; + *w = cu->cb_width; + *h = cu->cb_height; + return 1; + } + return 0; +} + +//8.4.5.1 General decoding process for intra blocks +static void predict_intra(VVCLocalContext *lc, const TransformUnit *tu, const int idx, const int target_ch_type) +{ + const VVCFrameContext *fc = lc->fc; + const CodingUnit *cu = lc->cu; + const VVCTreeType tree_type = cu->tree_type; + int x0, y0, w, h; + if (cu->pred_mode != MODE_INTRA) { + add_reconstructed_area(lc, target_ch_type, tu->x0, tu->y0, tu->width, tu->height); + return; + } + if (!target_ch_type && tree_type != DUAL_TREE_CHROMA) { + if (get_luma_predict_unit(cu, tu, idx, &x0, &y0, &w, &h)) { + ff_vvc_set_neighbour_available(lc, x0, y0, w, h); + fc->vvcdsp.intra.intra_pred(lc, x0, y0, w, h, 0); + add_reconstructed_area(lc, 0, x0, y0, w, h); + } + } + if (target_ch_type && tree_type != DUAL_TREE_LUMA) { + if (get_chroma_predict_unit(cu, tu, idx, &x0, &y0, &w, &h)){ + ff_vvc_set_neighbour_available(lc, x0, y0, w, h); + if (is_cclm(cu->intra_pred_mode_c)) { + fc->vvcdsp.intra.intra_cclm_pred(lc, x0, y0, w, h); + } else { + fc->vvcdsp.intra.intra_pred(lc, x0, y0, w, h, 1); + fc->vvcdsp.intra.intra_pred(lc, x0, y0, w, h, 2); + } + add_reconstructed_area(lc, 1, x0, y0, w, h); + } + } +} + +static void scale_clip(int *coeff, const int nzw, const int w, const int h, + const int shift, const int log2_transform_range) +{ + const int add = 1 << (shift 
/**
 * Round-shift every element of a w x h array.
 *
 * Each output is (in + (1 << (shift - 1))) >> shift. out and in may be the
 * same array: every element is read before it is written.
 *
 * @param out   destination, w * h elements
 * @param in    source, w * h elements
 * @param w     row length
 * @param h     number of rows
 * @param shift right shift applied after rounding
 */
static void scale(int *out, const int *in, const int w, const int h, const int shift)
{
    const int rounding = 1 << (shift - 1);

    for (int row = 0; row < h; row++) {
        const int *src = in  + row * w;
        int *dst       = out + row * w;

        for (int col = 0; col < w; col++)
            dst[col] = (src[col] + rounding) >> shift;
    }
}
1 : 0; + } + if (tb->ts) { + const int qp_prime_ts_min = 4 + 6 * sps->r->sps_min_qp_prime_ts; + + tb->qp = av_clip(qp + qp_act_offset, qp_prime_ts_min, 63 + sps->qp_bd_offset); + tb->rect_non_ts_flag = 0; + tb->bd_shift = 10; + } else { + const int log_sum = tb->log2_tb_width + tb->log2_tb_height; + const int rect_non_ts_flag = log_sum & 1; + + tb->qp = av_clip(qp + qp_act_offset, 0, 63 + sps->qp_bd_offset); + tb->rect_non_ts_flag = rect_non_ts_flag; + tb->bd_shift = sps->bit_depth + rect_non_ts_flag + (log_sum / 2) + + 10 - sps->log2_transform_range + rsh->sh_dep_quant_used_flag; + } + tb->bd_offset = (1 << tb->bd_shift) >> 1; +} + +//8.7.3 Scaling process for transform coefficients +static av_always_inline int derive_scale(const TransformBlock *tb, const int sh_dep_quant_used_flag) +{ + static const uint8_t rem6[63 + 2 * 6 + 1] = { + 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, + 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, + 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, + 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3 + }; + + static const uint8_t div6[63 + 2 * 6 + 1] = { + 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, + 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, + 10, 10, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12 + }; + + const static int level_scale[2][6] = { + { 40, 45, 51, 57, 64, 72 }, + { 57, 64, 72, 80, 90, 102 } + }; + const int addin = sh_dep_quant_used_flag && !tb->ts; + const int qp = tb->qp + addin; + + return level_scale[tb->rect_non_ts_flag][rem6[qp]] << div6[qp]; +} + +//8.7.3 Scaling process for transform coefficients +static const uint8_t* derive_scale_m(const VVCLocalContext *lc, const TransformBlock *tb, uint8_t *scale_m) +{ + //Table 38 – Specification of the scaling matrix identifier variable id according to predMode, cIdx, nTbW, and nTbH + const int ids[2][3][6] = { + { + { 0, 2, 8, 14, 20, 26 
}, + { 0, 3, 9, 15, 21, 21 }, + { 0, 4, 10, 16, 22, 22 } + }, + { + { 0, 5, 11, 17, 23, 27 }, + { 0, 6, 12, 18, 24, 24 }, + { 1, 7, 13, 19, 25, 25 }, + } + }; + const VVCFrameParamSets *ps = &lc->fc->ps; + const VVCSPS *sps = ps->sps; + const H266RawSliceHeader *rsh = lc->sc->sh.r; + const CodingUnit *cu = lc->cu; + const VVCScalingList *sl = ps->sl; + const int id = ids[cu->pred_mode != MODE_INTRA][tb->c_idx][FFMAX(tb->log2_tb_height, tb->log2_tb_width) - 1]; + const int log2_matrix_size = (id < 2) ? 1 : (id < 8) ? 2 : 3; + uint8_t *p = scale_m; + + av_assert0(!sps->r->sps_scaling_matrix_for_alternative_colour_space_disabled_flag); + + if (!rsh->sh_explicit_scaling_list_used_flag || tb->ts || + sps->r->sps_scaling_matrix_for_lfnst_disabled_flag && cu->apply_lfnst_flag[tb->c_idx]) + return ff_vvc_default_scale_m; + + if (!sl) { + av_log(lc->fc->log_ctx, AV_LOG_WARNING, "bug: no scaling list aps, id = %d", ps->ph.r->ph_scaling_list_aps_id); + return ff_vvc_default_scale_m; + } + + for (int y = tb->min_scan_y; y <= tb->max_scan_y; y++) { + const int off = y << log2_matrix_size >> tb->log2_tb_height << log2_matrix_size; + const uint8_t *m = &sl->scaling_matrix_rec[id][off]; + + for (int x = tb->min_scan_x; x <= tb->max_scan_x; x++) + *p++ = m[x << log2_matrix_size >> tb->log2_tb_width]; + } + if (id >= SL_START_16x16 && !tb->min_scan_x && !tb->min_scan_y) + *scale_m = sl->scaling_matrix_dc_rec[id - SL_START_16x16]; + + return scale_m; +} + +//8.7.3 Scaling process for transform coefficients +static av_always_inline int scale_coeff(const TransformBlock *tb, int coeff, + const int scale, const int scale_m, const int log2_transform_range) +{ + coeff = (coeff * scale * scale_m + tb->bd_offset) >> tb->bd_shift; + coeff = av_clip_intp2(coeff, log2_transform_range); + return coeff; +} + +static void dequant(const VVCLocalContext *lc, const TransformUnit *tu, TransformBlock *tb) +{ + uint8_t tmp[MAX_TB_SIZE * MAX_TB_SIZE]; + const H266RawSliceHeader *rsh = lc->sc->sh.r; + 
/**
 * Dequantise the coefficients of a transform block in place.
 *
 * Only the min_scan..max_scan rectangle is visited; zero coefficients are
 * left untouched. The scaling factors are consumed in the same row-major
 * order in which derive_scale_m() produces them.
 *
 * @param lc local context
 * @param tu transform unit owning tb
 * @param tb transform block; derive_qp() updates its qp/bd_* fields here
 */
static void dequant(const VVCLocalContext *lc, const TransformUnit *tu, TransformBlock *tb)
{
    uint8_t tmp[MAX_TB_SIZE * MAX_TB_SIZE];
    const H266RawSliceHeader *rsh = lc->sc->sh.r;
    const VVCSPS *sps = lc->fc->ps.sps;
    // points either into tmp or at the shared default table
    const uint8_t *scale_m = derive_scale_m(lc, tb, tmp);
    int scale;

    derive_qp(lc, tu, tb);
    scale = derive_scale(tb, rsh->sh_dep_quant_used_flag);

    for (int y = tb->min_scan_y; y <= tb->max_scan_y; y++) {
        for (int x = tb->min_scan_x; x <= tb->max_scan_x; x++) {
            int *coeff = tb->coeffs + y * tb->tb_width + x;

            if (*coeff)
                *coeff = scale_coeff(tb, *coeff, scale, *scale_m, sps->log2_transform_range);
            // advance even for zero coefficients to stay aligned with (x, y)
            scale_m++;
        }
    }
}

//transmatrix[0][0]
#define DCT_A 64
/**
 * Two-dimensional inverse transform of a transform block, in place.
 *
 * A square block with a single DC coefficient and DCT2 in both directions is
 * filled directly with the transformed DC value. Otherwise the vertical
 * transform runs on the first max_scan_x + 1 columns, the result is scaled
 * and clipped, then the horizontal transform runs on every row and the
 * result is scaled again.
 *
 * @param fc  frame context; supplies the SPS and the DSP function table
 * @param tb  transform block
 * @param trh horizontal transform type
 * @param trv vertical transform type
 */
static void itx_2d(const VVCFrameContext *fc, TransformBlock *tb, const enum TxType trh, const enum TxType trv)
{
    const VVCSPS *sps = fc->ps.sps;
    const int w = tb->tb_width;
    const int h = tb->tb_height;
    // number of possibly non-zero columns / rows
    const size_t nzw = tb->max_scan_x + 1;
    const size_t nzh = tb->max_scan_y + 1;
    // shift after the first (vertical) and second (horizontal) stage
    const int shift[] = { 7, 5 + sps->log2_transform_range - sps->bit_depth };

    if (w == h && nzw == 1 && nzh == 1 && trh == DCT2 && trv == DCT2) {
        // DC-only shortcut: both stages reduce to a multiplication by DCT_A
        const int add[] = { 1 << (shift[0] - 1), 1 << (shift[1] - 1) };
        const int t = (tb->coeffs[0] * DCT_A + add[0]) >> shift[0];
        const int dc = (t * DCT_A + add[1]) >> shift[1];

        for (int i = 0; i < w * h; i++)
            tb->coeffs[i] = dc;

        return;
    }

    // vertical stage: one call per column, stepping by w between rows
    for (int x = 0; x < nzw; x++)
        fc->vvcdsp.itx.itx[trv][tb->log2_tb_height - 1](tb->coeffs + x, w, nzh);
    scale_clip(tb->coeffs, nzw, w, h, shift[0], sps->log2_transform_range);

    // horizontal stage: one call per row
    for (int y = 0; y < h; y++)
        fc->vvcdsp.itx.itx[trh][tb->log2_tb_width - 1](tb->coeffs + y * w, 1, nzw);
    scale(tb->coeffs, tb->coeffs, w, h, shift[1]);
}
sps->bit_depth; + const int add = 1 << (shift - 1); + const int dc = (tb->coeffs[0] * DCT_A + add) >> shift; + + for (int i = 0; i < w * h; i++) + tb->coeffs[i] = dc; + + return; + } + + if (w > 1) + fc->vvcdsp.itx.itx[trh][tb->log2_tb_width - 1](tb->coeffs, 1, nzw); + else + fc->vvcdsp.itx.itx[trv][tb->log2_tb_height - 1](tb->coeffs, 1, nzh); + scale(tb->coeffs, tb->coeffs, w, h, 6 + sps->log2_transform_range - sps->bit_depth); +} + +static void transform_bdpcm(TransformBlock *tb, const VVCLocalContext *lc, const CodingUnit *cu) +{ + const VVCSPS *sps = lc->fc->ps.sps; + const IntraPredMode mode = tb->c_idx ? cu->intra_pred_mode_c : cu->intra_pred_mode_y; + const int vertical = mode == INTRA_VERT; + lc->fc->vvcdsp.itx.transform_bdpcm(tb->coeffs, tb->tb_width, tb->tb_height, + vertical, sps->log2_transform_range); + if (vertical) + tb->max_scan_y = tb->tb_height - 1; + else + tb->max_scan_x = tb->tb_width - 1; +} + +static void itransform(VVCLocalContext *lc, TransformUnit *tu, const int tu_idx, const int target_ch_type) +{ + const VVCFrameContext *fc = lc->fc; + const VVCSPS *sps = fc->ps.sps; + const VVCSH *sh = &lc->sc->sh; + const CodingUnit *cu = lc->cu; + const int ps = fc->ps.sps->pixel_shift; + DECLARE_ALIGNED(32, int, temp)[MAX_TB_SIZE * MAX_TB_SIZE]; + + for (int i = 0; i < tu->nb_tbs; i++) { + TransformBlock *tb = &tu->tbs[i]; + const int c_idx = tb->c_idx; + const int ch_type = c_idx > 0; + + if (ch_type == target_ch_type && tb->has_coeffs) { + const int w = tb->tb_width; + const int h = tb->tb_height; + const int chroma_scale = ch_type && sh->r->sh_lmcs_used_flag && fc->ps.ph.r->ph_chroma_residual_scale_flag && (w * h > 4); + const ptrdiff_t stride = fc->frame->linesize[c_idx]; + const int hs = sps->hshift[c_idx]; + const int vs = sps->vshift[c_idx]; + uint8_t *dst = &fc->frame->data[c_idx][(tb->y0 >> vs) * stride + ((tb->x0 >> hs) << ps)]; + + if (cu->bdpcm_flag[tb->c_idx]) + transform_bdpcm(tb, lc, cu); + dequant(lc, tu, tb); + if (!tb->ts) { + enum 
/**
 * Reconstruct the residual of one transform unit for one channel type and
 * add it to the frame.
 *
 * For every transform block of the requested channel type that has
 * coefficients: optional BDPCM, dequantisation, inverse LFNST and inverse
 * transform (both skipped for transform-skip blocks), optional LMCS chroma
 * scaling, then addition to the frame plane. Jointly coded chroma
 * additionally updates a second chroma plane.
 *
 * @param lc             local context
 * @param tu             transform unit
 * @param tu_idx         index of tu inside the CU (not used in this function)
 * @param target_ch_type 0 for luma, 1 for chroma
 */
static void itransform(VVCLocalContext *lc, TransformUnit *tu, const int tu_idx, const int target_ch_type)
{
    const VVCFrameContext *fc = lc->fc;
    const VVCSPS *sps = fc->ps.sps;
    const VVCSH *sh = &lc->sc->sh;
    const CodingUnit *cu = lc->cu;
    const int ps = fc->ps.sps->pixel_shift;
    // scratch buffer for the LMCS-scaled chroma residual
    DECLARE_ALIGNED(32, int, temp)[MAX_TB_SIZE * MAX_TB_SIZE];

    for (int i = 0; i < tu->nb_tbs; i++) {
        TransformBlock *tb = &tu->tbs[i];
        const int c_idx = tb->c_idx;
        const int ch_type = c_idx > 0;

        if (ch_type == target_ch_type && tb->has_coeffs) {
            const int w = tb->tb_width;
            const int h = tb->tb_height;
            // chroma residual scaling applies only to chroma blocks with more than 4 samples
            const int chroma_scale = ch_type && sh->r->sh_lmcs_used_flag && fc->ps.ph.r->ph_chroma_residual_scale_flag && (w * h > 4);
            const ptrdiff_t stride = fc->frame->linesize[c_idx];
            const int hs = sps->hshift[c_idx];
            const int vs = sps->vshift[c_idx];
            uint8_t *dst = &fc->frame->data[c_idx][(tb->y0 >> vs) * stride + ((tb->x0 >> hs) << ps)];

            if (cu->bdpcm_flag[tb->c_idx])
                transform_bdpcm(tb, lc, cu);
            dequant(lc, tu, tb);
            if (!tb->ts) {
                enum TxType trh, trv;

                if (cu->apply_lfnst_flag[c_idx])
                    ilfnst_transform(lc, tb);
                derive_transform_type(fc, lc, tb, &trh, &trv);
                if (w > 1 && h > 1)
                    itx_2d(fc, tb, trh, trv);
                else
                    itx_1d(fc, tb, trh, trv);
            }

            // the scaled residual goes to temp so tb->coeffs stays unscaled
            // for the joint CbCr step below
            if (chroma_scale)
                fc->vvcdsp.intra.lmcs_scale_chroma(lc, temp, tb->coeffs, w, h, cu->x0, cu->y0);
            fc->vvcdsp.itx.add_residual(dst, chroma_scale ? temp : tb->coeffs, w, h, stride);

            if (tu->joint_cbcr_residual_flag && tb->c_idx)
                add_residual_for_joint_coding_chroma(lc, tu, tb, chroma_scale);
        }
    }
}

/**
 * Predict and reconstruct every transform unit of the current CU (lc->cu).
 *
 * Luma is handled unless the CU belongs to a chroma-only tree. Chroma is
 * handled when the stream has chroma and the CU is not in a luma-only tree.
 *
 * @return always 0 as written
 */
static int reconstruct(VVCLocalContext *lc)
{
    VVCFrameContext *fc = lc->fc;
    CodingUnit *cu = lc->cu;
    // channel types to process: 0 = luma, 1 = chroma
    const int start = cu->tree_type == DUAL_TREE_CHROMA;
    const int end = fc->ps.sps->r->sps_chroma_format_idc && (cu->tree_type != DUAL_TREE_LUMA);

    for (int ch_type = start; ch_type <= end; ch_type++) {
        TransformUnit *tu = cu->tus.head;
        for (int i = 0; tu; i++) {
            predict_intra(lc, tu, i, ch_type);
            itransform(lc, tu, i, ch_type);
            tu = tu->next;
        }
    }
    return 0;
}

/**
 * Reconstruct every coding unit of one CTU, then free the CTU's CU list.
 *
 * @param lc local context for the CTU
 * @param rs index of the CTU in fc->tab.ctus
 * @param rx CTU column
 * @param ry CTU row
 * @return the value returned by reconstruct() for the last coded CU,
 *         or 0 when no CU is coded
 */
int ff_vvc_reconstruct(VVCLocalContext *lc, const int rs, const int rx, const int ry)
{
    const VVCFrameContext *fc = lc->fc;
    const VVCSPS *sps = fc->ps.sps;
    const int x_ctb = rx << sps->ctb_log2_size_y;
    const int y_ctb = ry << sps->ctb_log2_size_y;
    CTU *ctu = fc->tab.ctus + rs;
    CodingUnit *cu = ctu->cus;
    int ret = 0;

    // start the CTU with empty reconstructed-area lists
    lc->num_ras[0] = lc->num_ras[1] = 0;
    lc->lmcs.x_vpdu = -1;
    lc->lmcs.y_vpdu = -1;
    ff_vvc_decode_neighbour(lc, x_ctb, y_ctb, rx, ry, rs);
    while (cu) {
        lc->cu = cu;

        if (cu->ciip_flag)
            ff_vvc_predict_ciip(lc);
        if (cu->coded_flag) {
            ret = reconstruct(lc);
        } else {
            // nothing to add: just mark the CU area as reconstructed;
            // the return values are not checked
            add_reconstructed_area(lc, LUMA, cu->x0, cu->y0, cu->cb_width, cu->cb_height);
            add_reconstructed_area(lc, CHROMA, cu->x0, cu->y0, cu->cb_width, cu->cb_height);
        }
        cu = cu->next;
    }
    ff_vvc_ctu_free_cus(ctu);
    return ret;
}
== 4) || (w == 8 && h == 8)) + return 1; + return 2; +} + +int ff_vvc_nscale_derive(const int w, const int h, const int mode) +{ + int side_size, nscale; + av_assert0(mode < INTRA_LT_CCLM && !(mode > INTRA_HORZ && mode < INTRA_VERT)); + if (mode == INTRA_PLANAR || mode == INTRA_DC || + mode == INTRA_HORZ || mode == INTRA_VERT) { + nscale = (av_log2(w) + av_log2(h) - 2) >> 2; + } else { + const int intra_pred_angle = ff_vvc_intra_pred_angle_derive(mode); + const int inv_angle = ff_vvc_intra_inv_angle_derive(intra_pred_angle); + if (mode >= INTRA_VERT) + side_size = h; + if (mode <= INTRA_HORZ) + side_size = w; + nscale = FFMIN(2, av_log2(side_size) - av_log2(3 * inv_angle - 2) + 8); + } + return nscale; +} + +int ff_vvc_need_pdpc(const int w, const int h, const uint8_t bdpcm_flag, const int mode, const int ref_idx) +{ + av_assert0(mode < INTRA_LT_CCLM); + if ((w >= 4 && h >= 4) && !ref_idx && !bdpcm_flag) { + int nscale; + if (mode == INTRA_PLANAR || mode == INTRA_DC || + mode == INTRA_HORZ || mode == INTRA_VERT) + return 1; + if (mode > INTRA_HORZ && mode < INTRA_VERT) + return 0; + nscale = ff_vvc_nscale_derive(w, h, mode); + return nscale >= 0; + + } + return 0; +} + +static const ReconstructedArea* get_reconstructed_area(const VVCLocalContext *lc, const int x, const int y, const int c_idx) +{ + const int ch_type = c_idx > 0; + for (int i = lc->num_ras[ch_type] - 1; i >= 0; i--) { + const ReconstructedArea* a = &lc->ras[ch_type][i]; + const int r = (a->x + a->w); + const int b = (a->y + a->h); + if (a->x <= x && x < r && a->y <= y && y < b) + return a; + + //it's too far away, no need check it; + if (x >= r && y >= b) + break; + } + return NULL; +} + +int ff_vvc_get_top_available(const VVCLocalContext *lc, const int x, const int y, int target_size, const int c_idx) +{ + const VVCFrameContext *fc = lc->fc; + const VVCSPS *sps = fc->ps.sps; + const int hs = sps->hshift[c_idx]; + const int vs = sps->vshift[c_idx]; + const int log2_ctb_size_v = sps->ctb_log2_size_y - 
vs; + const int end_of_ctb_x = ((lc->cu->x0 >> sps->ctb_log2_size_y) + 1) << sps->ctb_log2_size_y; + const int y0b = av_mod_uintp2(y, log2_ctb_size_v); + const int max_x = FFMIN(fc->ps.pps->width, end_of_ctb_x) >> hs; + const ReconstructedArea *a; + int px = x; + + if (!y0b) { + if (!lc->ctb_up_flag) + return 0; + target_size = FFMIN(target_size, (lc->end_of_tiles_x >> hs) - x); + if (sps->r->sps_entropy_coding_sync_enabled_flag) + target_size = FFMIN(target_size, (end_of_ctb_x >> hs) - x); + return target_size; + } + + target_size = FFMAX(0, FFMIN(target_size, max_x - x)); + while (target_size > 0 && (a = get_reconstructed_area(lc, px, y - 1, c_idx))) { + const int sz = FFMIN(target_size, a->x + a->w - px); + px += sz; + target_size -= sz; + } + return px - x; +} + +int ff_vvc_get_left_available(const VVCLocalContext *lc, const int x, const int y, int target_size, const int c_idx) +{ + const VVCFrameContext *fc = lc->fc; + const VVCSPS *sps = fc->ps.sps; + const int hs = sps->hshift[c_idx]; + const int vs = sps->vshift[c_idx]; + const int log2_ctb_size_h = sps->ctb_log2_size_y - hs; + const int x0b = av_mod_uintp2(x, log2_ctb_size_h); + const int end_of_ctb_y = ((lc->cu->y0 >> sps->ctb_log2_size_y) + 1) << sps->ctb_log2_size_y; + const int max_y = FFMIN(fc->ps.pps->height, end_of_ctb_y) >> vs; + const ReconstructedArea *a; + int py = y; + + if (!x0b && !lc->ctb_left_flag) + return 0; + + target_size = FFMAX(0, FFMIN(target_size, max_y - y)); + if (!x0b) + return target_size; + + while (target_size > 0 && (a = get_reconstructed_area(lc, x - 1, py, c_idx))) { + const int sz = FFMIN(target_size, a->y + a->h - py); + py += sz; + target_size -= sz; + } + return py - y; +} + +static int less(const void *a, const void *b) +{ + return *(const int*)a - *(const int*)b; +} + +int ff_vvc_ref_filter_flag_derive(const int mode) +{ + static const int modes[] = { -14, -12, -10, -6, INTRA_PLANAR, 2, 34, 66, 72, 76, 78, 80}; + return bsearch(&mode, modes, FF_ARRAY_ELEMS(modes), 
sizeof(int), less) != NULL; +} + +int ff_vvc_intra_pred_angle_derive(const int pred_mode) +{ + static const int angles[] = { + 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, + 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512 + }; + int sign = 1, idx, intra_pred_angle; + if (pred_mode > INTRA_DIAG) { + idx = pred_mode - INTRA_VERT; + } else if (pred_mode > 0) { + idx = INTRA_HORZ - pred_mode; + } else { + idx = INTRA_HORZ - 2 - pred_mode; + } + if (idx < 0) { + idx = -idx; + sign = -1; + } + intra_pred_angle = sign * angles[idx]; + return intra_pred_angle; +} + +#define ROUND(f) (int)(f < 0 ? -(-f + 0.5) : (f + 0.5)) +int ff_vvc_intra_inv_angle_derive(const int intra_pred_angle) +{ + float inv_angle; + av_assert0(intra_pred_angle); + inv_angle = 32 * 512.0 / intra_pred_angle; + return ROUND(inv_angle); +} + +//8.4.5.2.7 Wide angle intra prediction mode mapping proces +int ff_vvc_wide_angle_mode_mapping(const CodingUnit *cu, + const int tb_width, const int tb_height, const int c_idx, int pred_mode_intra) +{ + int nw, nh, wh_ratio, min, max; + + if (cu->isp_split_type == ISP_NO_SPLIT || c_idx) { + nw = tb_width; + nh = tb_height; + } else { + nw = cu->cb_width; + nh = cu->cb_height; + } + wh_ratio = FFABS(ff_log2(nw) - ff_log2(nh)); + max = (wh_ratio > 1) ? (8 + 2 * wh_ratio) : 8; + min = (wh_ratio > 1) ? (60 - 2 * wh_ratio) : 60; + + if (nw > nh && pred_mode_intra >=2 && pred_mode_intra < max) + pred_mode_intra += 65; + else if (nh > nw && pred_mode_intra <= 66 && pred_mode_intra > min) + pred_mode_intra -= 67; + return pred_mode_intra; +} diff --git a/libavcodec/vvc/vvc_intra.h b/libavcodec/vvc/vvc_intra.h new file mode 100644 index 0000000000..6b674008f9 --- /dev/null +++ b/libavcodec/vvc/vvc_intra.h @@ -0,0 +1,49 @@ +/* + * VVC intra prediction + * + * Copyright (C) 2021 Nuo Mi + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef AVCODEC_VVC_VVC_INTRA_H +#define AVCODEC_VVC_VVC_INTRA_H + +#include "vvc_ctu.h" + +/** + * reconstruct a CTU + * @param lc local context for CTU + * @param rs raster order for the CTU. + * @param rx raster order x for the CTU. + * @param ry raster order y for the CTU. 
+ * @return AVERROR
+ */
+int ff_vvc_reconstruct(VVCLocalContext *lc, const int rs, const int rx, const int ry);
+
+//utils for vvc_intra_template
+int ff_vvc_get_top_available(const VVCLocalContext *lc, int x0, int y0, int target_size, int c_idx);
+int ff_vvc_get_left_available(const VVCLocalContext *lc, int x0, int y0, int target_size, int c_idx);
+int ff_vvc_get_mip_size_id(int w, int h);
+int ff_vvc_need_pdpc(int w, int h, uint8_t bdpcm_flag, int mode, int ref_idx);
+int ff_vvc_nscale_derive(int w, int h, int mode);
+int ff_vvc_ref_filter_flag_derive(int mode);
+int ff_vvc_intra_pred_angle_derive(int pred_mode);
+int ff_vvc_intra_inv_angle_derive(int intra_pred_angle); //takes the angle from ff_vvc_intra_pred_angle_derive(), not a mode
+int ff_vvc_wide_angle_mode_mapping(const CodingUnit *cu,
+    int tb_width, int tb_height, int c_idx, int pred_mode_intra);
+
+#endif // AVCODEC_VVC_VVC_INTRA_H
diff --git a/libavcodec/vvc/vvc_intra_template.c b/libavcodec/vvc/vvc_intra_template.c
new file mode 100644
index 0000000000..9fb47549d5
--- /dev/null
+++ b/libavcodec/vvc/vvc_intra_template.c
@@ -0,0 +1,1015 @@
+/*
+ * VVC intra prediction DSP
+ *
+ * Copyright (C) 2021-2023 Nuomi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavcodec/bit_depth_template.c" + +#include "vvc_intra.h" + +#define POS(x, y) src[(x) + stride * (y)] + +static av_always_inline void FUNC(cclm_linear_pred)(VVCFrameContext *fc, const int x0, const int y0, + const int w, const int h, const pixel* pdsy, const int *a, const int *b, const int *k) +{ + const VVCSPS *sps = fc->ps.sps; + for (int i = 0; i < VVC_MAX_SAMPLE_ARRAYS - 1; i++) { + const int c_idx = i + 1; + const int x = x0 >> sps->hshift[c_idx]; + const int y = y0 >> sps->vshift[c_idx]; + const ptrdiff_t stride = fc->frame->linesize[c_idx] / sizeof(pixel); + pixel *src = (pixel*)fc->frame->data[c_idx] + x + y * stride; + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + const int dsy = pdsy[y * w + x]; + const int pred = ((dsy * a[i]) >> k[i]) + b[i]; + POS(x, y) = CLIP(pred); + } + } + } +} + +#define MAX_PICK_POS 4 +#define TOP 0 +#define LEFT 1 + +static av_always_inline void FUNC(cclm_get_params_default)(int *a, int *b, int *k) +{ + for (int i = 0; i < 2; i++) { + a[i] = k[i] = 0; + b[i] = 1 << (BIT_DEPTH - 1); + } +} + +static av_always_inline int FUNC(cclm_get_select_pos)(const VVCLocalContext *lc, + const int x, const int y, const int w, const int h, const int avail_t, const int avail_l, + int cnt[2], int pos[2][MAX_PICK_POS]) +{ + const enum IntraPredMode mode = lc->cu->intra_pred_mode_c; + const int num_is4 = !avail_t || !avail_l || mode != INTRA_LT_CCLM; + int num_samp[2]; + + if (mode == INTRA_LT_CCLM) { + num_samp[TOP] = avail_t ? w : 0; + num_samp[LEFT] = avail_l ? h : 0; + } else { + num_samp[TOP] = (avail_t && mode == INTRA_T_CCLM) ? ff_vvc_get_top_available(lc, x, y, w + FFMIN(w, h), 1) : 0; + num_samp[LEFT] = (avail_l && mode == INTRA_L_CCLM) ? 
ff_vvc_get_left_available(lc, x, y, h + FFMIN(w, h), 1) : 0; + } + if (!num_samp[TOP] && !num_samp[LEFT]) { + return 0; + } + for (int i = TOP; i <= LEFT; i++) { + const int start = num_samp[i] >> (2 + num_is4); + const int step = FFMAX(1, num_samp[i] >> (1 + num_is4)) ; + cnt[i] = FFMIN(num_samp[i], (1 + num_is4) << 1); + for (int c = 0; c < cnt[i]; c++) + pos[i][c] = start + c * step; + } + return 1; +} + +static av_always_inline void FUNC(cclm_select_luma_444)(const pixel *src, const int step, + const int cnt, const int pos[MAX_PICK_POS], pixel *sel_luma) +{ + for (int i = 0; i < cnt; i++) + sel_luma[i] = src[pos[i] * step]; +} + +static av_always_inline void FUNC(cclm_select_luma)(const VVCFrameContext *fc, + const int x0, const int y0, const int avail_t, const int avail_l, const int cnt[2], const int pos[2][MAX_PICK_POS], + pixel *sel_luma) +{ + const VVCSPS *sps = fc->ps.sps; + + const int b_ctu_boundary = !av_mod_uintp2(y0, sps->ctb_log2_size_y); + const int hs = sps->hshift[1]; + const int vs = sps->vshift[1]; + const ptrdiff_t stride = fc->frame->linesize[0] / sizeof(pixel); + + if (!hs && !vs) { + const pixel* src = (pixel*)fc->frame->data[0] + x0 + y0 * stride; + FUNC(cclm_select_luma_444)(src - avail_t * stride, 1, cnt[TOP], pos[TOP], sel_luma); + FUNC(cclm_select_luma_444)(src - avail_l, stride, cnt[LEFT], pos[LEFT], sel_luma + cnt[TOP]); + } else { + // top + if (vs && !b_ctu_boundary) { + const pixel *source = (pixel *)fc->frame->data[0] + x0 + (y0 - 2) * stride; + for (int i = 0; i < cnt[TOP]; i++) { + const int x = pos[TOP][i] << hs; + const pixel *src = source + x; + const int has_left = x || avail_l; + const pixel l = has_left ? POS(-1, 0) : POS(0, 0); + if (sps->r->sps_chroma_vertical_collocated_flag) { + sel_luma[i] = (POS(0, -1) + l + 4 * POS(0, 0) + POS(1, 0) + POS(0, 1) + 4) >> 3; + } else { + const pixel l1 = has_left ? 
POS(-1, 1) : POS(0, 1); + sel_luma[i] = (l + l1 + 2 * (POS(0, 0) + POS(0, 1)) + POS(1, 0) + POS(1, 1) + 4) >> 3; + } + } + } else { + const pixel *source = (pixel*)fc->frame->data[0] + x0 + (y0 - 1) * stride; + for (int i = 0; i < cnt[TOP]; i++) { + const int x = pos[TOP][i] << hs; + const pixel *src = source + x; + const int has_left = x || avail_l; + const pixel l = has_left ? POS(-1, 0) : POS(0, 0); + sel_luma[i] = (l + 2 * POS(0, 0) + POS(1, 0) + 2) >> 2; + } + } + + // left + { + const pixel *left; + const pixel *source = (pixel *)fc->frame->data[0] + x0 + y0 * stride - (1 + hs) * avail_l; + left = source - avail_l; + + for (int i = 0; i < cnt[LEFT]; i++) { + const int y = pos[LEFT][i] << vs; + const int offset = y * stride; + const pixel *l = left + offset; + const pixel *src = source + offset; + pixel pred; + if (!vs) { + pred = (*l + 2 * POS(0, 0) + POS(1, 0) + 2) >> 2; + } else { + if (sps->r->sps_chroma_vertical_collocated_flag) { + const int has_top = y || avail_t; + const pixel t = has_top ? 
POS(0, -1) : POS(0, 0); + pred = (*l + t + 4 * POS(0, 0) + POS(1, 0) + POS(0, 1) + 4) >> 3; + } else { + pred = (*l + *(l + stride) + 2 * POS(0, 0) + 2 * POS(0, 1) + POS(1, 0) + POS(1, 1) + 4) >> 3; + } + } + sel_luma[i + cnt[TOP]] = pred; + } + } + } +} + +static av_always_inline void FUNC(cclm_select_chroma)(const VVCFrameContext *fc, + const int x, const int y, const int cnt[2], const int pos[2][MAX_PICK_POS], + pixel sel[][MAX_PICK_POS * 2]) +{ + for (int c_idx = 1; c_idx < VVC_MAX_SAMPLE_ARRAYS; c_idx++) { + const ptrdiff_t stride = fc->frame->linesize[c_idx] / sizeof(pixel); + + //top + const pixel *src = (pixel*)fc->frame->data[c_idx] + x + (y - 1)* stride; + for (int i = 0; i < cnt[TOP]; i++) { + sel[c_idx][i] = src[pos[TOP][i]]; + } + + //left + src = (pixel*)fc->frame->data[c_idx] + x - 1 + y * stride; + for (int i = 0; i < cnt[LEFT]; i++) { + sel[c_idx][i + cnt[TOP]] = src[pos[LEFT][i] * stride]; + } + } +} + +static av_always_inline int FUNC(cclm_select_samples)(const VVCLocalContext *lc, + const int x0, const int y0, const int w, const int h, const int avail_t, const int avail_l, + pixel sel[][MAX_PICK_POS * 2]) +{ + const VVCFrameContext *fc = lc->fc; + const VVCSPS *sps = fc->ps.sps; + const int x = x0 >> sps->hshift[1]; + const int y = y0 >> sps->vshift[1]; + int cnt[2], pos[2][MAX_PICK_POS]; + + if (!FUNC(cclm_get_select_pos)(lc, x, y, w, h, avail_t, avail_l, cnt, pos)) + return 0; + + FUNC(cclm_select_luma)(fc, x0, y0, avail_t, avail_l, cnt, pos, sel[LUMA]); + FUNC(cclm_select_chroma)(fc, x, y, cnt, pos, sel); + + if (cnt[TOP] + cnt[LEFT] == 2) { + for (int c_idx = 0; c_idx < VVC_MAX_SAMPLE_ARRAYS; c_idx++) { + sel[c_idx][3] = sel[c_idx][0]; + sel[c_idx][2] = sel[c_idx][1]; + sel[c_idx][0] = sel[c_idx][1]; + sel[c_idx][1] = sel[c_idx][3]; + } + } + return 1; +} + +static av_always_inline void FUNC(cclm_get_min_max)( + const pixel sel[][MAX_PICK_POS * 2], int *min, int *max) +{ + int min_grp_idx[] = { 0, 2 }; + int max_grp_idx[] = { 1, 3 }; + + if 
(sel[LUMA][min_grp_idx[0]] > sel[LUMA][min_grp_idx[1]]) + FFSWAP(int, min_grp_idx[0], min_grp_idx[1]); + if (sel[LUMA][max_grp_idx[0]] > sel[LUMA][max_grp_idx[1]]) + FFSWAP(int, max_grp_idx[0], max_grp_idx[1]); + if (sel[LUMA][min_grp_idx[0]] > sel[LUMA][max_grp_idx[1]]) { + FFSWAP(int, min_grp_idx[0], max_grp_idx[0]); + FFSWAP(int, min_grp_idx[1], max_grp_idx[1]); + } + if (sel[LUMA][min_grp_idx[1]] > sel[LUMA][max_grp_idx[0]]) + FFSWAP(int, min_grp_idx[1], max_grp_idx[0]); + for (int c_idx = 0; c_idx < VVC_MAX_SAMPLE_ARRAYS; c_idx++) { + max[c_idx] = (sel[c_idx][max_grp_idx[0]] + sel[c_idx][max_grp_idx[1]] + 1) >> 1; + min[c_idx] = (sel[c_idx][min_grp_idx[0]] + sel[c_idx][min_grp_idx[1]] + 1) >> 1; + } +} + +static av_always_inline void FUNC(cclm_get_params)(const VVCLocalContext *lc, + const int x0, const int y0, const int w, const int h, const int avail_t, const int avail_l, + int *a, int *b, int *k) +{ + pixel sel[VVC_MAX_SAMPLE_ARRAYS][MAX_PICK_POS * 2]; + int max[VVC_MAX_SAMPLE_ARRAYS], min[VVC_MAX_SAMPLE_ARRAYS]; + int diff; + + if (!FUNC(cclm_select_samples)(lc, x0, y0, w, h, avail_t, avail_l, sel)) { + FUNC(cclm_get_params_default)(a, b, k); + return; + } + + FUNC(cclm_get_min_max)(sel, min, max); + + diff = max[LUMA] - min[LUMA]; + if (diff == 0) { + for (int i = 0; i < 2; i++) { + a[i] = k[i] = 0; + b[i] = min[i + 1]; + } + return; + } + for (int i = 0; i < 2; i++) { + const static int div_sig_table[] = {0, 7, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 1, 1, 0}; + const int diffc = max[i + 1] - min[i + 1]; + int x = av_log2(diff); + int y, v, sign, add; + const int norm_diff = ((diff << 4) >> x) & 15; + x += (norm_diff) ? 1 : 0; + y = abs(diffc) > 0 ? av_log2(abs(diffc)) + 1 : 0; + v = div_sig_table[norm_diff] | 8; + add = (1 << y >> 1); + a[i] = (diffc * v + add) >> y; + k[i] = FFMAX(1, 3 + x -y); + sign = a[i] < 0 ? -1 : (a[i] > 0); + a[i] = ((3 + x - y) < 1) ? 
sign * 15 : a[i]; + b[i] = min[i + 1] - ((a[i] * min[0]) >> k[i]); + } + +} + +#undef TOP +#undef LEFT + +static av_always_inline void FUNC(cclm_get_luma_rec_pixels)(const VVCFrameContext *fc, + const int x0, const int y0, const int w, const int h, const int avail_t, const int avail_l, + pixel *pdsy) +{ + const int hs = fc->ps.sps->hshift[1]; + const int vs = fc->ps.sps->vshift[1]; + const ptrdiff_t stride = fc->frame->linesize[0] / sizeof(pixel); + const pixel *source = (pixel*)fc->frame->data[0] + x0 + y0 * stride; + const pixel *left = source - avail_l; + const pixel *top = source - avail_t * stride; + + const VVCSPS *sps = fc->ps.sps; + if (!hs && !vs) { + for (int i = 0; i < h; i++) + memcpy(pdsy + i * w, source + i * stride, w * sizeof(pixel)); + return; + } + for (int i = 0; i < h; i++) { + const pixel *src = source; + const pixel *l = left; + const pixel *t = top; + if (!vs) { + for (int j = 0; j < w; j++) { + pixel pred = (*l + 2 * POS(0, 0) + POS(1, 0) + 2) >> 2; + pdsy[i * w + j] = pred; + src += 2; + l = src - 1; + } + + } else { + if (sps->r->sps_chroma_vertical_collocated_flag) { + for (int j = 0; j < w; j++) { + pixel pred = (*l + *t + 4 * POS(0, 0) + POS(1, 0) + POS(0, 1) + 4) >> 3; + pdsy[i * w + j] = pred; + src += 2; + t += 2; + l = src - 1; + } + } else { + for (int j = 0; j < w; j++) { + pixel pred = (*l + *(l + stride) + 2 * POS(0, 0) + 2 * POS(0, 1) + POS(1, 0) + POS(1, 1) + 4) >> 3; + + pdsy[i * w + j] = pred; + src += 2; + l = src - 1; + } + } + } + source += (stride << vs); + left += (stride << vs); + top = source - stride; + } +} + +static av_always_inline void FUNC(cclm_pred_default)(VVCFrameContext *fc, + const int x, const int y, const int w, const int h, const int avail_t, const int avail_l) +{ + for (int c_idx = 1; c_idx < VVC_MAX_SAMPLE_ARRAYS; c_idx++) { + const ptrdiff_t stride = fc->frame->linesize[c_idx] / sizeof(pixel); + pixel *dst = (pixel*)fc->frame->data[c_idx] + x + y * stride; + for (int i = 0; i < h; i++) { + for (int j 
= 0; j < w; j++) { + dst[j] = 1 << (BIT_DEPTH - 1); + } + dst += stride; + } + } +} + +//8.4.5.2.14 Specification of INTRA_LT_CCLM, INTRA_L_CCLM and INTRA_T_CCLM intra prediction mode +static void FUNC(intra_cclm_pred)(const VVCLocalContext *lc, const int x0, const int y0, + const int width, const int height) +{ + VVCFrameContext *fc = lc->fc; + const VVCSPS *sps = fc->ps.sps; + const int avail_t = ff_vvc_get_top_available(lc, x0, y0, 1, 0); + const int avail_l = ff_vvc_get_left_available(lc, x0, y0, 1, 0); + const int hs = sps->hshift[1]; + const int vs = sps->vshift[1]; + const int x = x0 >> hs; + const int y = y0 >> vs; + const int w = width >> hs; + const int h = height >> vs; + int a[2], b[2], k[2]; + + pixel dsy[MAX_TB_SIZE * MAX_TB_SIZE]; + if (!avail_t && !avail_l) { + FUNC(cclm_pred_default)(fc, x, y, w, h, avail_t, avail_l); + return; + } + FUNC(cclm_get_luma_rec_pixels)(fc, x0, y0, w, h, avail_t, avail_l, dsy); + FUNC(cclm_get_params) (lc, x0, y0, w, h, avail_t, avail_l, a, b, k); + FUNC(cclm_linear_pred)(fc, x0, y0, w, h, dsy, a, b, k); +} + +static int FUNC(lmcs_sum_samples)(const pixel *start, ptrdiff_t stride, const int avail, const int target_size) +{ + const int size = FFMIN(avail, target_size); + int sum = 0; + for (int i = 0; i < size; i++) { + sum += *start; + start += stride; + } + sum += *(start - stride) * (target_size - size); + return sum; +} + +// 8.7.5.3 Picture reconstruction with luma dependent chroma residual scaling process for chroma samples +static int FUNC(lmcs_derive_chroma_scale)(VVCLocalContext *lc, const int x0, const int y0) +{ + VVCFrameContext *fc = lc->fc; + const VVCLMCS *lmcs = &fc->ps.lmcs; + const int size_y = FFMIN(fc->ps.sps->ctb_size_y, 64); + + const int x = x0 & ~(size_y - 1); + const int y = y0 & ~(size_y - 1); + if (lc->lmcs.x_vpdu != x || lc->lmcs.y_vpdu != y) { + int cnt = 0, luma = 0, i; + const pixel *src = (const pixel *)(fc->frame->data[LUMA] + y * fc->frame->linesize[LUMA] + (x << 
fc->ps.sps->pixel_shift)); + const ptrdiff_t stride = fc->frame->linesize[LUMA] / sizeof(pixel); + const int avail_t = ff_vvc_get_top_available (lc, x, y, 1, 0); + const int avail_l = ff_vvc_get_left_available(lc, x, y, 1, 0); + if (avail_l) { + luma += FUNC(lmcs_sum_samples)(src - 1, stride, fc->ps.pps->height - y, size_y); + cnt = size_y; + } + if (avail_t) { + luma += FUNC(lmcs_sum_samples)(src - stride, 1, fc->ps.pps->width - x, size_y); + cnt += size_y; + } + if (cnt) + luma = (luma + (cnt >> 1)) >> av_log2(cnt); + else + luma = 1 << (BIT_DEPTH - 1); + + for (i = lmcs->min_bin_idx; i <= lmcs->max_bin_idx; i++) { + if (luma < lmcs->pivot[i + 1]) + break; + } + i = FFMIN(i, LMCS_MAX_BIN_SIZE - 1); + + lc->lmcs.chroma_scale = lmcs->chroma_scale_coeff[i]; + lc->lmcs.x_vpdu = x; + lc->lmcs.y_vpdu = y; + } + return lc->lmcs.chroma_scale; +} + +// 8.7.5.3 Picture reconstruction with luma dependent chroma residual scaling process for chroma samples +static void FUNC(lmcs_scale_chroma)(VVCLocalContext *lc, int *dst, const int *coeff, + const int width, const int height, const int x0_cu, const int y0_cu) +{ + const int chroma_scale = FUNC(lmcs_derive_chroma_scale)(lc, x0_cu, y0_cu); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + const int c = av_clip_intp2(*coeff, BIT_DEPTH); + + if (c > 0) + *dst = (c * chroma_scale + (1 << 10)) >> 11; + else + *dst = -((-c * chroma_scale + (1 << 10)) >> 11); + coeff++; + dst++; + } + } +} + +static av_always_inline void FUNC(ref_filter)(const pixel *left, const pixel *top, + pixel *filtered_left, pixel *filtered_top, const int left_size, const int top_size, + const int unfilter_last_one) +{ + filtered_left[-1] = filtered_top[-1] = (left[0] + 2 * left[-1] + top[0] + 2 ) >> 2; + for (int i = 0; i < left_size - unfilter_last_one; i++) { + filtered_left[i] = (left[i- 1] + 2 * left[i] + left[i + 1] + 2) >> 2; + } + for (int i = 0; i < top_size - unfilter_last_one; i++) { + filtered_top[i] = (top[i-1] + 2 * 
top[i] + top[i + 1] + 2) >> 2; + } + if (unfilter_last_one) { + filtered_top[top_size - 1] = top[top_size - 1]; + filtered_left[left_size - 1] = left[left_size - 1]; + } +} + +static av_always_inline void FUNC(prepare_intra_edge_params)(const VVCLocalContext *lc, + IntraEdgeParams* edge, const pixel *src, const ptrdiff_t stride, + const int x, int y, int w, int h, int c_idx, const int is_intra_mip, + const int mode, const int ref_idx, const int need_pdpc) +{ +#define EXTEND(ptr, val, len) \ +do { \ + for (i = 0; i < (len); i++) \ + *(ptr + i) = val; \ +} while (0) + const CodingUnit *cu = lc->cu; + const int ref_filter_flag = is_intra_mip ? 0 : ff_vvc_ref_filter_flag_derive(mode); + const int filter_flag = !ref_idx && w * h > 32 && !c_idx && + cu->isp_split_type == ISP_NO_SPLIT && ref_filter_flag; + int cand_up_left = lc->na.cand_up_left; + pixel *left = (pixel*)edge->left_array + MAX_TB_SIZE + 3; + pixel *top = (pixel*)edge->top_array + MAX_TB_SIZE + 3; + pixel *filtered_left = (pixel*)edge->filtered_left_array + MAX_TB_SIZE + 3; + pixel *filtered_top = (pixel*)edge->filtered_top_array + MAX_TB_SIZE + 3; + const int ref_line = ref_idx == 3 ? -4 : (-1 - ref_idx); + int left_size, top_size, unfilter_left_size, unfilter_top_size; + int left_available, top_available; + int refw, refh; + int intra_pred_angle, inv_angle; + int i; + + if (is_intra_mip || mode == INTRA_PLANAR) { + left_size = h + 1; + top_size = w + 1; + unfilter_left_size = left_size + filter_flag; + unfilter_top_size = top_size + filter_flag; + } else if (mode == INTRA_DC) { + unfilter_left_size = left_size = h; + unfilter_top_size = top_size = w; + } else if (mode == INTRA_VERT) { + //we may need 1 pixel to predict the top left. + unfilter_left_size = left_size = need_pdpc ? h : 1; + unfilter_top_size = top_size = w; + } else if (mode == INTRA_HORZ) { + unfilter_left_size = left_size = h; + //even need_pdpc == 0, we may need 1 pixel to predict the top left. + unfilter_top_size = top_size = need_pdpc ? 
w : 1; + } else { + if (cu->isp_split_type == ISP_NO_SPLIT || c_idx) { + refw = w * 2; + refh = h * 2; + } else { + refw = cu->cb_width + w; + refh = cu->cb_height + h; + } + intra_pred_angle = ff_vvc_intra_pred_angle_derive(mode); + inv_angle = ff_vvc_intra_inv_angle_derive(intra_pred_angle); + unfilter_top_size = top_size = refw; + unfilter_left_size = left_size = refh; + } + + left_available = ff_vvc_get_left_available(lc, x, y, unfilter_left_size, c_idx); + for (i = 0; i < left_available; i++) + left[i] = POS(ref_line, i); + + top_available = ff_vvc_get_top_available(lc, x, y, unfilter_top_size, c_idx); + memcpy(top, src + ref_line * stride, top_available * sizeof(pixel)); + + for (int i = -1; i >= ref_line; i--) { + if (cand_up_left) { + left[i] = POS(ref_line, i); + top[i] = POS(i, ref_line); + } else if (left_available) { + left[i] = top[i] = left[0]; + } else if (top_available) { + left[i] = top[i] = top[0]; + } else { + left[i] = top[i] = 1 << (BIT_DEPTH - 1); + } + } + + EXTEND(top + top_available, top[top_available-1], unfilter_top_size - top_available); + EXTEND(left + left_available, left[left_available-1], unfilter_left_size - left_available); + + if (ref_filter_flag) { + if (!ref_idx && w * h > 32 && !c_idx && cu->isp_split_type == ISP_NO_SPLIT ) { + const int unfilter_last_one = left_size == unfilter_left_size; + FUNC(ref_filter)(left, top, filtered_left, filtered_top, unfilter_left_size, unfilter_top_size, unfilter_last_one); + left = filtered_left; + top = filtered_top; + } + } + if (!is_intra_mip && mode != INTRA_PLANAR && mode != INTRA_DC) { + if (ref_filter_flag || ref_idx || cu->isp_split_type != ISP_NO_SPLIT) { + edge->filter_flag = 0; + } else { + const int min_dist_ver_hor = FFMIN(abs(mode - 50), abs(mode - 18)); + const int intra_hor_ver_dist_thres[] = {24, 14, 2, 0, 0}; + const int ntbs = (av_log2(w) + av_log2(h)) >> 1; + edge->filter_flag = min_dist_ver_hor > intra_hor_ver_dist_thres[ntbs - 2]; + } + + if (mode != INTRA_VERT && mode != 
INTRA_HORZ) { + if (mode >= INTRA_DIAG) { + if (intra_pred_angle < 0) { + pixel *p = top - (ref_idx + 1); + for (int x = -h; x < 0; x++) { + const int idx = -1 - ref_idx + FFMIN((x*inv_angle + 256) >> 9, h); + p[x] = left[idx]; + } + } else { + for (int i = refw; i <= refw + FFMAX(1, w/h) * ref_idx + 1; i++) + top[i] = top[refw - 1]; + } + } else { + if (intra_pred_angle < 0) { + pixel *p = left - (ref_idx + 1); + for (int x = -w; x < 0; x++) { + const int idx = -1 - ref_idx + FFMIN((x*inv_angle + 256) >> 9, w); + p[x] = top[idx]; + } + } else { + for (int i = refh; i <= refh + FFMAX(1, h/w) * ref_idx + 1; i++) + left[i] = left[refh - 1]; + } + } + } + } + edge->left = (uint8_t*)left; + edge->top = (uint8_t*)top; +} + +//8.4.1 General decoding process for coding units coded in intra prediction mode +static void FUNC(intra_pred)(const VVCLocalContext *lc, int x0, int y0, + const int width, const int height, int c_idx) +{ + VVCFrameContext *fc = lc->fc; + const VVCSPS *sps = fc->ps.sps; + const VVCPPS *pps = fc->ps.pps; + const CodingUnit *cu = lc->cu; + const int log2_min_cb_size = sps->min_cb_log2_size_y; + const int min_cb_width = pps->min_cb_width; + const int x_cb = x0 >> log2_min_cb_size; + const int y_cb = y0 >> log2_min_cb_size; + + const int hshift = fc->ps.sps->hshift[c_idx]; + const int vshift = fc->ps.sps->vshift[c_idx]; + const int x = x0 >> hshift; + const int y = y0 >> vshift; + const int w = width >> hshift; + const int h = height >> vshift; + const ptrdiff_t stride = fc->frame->linesize[c_idx] / sizeof(pixel); + + const int pred_mode = c_idx ? cu->intra_pred_mode_c : cu->intra_pred_mode_y; + const int mode = ff_vvc_wide_angle_mode_mapping(cu, w, h, c_idx, pred_mode); + + const int intra_mip_flag = SAMPLE_CTB(fc->tab.imf, x_cb, y_cb); + const int is_intra_mip = intra_mip_flag && (!c_idx || cu->mip_chroma_direct_flag); + const int ref_idx = c_idx ? 
0 : cu->intra_luma_ref_idx; + const int need_pdpc = ff_vvc_need_pdpc(w, h, cu->bdpcm_flag[c_idx], mode, ref_idx); + + + pixel *src = (pixel*)fc->frame->data[c_idx] + x + y * stride; + IntraEdgeParams edge; + + FUNC(prepare_intra_edge_params)(lc, &edge, src, stride, x, y, w, h, c_idx, is_intra_mip, mode, ref_idx, need_pdpc); + + if (is_intra_mip) { + int intra_mip_transposed_flag = SAMPLE_CTB(fc->tab.imtf, x_cb, y_cb); + int intra_mip_mode = SAMPLE_CTB(fc->tab.imm, x_cb, y_cb); + + fc->vvcdsp.intra.pred_mip((uint8_t *)src, edge.top, edge.left, + w, h, stride, intra_mip_mode, intra_mip_transposed_flag); + } else if (mode == INTRA_PLANAR) { + fc->vvcdsp.intra.pred_planar((uint8_t *)src, edge.top, edge.left, w, h, stride); + } else if (mode == INTRA_DC) { + fc->vvcdsp.intra.pred_dc((uint8_t *)src, edge.top, edge.left, w, h, stride); + } else if (mode == INTRA_VERT) { + fc->vvcdsp.intra.pred_v((uint8_t *)src, edge.top, w, h, stride); + } else if (mode == INTRA_HORZ) { + fc->vvcdsp.intra.pred_h((uint8_t *)src, edge.left, w, h, stride); + } else { + if (mode >= INTRA_DIAG) { + fc->vvcdsp.intra.pred_angular_v((uint8_t *)src, edge.top, edge.left, + w, h, stride, c_idx, mode, ref_idx, + edge.filter_flag, need_pdpc); + } else { + fc->vvcdsp.intra.pred_angular_h((uint8_t *)src, edge.top, edge.left, + w, h, stride, c_idx, mode, ref_idx, + edge.filter_flag, need_pdpc); + } + } + if (need_pdpc) { + //8.4.5.2.15 Position-dependent intra prediction sample filtering process + if (!is_intra_mip && (mode == INTRA_PLANAR || mode == INTRA_DC || + mode == INTRA_VERT || mode == INTRA_HORZ)) { + const int scale = (av_log2(w) + av_log2(h) - 2) >> 2; + const pixel *left = (pixel*)edge.left; + const pixel *top = (pixel*)edge.top; + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + int l, t, wl, wt, pred; + pixel val; + if (mode == INTRA_PLANAR || mode == INTRA_DC) { + l = left[y]; + t = top[x]; + wl = 32 >> FFMIN((x << 1) >> scale, 31); + wt = 32 >> FFMIN((y << 1) >> scale, 31); 
+ } else { + l = left[y] - left[-1] + POS(x,y); + t = top[x] - top[-1] + POS(x,y); + wl = (mode == INTRA_VERT) ? (32 >> FFMIN((x << 1) >> scale, 31)) : 0; + wt = (mode == INTRA_HORZ) ? (32 >> FFMIN((y << 1) >> scale, 31)) : 0; + } + val = POS(x, y); + pred = val + ((wl * (l - val) + wt * (t - val) + 32) >> 6); + POS(x, y) = CLIP(pred); + } + } + } + } +} + +//8.4.5.2.11 Specification of INTRA_PLANAR intra prediction mode +static av_always_inline void FUNC(pred_planar)(uint8_t *_src, const uint8_t *_top, + const uint8_t *_left, const int w, const int h, const ptrdiff_t stride) +{ + int x, y; + pixel *src = (pixel *)_src; + const pixel *top = (const pixel *)_top; + const pixel *left = (const pixel *)_left; + const int logw = av_log2(w); + const int logh = av_log2(h); + const int size = w * h; + const int shift = (logw + logh + 1); + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + const int pred_v = ((h - 1 - y) * top[x] + (y + 1) * left[h]) << logw; + const int pred_h = ((w - 1 - x) * left[y] + (x + 1) * top[w]) << logh; + const int pred = (pred_v + pred_h + size) >> shift; + POS(x, y) = pred; + } + } +} + +//8.4.5.2.3 MIP boundary sample downsampling process +static av_always_inline void FUNC(mip_downsampling)(int *reduced, const int boundary_size, + const pixel *ref, const int n_tb_s) +{ + const int b_dwn = n_tb_s / boundary_size; + const int log2 = av_log2(b_dwn); + + if (boundary_size == n_tb_s) { + for (int i = 0; i < n_tb_s; i++) + reduced[i] = ref[i]; + return; + } + for (int i = 0; i < boundary_size; i++) { + int r; + r = *ref++; + for (int j = 1; j < b_dwn; j++) + r += *ref++; + reduced[i] = (r + (1 << (log2 - 1))) >> log2; + } +} + +static av_always_inline void FUNC(mip_reduced_pred)(pixel *src, const ptrdiff_t stride, + const int up_hor, const int up_ver, const int pred_size, const int *reduced, const int reduced_size, + const int ow, const int temp0, const uint8_t *matrix, int is_transposed) +{ + src = &POS(up_hor - 1, up_ver - 1); + for (int y = 
0; y < pred_size; y++) { + for (int x = 0; x < pred_size; x++) { + int pred = 0; + for (int i = 0; i < reduced_size; i++) + pred += reduced[i] * matrix[i]; + matrix += reduced_size; + pred = ((pred + ow) >> 6) + temp0; + pred = av_clip(pred, 0, (1< 1 || up_ver > 1) { + if (up_hor > 1) + FUNC(mip_upsampling_1d)(&POS(0, up_ver - 1), 1, up_ver * stride, pred_size, up_hor, left + up_ver - 1, up_ver, pred_size); + if (up_ver > 1) + FUNC(mip_upsampling_1d)(src, stride, 1, w, up_ver, top, 1, pred_size); + } +} + +static av_always_inline pixel FUNC(pred_dc_val)(const pixel *top, const pixel *left, + const int w, const int h) +{ + pixel dc_val; + int sum = 0; + unsigned int offset = (w == h) ? (w << 1) : FFMAX(w, h); + const int shift = av_log2(offset); + offset >>= 1; + if (w >= h) { + for (int i = 0; i < w; i++) + sum += top[i]; + } + if (w <= h) { + for (int i = 0; i < h; i++) + sum += left[i]; + } + dc_val = (sum + offset) >> shift; + return dc_val; +} + +//8.4.5.2.12 Specification of INTRA_DC intra prediction mode +static av_always_inline void FUNC(pred_dc)(uint8_t *_src, const uint8_t *_top, + const uint8_t *_left, const int w, const int h, const ptrdiff_t stride) +{ + int x, y; + pixel *src = (pixel *)_src; + const pixel *top = (const pixel *)_top; + const pixel *left = (const pixel *)_left; + const pixel dc = FUNC(pred_dc_val)(top, left, w, h); + const pixel4 a = PIXEL_SPLAT_X4(dc); + for (y = 0; y < h; y++) { + pixel *s = src; + for (x = 0; x < w; x += 4) { + AV_WN4P(s, a); + s += 4; + } + src += stride; + } +} + +static av_always_inline void FUNC(pred_v)(uint8_t *_src, const uint8_t *_top, + const int w, const int h, const ptrdiff_t stride) +{ + pixel *src = (pixel *)_src; + const pixel *top = (const pixel *)_top; + for (int y = 0; y < h; y++) { + memcpy(src, top, sizeof(pixel) * w); + src += stride; + } +} + +static void FUNC(pred_h)(uint8_t *_src, const uint8_t *_left, const int w, const int h, + const ptrdiff_t stride) +{ + pixel *src = (pixel *)_src; + const 
pixel *left = (const pixel *)_left; + for (int y = 0; y < h; y++) { + const pixel4 a = PIXEL_SPLAT_X4(left[y]); + for (int x = 0; x < w; x += 4) { + AV_WN4P(&POS(x, y), a); + } + } +} + +#define INTRA_LUMA_FILTER(p) CLIP((p[0] * f[0] + p[1] * f[1] + p[2] * f[2] + p[3] * f[3] + 32) >> 6) +#define INTRA_CHROMA_FILTER(p) (((32 - fact) * p[1] + fact * p[2] + 16) >> 5) + +//8.4.5.2.13 Specification of INTRA_ANGULAR2..INTRA_ANGULAR66 intra prediction modes +static void FUNC(pred_angular_v)(uint8_t *_src, const uint8_t *_top, const uint8_t *_left, + const int w, const int h, const ptrdiff_t stride, const int c_idx, const int mode, + const int ref_idx, const int filter_flag, const int need_pdpc) +{ + pixel *src = (pixel *)_src; + const pixel *left = (const pixel *)_left; + const pixel *top = (const pixel *)_top - (1 + ref_idx); + const int intra_pred_angle = ff_vvc_intra_pred_angle_derive(mode); + int pos = (1 + ref_idx) * intra_pred_angle; + const int dp = intra_pred_angle; + const int is_luma = !c_idx; + int nscale, inv_angle; + + if (need_pdpc) { + inv_angle = ff_vvc_intra_inv_angle_derive(intra_pred_angle); + nscale = ff_vvc_nscale_derive(w, h, mode); + } + + for (int y = 0; y < h; y++) { + const int idx = (pos >> 5) + ref_idx; + const int fact = pos & 31; + if (!fact && (!is_luma || !filter_flag)) { + for (int x = 0; x < w; x++) { + const pixel *p = top + x + idx + 1; + POS(x, y) = *p; + } + } else { + if (!c_idx) { + const int8_t *f = ff_vvc_intra_luma_filter[filter_flag][fact]; + for (int x = 0; x < w; x++) { + const pixel *p = top + x + idx; + POS(x, y) = INTRA_LUMA_FILTER(p); + } + } else { + for (int x = 0; x < w; x++) { + const pixel *p = top + x + idx; + POS(x, y) = INTRA_CHROMA_FILTER(p); + } + } + } + if (need_pdpc) { + int inv_angle_sum = 256 + inv_angle; + for (int x = 0; x < FFMIN(w, 3 << nscale); x++) { + const pixel l = left[y + (inv_angle_sum >> 9)]; + const pixel val = POS(x, y); + const int wl = 32 >> ((x << 1) >> nscale); + const int pred = val + 
(((l - val) * wl + 32) >> 6);
+                POS(x, y) = CLIP(pred);
+                inv_angle_sum += inv_angle;
+            }
+        }
+        pos += dp;
+    }
+}
+
+/**
+ * 8.4.5.2.13 Specification of INTRA_ANGULAR2..INTRA_ANGULAR66 intra prediction modes
+ *
+ * Angular prediction from the left reference column; intra_pred selects this
+ * routine for angular modes below INTRA_DIAG. Each sample is either copied
+ * from the reference (zero fractional position and no luma filtering) or
+ * interpolated with the 4-tap luma filter / 2-tap chroma filter. When
+ * need_pdpc is set, the first 3 << nscale rows are additionally blended with
+ * the top reference row.
+ *
+ * @param _src        destination block, written as w x h samples of type pixel
+ * @param _top        top reference samples, only read when need_pdpc is set
+ * @param _left       left reference samples
+ * @param w           block width in samples
+ * @param h           block height in samples
+ * @param stride      destination stride in pixel units, as consumed by POS()
+ * @param c_idx       colour component index; 0 selects the luma filter
+ * @param mode        prediction mode handed to ff_vvc_intra_pred_angle_derive()
+ * @param ref_idx     reference line index; offsets the left reference pointer
+ * @param filter_flag selects the row of ff_vvc_intra_luma_filter
+ * @param need_pdpc   non-zero to apply the blend with the top reference
+ */
+static void FUNC(pred_angular_h)(uint8_t *_src, const uint8_t *_top, const uint8_t *_left,
+    const int w, const int h, const ptrdiff_t stride, const int c_idx, const int mode,
+    const int ref_idx, const int filter_flag, const int need_pdpc)
+{
+    pixel *src                 = (pixel *)_src;
+    const pixel *left          = (const pixel *)_left - (1 + ref_idx);
+    const pixel *top           = (const pixel *)_top;
+    const int is_luma          = !c_idx;
+    const int intra_pred_angle = ff_vvc_intra_pred_angle_derive(mode);
+    const int dp               = intra_pred_angle;
+    //only meaningful when need_pdpc is set; initialised so they are never indeterminate
+    int nscale = 0, inv_angle = 0, inv_angle_sum = 0;
+
+    if (need_pdpc) {
+        inv_angle     = ff_vvc_intra_inv_angle_derive(intra_pred_angle);
+        inv_angle_sum = 256 + inv_angle;
+        nscale        = ff_vvc_nscale_derive(w, h, mode);
+    }
+
+    for (int y = 0; y < h; y++) {
+        int pos = (1 + ref_idx) * intra_pred_angle;
+        //The blend only touches the first 3 << nscale rows. Deriving the weight
+        //for those rows alone keeps the shift count at 5 or less; deriving it
+        //for every row would shift an int by 32 or more once
+        //(y * 2) >> nscale reaches 32, which is undefined.
+        const int pdpc_row = need_pdpc && y < (3 << nscale);
+        const int wt       = pdpc_row ? (32 >> ((y * 2) >> nscale)) : 0;
+
+        for (int x = 0; x < w; x++) {
+            const int idx  = (pos >> 5) + ref_idx;
+            const int fact = pos & 31;
+            const pixel *p = left + y + idx;
+            int pred;
+            if (!fact && (!is_luma || !filter_flag)) {
+                //integer position and no smoothing requested: plain copy
+                pred = p[1];
+            } else {
+                if (!c_idx) {
+                    const int8_t *f = ff_vvc_intra_luma_filter[filter_flag][fact];
+                    pred = INTRA_LUMA_FILTER(p);
+                } else {
+                    pred = INTRA_CHROMA_FILTER(p);
+                }
+            }
+            if (pdpc_row) {
+                const pixel t = top[x + (inv_angle_sum >> 9)];
+                pred = CLIP(pred + (((t - pred) * wt + 32) >> 6));
+            }
+            POS(x, y) = pred;
+            pos += dp;
+        }
+        //the projection onto the top reference advances once per row
+        if (need_pdpc)
+            inv_angle_sum += inv_angle;
+    }
+}
+
+/**
+ * Store the FUNC()-named intra routines of this template in the DSP table.
+ *
+ * @param intra table whose function pointers are assigned
+ */
+static void FUNC(ff_vvc_intra_dsp_init)(VVCIntraDSPContext *const intra)
+{
+    intra->lmcs_scale_chroma = FUNC(lmcs_scale_chroma);
+    intra->intra_cclm_pred = FUNC(intra_cclm_pred);
+    intra->intra_pred = FUNC(intra_pred);
+    intra->pred_planar = FUNC(pred_planar);
+    intra->pred_mip = FUNC(pred_mip);
+    intra->pred_dc = FUNC(pred_dc);
+    intra->pred_v = FUNC(pred_v);
+    intra->pred_h = FUNC(pred_h);
+    intra->pred_angular_v = FUNC(pred_angular_v);
+    intra->pred_angular_h = FUNC(pred_angular_h);
+}