From ea93052db3594f93f2d10be085a770184da0513d Mon Sep 17 00:00:00 2001
From: Paul B Mahol
Date: Fri, 23 Dec 2016 15:41:51 +0100
Subject: [PATCH] avcodec/utvideodec: add SIMD support for median prediction
 for planar formats

~10% faster overall.

Signed-off-by: Paul B Mahol
---
 configure               |   2 +-
 libavcodec/utvideo.h    |   2 +
 libavcodec/utvideodec.c | 164 ++++++++++++++++++++++++++++++++--------
 libavcodec/utvideoenc.c |   3 +
 4 files changed, 140 insertions(+), 31 deletions(-)

diff --git a/configure b/configure
index e2e171e150..d723b8e2a6 100755
--- a/configure
+++ b/configure
@@ -2531,7 +2531,7 @@ truespeech_decoder_select="bswapdsp"
 tscc_decoder_select="zlib"
 twinvq_decoder_select="mdct lsp sinewin"
 txd_decoder_select="texturedsp"
-utvideo_decoder_select="bswapdsp"
+utvideo_decoder_select="bswapdsp huffyuvdsp"
 utvideo_encoder_select="bswapdsp huffman huffyuvencdsp"
 vble_decoder_select="huffyuvdsp"
 vc1_decoder_select="blockdsp h263_decoder h264qpel intrax8 mpegvideo vc1dsp"
diff --git a/libavcodec/utvideo.h b/libavcodec/utvideo.h
index 49ddf49882..0d1086594a 100644
--- a/libavcodec/utvideo.h
+++ b/libavcodec/utvideo.h
@@ -30,6 +30,7 @@
 #include "libavutil/common.h"
 #include "avcodec.h"
 #include "bswapdsp.h"
+#include "huffyuvdsp.h"
 #include "huffyuvencdsp.h"

 enum {
@@ -69,6 +70,7 @@ typedef struct UtvideoContext {
     const AVClass *class;
     AVCodecContext *avctx;
     BswapDSPContext bdsp;
+    HuffYUVDSPContext hdspdec;
     HuffYUVEncDSPContext hdsp;

     uint32_t frame_info_size, flags, frame_info;
diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c
index 650c0ec67d..cae0ff5f5c 100644
--- a/libavcodec/utvideodec.c
+++ b/libavcodec/utvideodec.c
@@ -372,8 +372,111 @@ static void restore_rgb_planes10(AVFrame *frame, int width, int height)
     }
 }

-static void restore_median(uint8_t *src, int step, int stride,
-                           int width, int height, int slices, int rmode)
+#undef A
+#undef B
+#undef C
+
+static void restore_median_planar(UtvideoContext *c, uint8_t *src, int stride,
+                                  int width, int height, int slices, int rmode)
+{
+    int i, j, slice;
+    int A, B, C;
+    uint8_t *bsrc;
+    int slice_start, slice_height;
+    const int cmask = ~rmode;
+
+    for (slice = 0; slice < slices; slice++) {
+        slice_start  = ((slice * height) / slices) & cmask;
+        slice_height = ((((slice + 1) * height) / slices) & cmask) -
+                       slice_start;
+
+        if (!slice_height)
+            continue;
+        bsrc = src + slice_start * stride;
+
+        // first line - left neighbour prediction
+        bsrc[0] += 0x80;
+        c->hdspdec.add_hfyu_left_pred(bsrc, bsrc, width, 0);
+        bsrc += stride;
+        if (slice_height <= 1)
+            continue;
+        // second line - first element has top prediction, the rest uses median
+        C = bsrc[-stride];
+        bsrc[0] += C;
+        A = bsrc[0];
+        for (i = 1; i < width; i++) {
+            B = bsrc[i - stride];
+            bsrc[i] += mid_pred(A, B, (uint8_t)(A + B - C));
+            C = B;
+            A = bsrc[i];
+        }
+        bsrc += stride;
+        // the rest of lines use continuous median prediction
+        for (j = 2; j < slice_height; j++) {
+            c->hdspdec.add_hfyu_median_pred(bsrc, bsrc - stride,
+                                            bsrc, width, &A, &B);
+            bsrc += stride;
+        }
+    }
+}
+
+/* UtVideo interlaced mode treats every two lines as a single one,
+ * so restoring function should take care of possible padding between
+ * two parts of the same "line".
+ */
+static void restore_median_planar_il(UtvideoContext *c, uint8_t *src, int stride,
+                                     int width, int height, int slices, int rmode)
+{
+    int i, j, slice;
+    int A, B, C;
+    uint8_t *bsrc;
+    int slice_start, slice_height;
+    const int cmask = ~(rmode ? 3 : 1);
+    const int stride2 = stride << 1;
+
+    for (slice = 0; slice < slices; slice++) {
+        slice_start  = ((slice * height) / slices) & cmask;
+        slice_height = ((((slice + 1) * height) / slices) & cmask) -
+                       slice_start;
+        slice_height >>= 1;
+        if (!slice_height)
+            continue;
+
+        bsrc = src + slice_start * stride;
+
+        // first line - left neighbour prediction
+        bsrc[0] += 0x80;
+        A = c->hdspdec.add_hfyu_left_pred(bsrc, bsrc, width, 0);
+        c->hdspdec.add_hfyu_left_pred(bsrc + stride, bsrc + stride, width, A);
+        bsrc += stride2;
+        if (slice_height <= 1)
+            continue;
+        // second line - first element has top prediction, the rest uses median
+        C = bsrc[-stride2];
+        bsrc[0] += C;
+        A = bsrc[0];
+        for (i = 1; i < width; i++) {
+            B = bsrc[i - stride2];
+            bsrc[i] += mid_pred(A, B, (uint8_t)(A + B - C));
+            C = B;
+            A = bsrc[i];
+        }
+        c->hdspdec.add_hfyu_median_pred(bsrc + stride, bsrc - stride,
+                                        bsrc + stride, width, &A, &B);
+        bsrc += stride2;
+        // the rest of lines use continuous median prediction
+        for (j = 2; j < slice_height; j++) {
+            c->hdspdec.add_hfyu_median_pred(bsrc, bsrc - stride2,
+                                            bsrc, width, &A, &B);
+            c->hdspdec.add_hfyu_median_pred(bsrc + stride, bsrc - stride,
+                                            bsrc + stride, width, &A, &B);
+            bsrc += stride2;
+        }
+    }
+}
+
+static void restore_median_packed(uint8_t *src, int step, int stride,
+                                  int width, int height, int slices, int rmode)
 {
     int i, j, slice;
     int A, B, C;
@@ -428,8 +531,8 @@ static void restore_median(uint8_t *src, int step, int stride,
  * so restoring function should take care of possible padding between
  * two parts of the same "line".
  */
-static void restore_median_il(uint8_t *src, int step, int stride,
-                              int width, int height, int slices, int rmode)
+static void restore_median_packed_il(uint8_t *src, int step, int stride,
+                                     int width, int height, int slices, int rmode)
 {
     int i, j, slice;
     int A, B, C;
@@ -608,14 +711,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 return ret;
             if (c->frame_pred == PRED_MEDIAN) {
                 if (!c->interlaced) {
-                    restore_median(frame.f->data[0] + ff_ut_rgb_order[i],
-                                   c->planes, frame.f->linesize[0], avctx->width,
-                                   avctx->height, c->slices, 0);
+                    restore_median_packed(frame.f->data[0] + ff_ut_rgb_order[i],
+                                          c->planes, frame.f->linesize[0], avctx->width,
+                                          avctx->height, c->slices, 0);
                 } else {
-                    restore_median_il(frame.f->data[0] + ff_ut_rgb_order[i],
-                                      c->planes, frame.f->linesize[0],
-                                      avctx->width, avctx->height, c->slices,
-                                      0);
+                    restore_median_packed_il(frame.f->data[0] + ff_ut_rgb_order[i],
+                                             c->planes, frame.f->linesize[0],
+                                             avctx->width, avctx->height, c->slices,
+                                             0);
                 }
             }
         }
@@ -644,14 +747,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 return ret;
             if (c->frame_pred == PRED_MEDIAN) {
                 if (!c->interlaced) {
-                    restore_median(frame.f->data[i], 1, frame.f->linesize[i],
-                                   avctx->width >> !!i, avctx->height >> !!i,
-                                   c->slices, !i);
+                    restore_median_planar(c, frame.f->data[i], frame.f->linesize[i],
+                                          avctx->width >> !!i, avctx->height >> !!i,
+                                          c->slices, !i);
                 } else {
-                    restore_median_il(frame.f->data[i], 1, frame.f->linesize[i],
-                                      avctx->width >> !!i,
-                                      avctx->height >> !!i,
-                                      c->slices, !i);
+                    restore_median_planar_il(c, frame.f->data[i], frame.f->linesize[i],
+                                             avctx->width >> !!i,
+                                             avctx->height >> !!i,
+                                             c->slices, !i);
                 }
             }
         }
@@ -665,13 +768,13 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 return ret;
             if (c->frame_pred == PRED_MEDIAN) {
                 if (!c->interlaced) {
-                    restore_median(frame.f->data[i], 1, frame.f->linesize[i],
-                                   avctx->width >> !!i, avctx->height,
-                                   c->slices, 0);
+                    restore_median_planar(c, frame.f->data[i], frame.f->linesize[i],
+                                          avctx->width >> !!i, avctx->height,
+                                          c->slices, 0);
                 } else {
-                    restore_median_il(frame.f->data[i], 1, frame.f->linesize[i],
-                                      avctx->width >> !!i, avctx->height,
-                                      c->slices, 0);
+                    restore_median_planar_il(c, frame.f->data[i], frame.f->linesize[i],
+                                             avctx->width >> !!i, avctx->height,
+                                             c->slices, 0);
                 }
             }
         }
@@ -685,13 +788,13 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                 return ret;
             if (c->frame_pred == PRED_MEDIAN) {
                 if (!c->interlaced) {
-                    restore_median(frame.f->data[i], 1, frame.f->linesize[i],
-                                   avctx->width, avctx->height,
-                                   c->slices, 0);
+                    restore_median_planar(c, frame.f->data[i], frame.f->linesize[i],
+                                          avctx->width, avctx->height,
+                                          c->slices, 0);
                 } else {
-                    restore_median_il(frame.f->data[i], 1, frame.f->linesize[i],
-                                      avctx->width, avctx->height,
-                                      c->slices, 0);
+                    restore_median_planar_il(c, frame.f->data[i], frame.f->linesize[i],
+                                             avctx->width, avctx->height,
+                                             c->slices, 0);
                 }
             }
         }
@@ -724,6 +827,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
     c->avctx = avctx;

     ff_bswapdsp_init(&c->bdsp);
+    ff_huffyuvdsp_init(&c->hdspdec);

     if (avctx->extradata_size >= 16) {
         av_log(avctx, AV_LOG_DEBUG, "Encoder version %d.%d.%d.%d\n",
diff --git a/libavcodec/utvideoenc.c b/libavcodec/utvideoenc.c
index 6082943764..fd27a15fd8 100644
--- a/libavcodec/utvideoenc.c
+++ b/libavcodec/utvideoenc.c
@@ -293,6 +293,9 @@ static void left_predict(uint8_t *src, uint8_t *dst, int stride,
     }
 }

+#undef A
+#undef B
+
 /* Write data to a plane with median prediction */
 static void median_predict(UtvideoContext *c, uint8_t *src, uint8_t *dst,
                            int stride, int width, int height)
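
Note for readers: the speedup comes from handing the per-line loop of the median predictor to HuffYUVDSPContext.add_hfyu_median_pred(), which has SIMD implementations, so only the first two lines of each slice are still reconstructed by the scalar special cases kept in the new functions above. For reference, below is a minimal scalar sketch of the step that call is expected to perform, i.e. dst[i] = diff[i] + median(left, top[i], left + top[i] - left_top) with left/left_top carried across lines. The helper names median3() and add_median_pred_ref() are made up for this sketch and are not libavcodec API.

#include <stdint.h>
#include <stddef.h>

/* Median of three values; FFmpeg's mid_pred() computes the same thing. */
static int median3(int a, int b, int c)
{
    if (a > b) { int t = a; a = b; b = t; }  /* ensure a <= b      */
    if (b > c) b = c;                        /* b = min(b, c)      */
    return a > b ? a : b;                    /* median = max(a, b) */
}

/* Scalar reference for the per-line median "add" step: reconstruct each
 * byte from its residual, the line above (top) and the running left /
 * left_top values, which are carried across calls so the prediction
 * wraps from the end of one line to the start of the next. */
static void add_median_pred_ref(uint8_t *dst, const uint8_t *top,
                                const uint8_t *diff, ptrdiff_t w,
                                int *left, int *left_top)
{
    int l = *left, lt = *left_top;
    ptrdiff_t i;

    for (i = 0; i < w; i++) {
        int pred = median3(l, top[i], (l + top[i] - lt) & 0xFF);
        l      = (pred + diff[i]) & 0xFF;    /* reconstructed pixel */
        lt     = top[i];
        dst[i] = (uint8_t)l;
    }

    *left     = l;
    *left_top = lt;
}

In the patch the call runs in place (dst and diff both point at the current line) with the previous line as top, which matches the update the remaining scalar loop performs pixel by pixel; the win is that the whole line is then processed by one (possibly vectorized) DSP routine instead of the per-pixel C loop.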