From 88170070c4036530b79690c70e603fdddbb021c4 Mon Sep 17 00:00:00 2001
From: Paul B Mahol
Date: Wed, 7 Sep 2022 13:58:53 +0200
Subject: [PATCH] avcodec: add bonk audio decoder

---
 Changelog               |   1 +
 libavcodec/Makefile     |   1 +
 libavcodec/allcodecs.c  |   1 +
 libavcodec/bonk.c       | 476 ++++++++++++++++++++++++++++++++++++++++
 libavcodec/codec_desc.c |   7 +
 libavcodec/codec_id.h   |   1 +
 libavcodec/version.h    |   4 +-
 7 files changed, 489 insertions(+), 2 deletions(-)
 create mode 100644 libavcodec/bonk.c

diff --git a/Changelog b/Changelog
index f34e8e5d42..66756a73e2 100644
--- a/Changelog
+++ b/Changelog
@@ -11,6 +11,7 @@ version :
 - VAAPI decoding and encoding for 10/12bit 422, 10/12bit 444 HEVC and VP9
 - WBMP (Wireless Application Protocol Bitmap) image format
 - a3dscope filter
+- bonk decoder
 
 
 version 5.1:
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 945908e3b8..0b46bc0173 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -263,6 +263,7 @@ OBJS-$(CONFIG_BMP_DECODER)             += bmp.o msrledec.o
 OBJS-$(CONFIG_BMP_ENCODER)             += bmpenc.o
 OBJS-$(CONFIG_BMV_AUDIO_DECODER)       += bmvaudio.o
 OBJS-$(CONFIG_BMV_VIDEO_DECODER)       += bmvvideo.o
+OBJS-$(CONFIG_BONK_DECODER)            += bonk.o
 OBJS-$(CONFIG_BRENDER_PIX_DECODER)     += brenderpix.o
 OBJS-$(CONFIG_C93_DECODER)             += c93.o
 OBJS-$(CONFIG_CAVS_DECODER)            += cavs.o cavsdec.o cavsdsp.o \
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 6939a4e25f..5d58a5d9f0 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -446,6 +446,7 @@ extern const FFCodec ff_atrac9_decoder;
 extern const FFCodec ff_binkaudio_dct_decoder;
 extern const FFCodec ff_binkaudio_rdft_decoder;
 extern const FFCodec ff_bmv_audio_decoder;
+extern const FFCodec ff_bonk_decoder;
 extern const FFCodec ff_cook_decoder;
 extern const FFCodec ff_dca_encoder;
 extern const FFCodec ff_dca_decoder;
diff --git a/libavcodec/bonk.c b/libavcodec/bonk.c
new file mode 100644
index 0000000000..f3d797d588
--- /dev/null
+++ b/libavcodec/bonk.c
@@ -0,0 +1,476 @@
+/*
+ * Bonk audio decoder
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+#include "avcodec.h"
+#include "codec_internal.h"
+#include "decode.h"
+#define BITSTREAM_READER_LE
+#include "get_bits.h"
+#include "bytestream.h"
+
+/* A run of `count` consecutive bits with value `bit`, see intlist_read(). */
+typedef struct BitCount {
+    uint8_t bit;
+    unsigned count;
+} BitCount;
+
+typedef struct BonkContext {
+    GetBitContext gb;
+    int skip;
+
+    uint8_t *bitstream;
+    int64_t max_framesize;
+    int bitstream_size;
+    int bitstream_index;
+
+    uint64_t nb_samples;
+    int lossless;
+    int mid_side;
+    int n_taps;
+    int down_sampling;
+    int samples_per_packet;
+
+    int state[2][2048], k[2048];
+    int *samples[2];
+    int *input_samples;
+    uint8_t quant[2048];
+    BitCount *bits;
+} BonkContext;
+
+/* Release every buffer allocated by bonk_init(). */
+static av_cold int bonk_close(AVCodecContext *avctx)
+{
+    BonkContext *s = avctx->priv_data;
+
+    av_freep(&s->bitstream);
+    av_freep(&s->input_samples);
+    av_freep(&s->samples[0]);
+    av_freep(&s->samples[1]);
+    av_freep(&s->bits);
+    s->bitstream_size = 0;
+
+    return 0;
+}
+
+/**
+ * Parse the extradata header (at least 17 bytes) and allocate the decoder's
+ * working buffers.
+ *
+ * @return 0 on success, a negative AVERROR code on invalid parameters or
+ *         allocation failure
+ */
+static av_cold int bonk_init(AVCodecContext *avctx)
+{
+    BonkContext *s = avctx->priv_data;
+
+    avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
+    if (avctx->extradata_size < 17)
+        return AVERROR(EINVAL);
+
+    if (avctx->extradata[0]) {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported version.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    if (avctx->ch_layout.nb_channels < 1 || avctx->ch_layout.nb_channels > 2)
+        return AVERROR_INVALIDDATA;
+
+    s->nb_samples = AV_RL32(avctx->extradata + 1) / avctx->ch_layout.nb_channels;
+    if (!s->nb_samples)
+        s->nb_samples = UINT64_MAX;
+    s->lossless = avctx->extradata[10] != 0;
+    s->mid_side = avctx->extradata[11] != 0;
+    s->n_taps = AV_RL16(avctx->extradata + 12);
+    if (!s->n_taps || s->n_taps > 2048)
+        return AVERROR(EINVAL);
+
+    s->down_sampling = avctx->extradata[14];
+    if (!s->down_sampling)
+        return AVERROR(EINVAL);
+
+    s->samples_per_packet = AV_RL16(avctx->extradata + 15);
+    if (!s->samples_per_packet)
+        return AVERROR(EINVAL);
+    /* bonk_decode() reloads state[] from the last n_taps samples of a packet */
+    if (s->samples_per_packet * s->down_sampling < s->n_taps)
+        return AVERROR_INVALIDDATA;
+    s->max_framesize = s->samples_per_packet * avctx->ch_layout.nb_channels * s->down_sampling * 16LL;
+    if (s->max_framesize > (INT32_MAX - AV_INPUT_BUFFER_PADDING_SIZE) / 8)
+        return AVERROR_INVALIDDATA;
+
+    s->bitstream = av_calloc(s->max_framesize + AV_INPUT_BUFFER_PADDING_SIZE, sizeof(*s->bitstream));
+    if (!s->bitstream)
+        return AVERROR(ENOMEM);
+
+    s->input_samples = av_calloc(s->samples_per_packet, sizeof(*s->input_samples));
+    if (!s->input_samples)
+        return AVERROR(ENOMEM);
+
+    s->samples[0] = av_calloc(s->samples_per_packet * s->down_sampling, sizeof(*s->samples[0]));
+    s->samples[1] = av_calloc(s->samples_per_packet * s->down_sampling, sizeof(*s->samples[0]));
+    if (!s->samples[0] || !s->samples[1])
+        return AVERROR(ENOMEM);
+
+    s->bits = av_calloc(s->max_framesize * 8, sizeof(*s->bits));
+    if (!s->bits)
+        return AVERROR(ENOMEM);
+
+    /* NOTE(review): only quant[0..511] are written here although n_taps may
+     * be as large as 2048 -- confirm that this limit is intended. */
+    for (int i = 0; i < 512; i++) {
+        s->quant[i] = sqrt(i + 1);
+    }
+
+    return 0;
+}
+
+/**
+ * Read an unsigned value from the bitstream, least significant bit first.
+ * The top bit is only read when setting it could still keep the value <= max.
+ */
+static unsigned read_uint_max(BonkContext *s, uint32_t max)
+{
+    unsigned value = 0;
+    int i, bits;
+
+    if (max == 0)
+        return 0;
+
+    if (max >> 31)
+        return 32;
+
+    bits = 32 - ff_clz(max);
+
+    for (i = 0; i < bits - 1; i++)
+        if (get_bits1(&s->gb))
+            value += 1 << i;
+
+    if ((value | (1 << (bits - 1))) <= max)
+        if (get_bits1(&s->gb))
+            value += 1 << (bits - 1);
+
+    return value;
+}
+
+/**
+ * Decode a list of signed integers from the bitstream.
+ *
+ * @param buf          destination for the decoded values
+ * @param entries      number of values to decode
+ * @param base_2_part  if nonzero, every value starts with a fixed number of
+ *                     raw low bits; that bit count is read from the stream
+ * @return 0 on success, AVERROR_INVALIDDATA on truncated or invalid data
+ */
+static int intlist_read(BonkContext *s, int *buf, int entries, int base_2_part)
+{
+    int i, low_bits = 0, x = 0, max_x;
+    int n_zeros = 0, step = 256, dominant = 0;
+    int pos = 0, level = 0;
+    BitCount *bits = s->bits;
+
+    memset(buf, 0, entries * sizeof(*buf));
+    if (base_2_part) {
+        low_bits = get_bits(&s->gb, 4);
+
+        if (low_bits)
+            for (i = 0; i < entries; i++)
+                buf[i] = get_bits(&s->gb, low_bits);
+    }
+
+    while (n_zeros < entries) {
+        int steplet = step >> 8;
+
+        if (get_bits_left(&s->gb) <= 0)
+            return AVERROR_INVALIDDATA;
+
+        if (!get_bits1(&s->gb)) {
+            if (steplet < 0)
+                break;
+
+            if (steplet > 0) {
+                bits[x  ].bit   = dominant;
+                bits[x++].count = steplet;
+            }
+
+            if (!dominant)
+                n_zeros += steplet;
+
+            /* reject input that would overflow step + step / 8 below */
+            if (step > INT32_MAX / 9 * 8)
+                return AVERROR_INVALIDDATA;
+            step += step / 8;
+        } else if (steplet > 0) {
+            int actual_run = read_uint_max(s, steplet - 1);
+
+            if (actual_run < 0)
+                break;
+
+            if (actual_run > 0) {
+                bits[x  ].bit   = dominant;
+                bits[x++].count = actual_run;
+            }
+
+            bits[x  ].bit   = !dominant;
+            bits[x++].count = 1;
+
+            if (!dominant)
+                n_zeros += actual_run;
+            else
+                n_zeros++;
+
+            step -= step / 8;
+        }
+
+        if (step < 256) {
+            if (step == 0)
+                return AVERROR_INVALIDDATA;
+            step = 65536 / step;
+            dominant = !dominant;
+        }
+    }
+
+    max_x = x;
+    x = 0;
+    n_zeros = 0;
+    for (i = 0; n_zeros < entries; i++) {
+        if (pos >= entries) {
+            pos = 0;
+            level += 1 << low_bits;
+        }
+
+        if (x >= max_x)
+            return AVERROR_INVALIDDATA;
+
+        if (buf[pos] >= level) {
+            if (bits[x].bit)
+                buf[pos] += 1 << low_bits;
+            else
+                n_zeros++;
+
+            bits[x].count--;
+            x += bits[x].count == 0;
+        }
+
+        pos++;
+    }
+
+    for (i = 0; i < entries; i++) {
+        if (buf[i] && get_bits1(&s->gb)) {
+            buf[i] = -buf[i];
+        }
+    }
+
+    return 0;
+}
+
+/* Arithmetic shift right by b bits, adding 1 when a is negative. */
+static inline int shift_down(int a, int b)
+{
+    return (a >> b) + (a < 0);
+}
+
+/* Shift right by b bits after adding half the divisor; b must be >= 1. */
+static inline int shift(int a, int b)
+{
+    return (a + (1 << (b - 1))) >> b;
+}
+
+#define LATTICE_SHIFT 10
+#define SAMPLE_SHIFT 4
+#define SAMPLE_FACTOR (1 << SAMPLE_SHIFT)
+
+/**
+ * Advance the lattice filter by one sample.
+ *
+ * @param error  input value for this step
+ * @return the filter output, clipped and also stored in state[0]
+ */
+static int predictor_calc_error(int *k, int *state, int order, int error)
+{
+    int i, x = error - shift_down(k[order-1] * state[order-1], LATTICE_SHIFT);
+    int *k_ptr = &(k[order-2]),
+        *state_ptr = &(state[order-2]);
+
+    for (i = order-2; i >= 0; i--, k_ptr--, state_ptr--) {
+        int k_value = *k_ptr, state_value = *state_ptr;
+
+        x -= shift_down(k_value * state_value, LATTICE_SHIFT);
+        state_ptr[1] = state_value + shift_down(k_value * x, LATTICE_SHIFT);
+    }
+
+    // don't drift too far, to avoid overflows
+    x = av_clip(x, -(SAMPLE_FACTOR << 16), SAMPLE_FACTOR << 16);
+
+    state[0] = x;
+
+    return x;
+}
+
+/* Turn the sample history stored in state[] into filter state, in place. */
+static void predictor_init_state(int *k, int *state, int order)
+{
+    for (int i = order - 2; i >= 0; i--) {
+        int x = state[i];
+
+        for (int j = 0, p = i + 1; p < order; j++, p++) {
+            int tmp = x + shift_down(k[j] * state[p], LATTICE_SHIFT);
+
+            state[p] += shift_down(k[j] * x, LATTICE_SHIFT);
+            x = tmp;
+        }
+    }
+}
+
+/**
+ * Buffer incoming packet data and decode one packet's worth of samples once
+ * max_framesize bytes are available, or when called without packet data.
+ */
+static int bonk_decode(AVCodecContext *avctx, AVFrame *frame,
+                       int *got_frame_ptr, AVPacket *pkt)
+{
+    BonkContext *s = avctx->priv_data;
+    GetBitContext *gb = &s->gb;
+    const uint8_t *buf;
+    int quant, n, buf_size, input_buf_size;
+    int ret = AVERROR_INVALIDDATA;
+
+    if ((!pkt->size && !s->bitstream_size) || s->nb_samples == 0) {
+        *got_frame_ptr = 0;
+        return pkt->size;
+    }
+
+    buf_size = FFMIN(pkt->size, s->max_framesize - s->bitstream_size);
+    input_buf_size = buf_size;
+    if (s->bitstream_index + s->bitstream_size + buf_size + AV_INPUT_BUFFER_PADDING_SIZE > s->max_framesize) {
+        memmove(s->bitstream, &s->bitstream[s->bitstream_index], s->bitstream_size);
+        s->bitstream_index = 0;
+    }
+    if (pkt->data)
+        memcpy(&s->bitstream[s->bitstream_index + s->bitstream_size], pkt->data, buf_size);
+    buf                = &s->bitstream[s->bitstream_index];
+    buf_size          += s->bitstream_size;
+    s->bitstream_size  = buf_size;
+    if (buf_size < s->max_framesize && pkt->data) {
+        *got_frame_ptr = 0;
+        return input_buf_size;
+    }
+
+    frame->nb_samples = FFMIN(s->samples_per_packet * s->down_sampling, s->nb_samples);
+    if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+        return ret;
+
+    if ((ret = init_get_bits8(gb, buf, buf_size)) < 0)
+        return ret;
+
+    skip_bits(gb, s->skip);
+    if ((ret = intlist_read(s, s->k, s->n_taps, 0)) < 0)
+        return ret;
+
+    for (int i = 0; i < s->n_taps; i++)
+        s->k[i] *= s->quant[i];
+    quant = s->lossless ? 1 : get_bits(&s->gb, 16) * SAMPLE_FACTOR;
+
+    for (int ch = 0; ch < avctx->ch_layout.nb_channels; ch++) {
+        const int samples_per_packet = s->samples_per_packet;
+        const int down_sampling = s->down_sampling;
+        const int offset = samples_per_packet * down_sampling - 1;
+        int *state = s->state[ch];
+        int *sample = s->samples[ch];
+
+        predictor_init_state(s->k, state, s->n_taps);
+        if ((ret = intlist_read(s, s->input_samples, samples_per_packet, 1)) < 0)
+            return ret;
+
+        for (int i = 0; i < samples_per_packet; i++) {
+            for (int j = 0; j < s->down_sampling - 1; j++) {
+                sample[0] = predictor_calc_error(s->k, state, s->n_taps, 0);
+                sample++;
+            }
+
+            sample[0] = predictor_calc_error(s->k, state, s->n_taps, s->input_samples[i] * quant);
+            sample++;
+        }
+
+        sample = s->samples[ch];
+        for (int i = 0; i < s->n_taps; i++)
+            state[i] = sample[offset - i];
+    }
+
+    if (s->mid_side && avctx->ch_layout.nb_channels == 2) {
+        for (int i = 0; i < frame->nb_samples; i++) {
+            s->samples[1][i] += shift(s->samples[0][i], 1);
+            s->samples[0][i] -= s->samples[1][i];
+        }
+    }
+
+    if (!s->lossless) {
+        for (int ch = 0; ch < avctx->ch_layout.nb_channels; ch++) {
+            int *samples = s->samples[ch];
+            for (int i = 0; i < frame->nb_samples; i++)
+                samples[i] = shift(samples[i], 4);
+        }
+    }
+
+    for (int ch = 0; ch < avctx->ch_layout.nb_channels; ch++) {
+        int16_t *osamples = (int16_t *)frame->extended_data[ch];
+        int *samples = s->samples[ch];
+        for (int i = 0; i < frame->nb_samples; i++)
+            osamples[i] = av_clip_int16(samples[i]);
+    }
+
+    s->nb_samples -= frame->nb_samples;
+
+    s->skip = get_bits_count(gb) - 8 * (get_bits_count(gb) / 8);
+    n = get_bits_count(gb) / 8;
+
+    if (n > buf_size) {
+        s->bitstream_size = 0;
+        s->bitstream_index = 0;
+        return AVERROR_INVALIDDATA;
+    }
+
+    *got_frame_ptr = 1;
+
+    if (s->bitstream_size) {
+        s->bitstream_index += n;
+        s->bitstream_size  -= n;
+        return input_buf_size;
+    }
+    return n;
+}
+
+const FFCodec ff_bonk_decoder = {
+    .p.name           = "bonk",
+    CODEC_LONG_NAME("Bonk audio"),
+    .p.type           = AVMEDIA_TYPE_AUDIO,
+    .p.id             = AV_CODEC_ID_BONK,
+    .priv_data_size   = sizeof(BonkContext),
+    .init             = bonk_init,
+    FF_CODEC_DECODE_CB(bonk_decode),
+    .close            = bonk_close,
+    .p.capabilities   = AV_CODEC_CAP_DELAY |
+                        AV_CODEC_CAP_DR1 |
+                        AV_CODEC_CAP_SUBFRAMES,
+    .caps_internal    = FF_CODEC_CAP_INIT_CLEANUP,
+    .p.sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
+                                                        AV_SAMPLE_FMT_NONE },
+};
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 06dfe55d0f..c002480d39 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -3290,6 +3290,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
         .long_name = NULL_IF_CONFIG_SMALL("DFPWM (Dynamic Filter Pulse Width Modulation)"),
         .props     = AV_CODEC_PROP_LOSSY,
     },
+    {
+        .id        = AV_CODEC_ID_BONK,
+        .type      = AVMEDIA_TYPE_AUDIO,
+        .name      = "bonk",
+        .long_name = NULL_IF_CONFIG_SMALL("Bonk audio"),
+        .props     = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
+    },
 
     /* subtitle codecs */
     {
diff --git a/libavcodec/codec_id.h b/libavcodec/codec_id.h
index 2247bc0309..858b5c3a75 100644
--- a/libavcodec/codec_id.h
+++ b/libavcodec/codec_id.h
@@ -527,6 +527,7 @@ enum AVCodecID {
     AV_CODEC_ID_FASTAUDIO,
     AV_CODEC_ID_MSNSIREN,
     AV_CODEC_ID_DFPWM,
+    AV_CODEC_ID_BONK,
 
     /* subtitle codecs */
     AV_CODEC_ID_FIRST_SUBTITLE = 0x17000,          ///< A dummy ID pointing at the start of subtitle codecs.
diff --git a/libavcodec/version.h b/libavcodec/version.h index d251ae2eff..2328be4b26 100644 --- a/libavcodec/version.h +++ b/libavcodec/version.h @@ -29,8 +29,8 @@ #include "version_major.h" -#define LIBAVCODEC_VERSION_MINOR 42 -#define LIBAVCODEC_VERSION_MICRO 104 +#define LIBAVCODEC_VERSION_MINOR 43 +#define LIBAVCODEC_VERSION_MICRO 100 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ LIBAVCODEC_VERSION_MINOR, \