mirror of https://github.com/FFmpeg/FFmpeg.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
635 lines
20 KiB
635 lines
20 KiB
/* |
|
* ALAC (Apple Lossless Audio Codec) decoder |
|
* Copyright (c) 2005 David Hammerton |
|
* |
|
* This file is part of FFmpeg. |
|
* |
|
* FFmpeg is free software; you can redistribute it and/or |
|
* modify it under the terms of the GNU Lesser General Public |
|
* License as published by the Free Software Foundation; either |
|
* version 2.1 of the License, or (at your option) any later version. |
|
* |
|
* FFmpeg is distributed in the hope that it will be useful, |
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
* Lesser General Public License for more details. |
|
* |
|
* You should have received a copy of the GNU Lesser General Public |
|
* License along with FFmpeg; if not, write to the Free Software |
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
*/ |
|
|
|
/**
 * @file alac.c
 * ALAC (Apple Lossless Audio Codec) decoder
 * @author 2005 David Hammerton
 *
 * For more information on the ALAC format, visit:
 *  http://crazney.net/programs/itunes/alac.html
 *
 * Note: This decoder expects a 36- (0x24-)byte QuickTime atom to be
 * passed through the extradata[_size] fields. This atom is tacked onto
 * the end of an 'alac' stsd atom and has the following format:
 *  bytes 0-3   atom size (0x24), big-endian
 *  bytes 4-7   atom type ('alac', not the 'alac' tag from start of stsd)
 *  bytes 8-35  data bytes needed by decoder
 *
 * Extradata layout (all multi-byte fields are big-endian):
 *  32bit  size
 *  32bit  tag (=alac)
 *  32bit  zero?
 *  32bit  max samples per frame
 *   8bit  ?? (zero?)
 *   8bit  sample size
 *   8bit  history mult
 *   8bit  initial history
 *   8bit  kmodifier
 *   8bit  channels?
 *  16bit  ??
 *  32bit  max coded frame size
 *  32bit  bitrate?
 *  32bit  samplerate
 */
|
|
|
|
|
#include "avcodec.h" |
|
#include "bitstream.h" |
|
#include "bytestream.h" |
|
#include "unary.h" |
|
|
|
/* Exact number of extradata bytes the decoder requires before it will
 * initialize itself (the size of the trailing 'alac' atom). */
#define ALAC_EXTRADATA_SIZE 36

/* Upper bound on channels per frame; also the number of per-channel
 * work buffers kept in the decoder context. */
#define MAX_CHANNELS 2
|
|
|
typedef struct { |
|
|
|
AVCodecContext *avctx; |
|
GetBitContext gb; |
|
/* init to 0; first frame decode should initialize from extradata and |
|
* set this to 1 */ |
|
int context_initialized; |
|
|
|
int samplesize; |
|
int numchannels; |
|
int bytespersample; |
|
|
|
/* buffers */ |
|
int32_t *predicterror_buffer[MAX_CHANNELS]; |
|
|
|
int32_t *outputsamples_buffer[MAX_CHANNELS]; |
|
|
|
/* stuff from setinfo */ |
|
uint32_t setinfo_max_samples_per_frame; /* 0x1000 = 4096 */ /* max samples per frame? */ |
|
uint8_t setinfo_7a; /* 0x00 */ |
|
uint8_t setinfo_sample_size; /* 0x10 */ |
|
uint8_t setinfo_rice_historymult; /* 0x28 */ |
|
uint8_t setinfo_rice_initialhistory; /* 0x0a */ |
|
uint8_t setinfo_rice_kmodifier; /* 0x0e */ |
|
uint8_t setinfo_7f; /* 0x02 */ |
|
uint16_t setinfo_80; /* 0x00ff */ |
|
uint32_t setinfo_82; /* 0x000020e7 */ /* max sample size?? */ |
|
uint32_t setinfo_86; /* 0x00069fe4 */ /* bit rate (average)?? */ |
|
uint32_t setinfo_8a_rate; /* 0x0000ac44 */ |
|
/* end setinfo stuff */ |
|
|
|
} ALACContext; |
|
|
|
/**
 * Allocate the per-channel residual and output sample buffers.
 *
 * Every buffer holds setinfo_max_samples_per_frame 32-bit entries.
 * Allocation is all-or-nothing: if any single request fails, every buffer
 * is released again and all pointers are left NULL, so a caller can detect
 * the failure by inspecting any one of them.
 *
 * @param alac decoder context; setinfo_max_samples_per_frame must already
 *             be set
 */
static void allocate_buffers(ALACContext *alac)
{
    const unsigned int buffer_bytes =
        alac->setinfo_max_samples_per_frame * (unsigned int)sizeof(int32_t);
    int failed = 0;
    int chan;

    /* Assign every slot, even after a failure, so that no pointer is left
     * holding a stale value when the cleanup below runs. */
    for (chan = 0; chan < MAX_CHANNELS; chan++) {
        alac->predicterror_buffer[chan]  = av_malloc(buffer_bytes);
        alac->outputsamples_buffer[chan] = av_malloc(buffer_bytes);

        if (!alac->predicterror_buffer[chan] ||
            !alac->outputsamples_buffer[chan])
            failed = 1;
    }

    if (!failed)
        return;

    /* Partial failure: undo everything so the context never exposes a mix
     * of valid and NULL buffers. */
    for (chan = 0; chan < MAX_CHANNELS; chan++) {
        av_free(alac->predicterror_buffer[chan]);
        alac->predicterror_buffer[chan] = NULL;

        av_free(alac->outputsamples_buffer[chan]);
        alac->outputsamples_buffer[chan] = NULL;
    }
}
|
|
|
/**
 * Parse the codec parameters out of avctx->extradata and allocate the
 * per-channel work buffers.
 *
 * The caller is expected to have verified that the extradata is
 * ALAC_EXTRADATA_SIZE bytes long; this function reads bytes 12..35 of it.
 *
 * @param alac decoder context whose avctx has been set
 * @return 0 on success, -1 if the advertised frame length is zero or too
 *         large, or if the work buffers cannot be allocated (in which case
 *         all buffer pointers are left NULL)
 */
static int alac_set_info(ALACContext *alac)
{
    const unsigned char *ptr = alac->avctx->extradata;
    int chan;

    ptr += 4; /* size */
    ptr += 4; /* alac */
    ptr += 4; /* 0 ? */

    /* The buffers are max_samples * 4 bytes; reject values whose byte size
     * would not fit in an unsigned int. */
    if (AV_RB32(ptr) >= UINT_MAX/4) {
        av_log(alac->avctx, AV_LOG_ERROR, "setinfo_max_samples_per_frame too large\n");
        return -1;
    }
    /* A frame length of zero would give empty work buffers that the
     * decoder still writes sample 0 into. */
    if (AV_RB32(ptr) == 0) {
        av_log(alac->avctx, AV_LOG_ERROR, "setinfo_max_samples_per_frame is zero\n");
        return -1;
    }

    /* buffer size / 2 ? */
    alac->setinfo_max_samples_per_frame = bytestream_get_be32(&ptr);
    alac->setinfo_7a                    = *ptr++;
    alac->setinfo_sample_size           = *ptr++;
    alac->setinfo_rice_historymult      = *ptr++;
    alac->setinfo_rice_initialhistory   = *ptr++;
    alac->setinfo_rice_kmodifier        = *ptr++;
    /* channels? */
    alac->setinfo_7f                    = *ptr++;
    alac->setinfo_80                    = bytestream_get_be16(&ptr);
    /* max coded frame size */
    alac->setinfo_82                    = bytestream_get_be32(&ptr);
    /* bitrate ? */
    alac->setinfo_86                    = bytestream_get_be32(&ptr);
    /* samplerate */
    alac->setinfo_8a_rate               = bytestream_get_be32(&ptr);

    allocate_buffers(alac);

    /* allocate_buffers() has no return value, so inspect the pointers. */
    for (chan = 0; chan < MAX_CHANNELS; chan++) {
        if (!alac->predicterror_buffer[chan] ||
            !alac->outputsamples_buffer[chan])
            break;
    }

    if (chan < MAX_CHANNELS) {
        av_log(alac->avctx, AV_LOG_ERROR, "failed to allocate sample buffers\n");

        /* Release whatever was obtained and clear the pointers, so that a
         * retry on the next frame does not leak and the close callback
         * does not free anything twice. */
        for (chan = 0; chan < MAX_CHANNELS; chan++) {
            av_free(alac->predicterror_buffer[chan]);
            alac->predicterror_buffer[chan] = NULL;

            av_free(alac->outputsamples_buffer[chan]);
            alac->outputsamples_buffer[chan] = NULL;
        }
        return -1;
    }

    return 0;
}
|
|
|
/**
 * Return 31 minus av_log2(input).
 *
 * NOTE(review): presumably av_log2() yields the index of the highest set
 * bit, which would make this the number of leading zero bits of a 32-bit
 * value — confirm against the av_log2() definition.
 */
static inline int count_leading_zeros(int32_t input)
{
    const int top_bit = av_log2(input);

    return 31 - top_bit;
}
|
|
|
|
|
/**
 * Read one entropy-coded value from the bitstream.
 *
 * @param gb             bit reader to consume from
 * @param k              requested number of extra bits; capped at limit
 * @param limit          largest value k may take
 * @param readsamplesize bit width of the escape-coded raw value
 * @return the decoded value
 *
 * NOTE(review): with k == 0 after capping, the final branch would call
 * skip_bits(gb, -1); callers presumably guarantee k >= 1 — confirm.
 */
static inline int decode_scalar(GetBitContext *gb, int k, int limit, int readsamplesize){
    /* prefix length as returned by get_unary_0_9() */
    int value = get_unary_0_9(gb);
    int tail;

    /* RICE THRESHOLD: a prefix above 8 selects the escape encoding, where
     * the value follows verbatim */
    if (value > 8)
        return get_bits(gb, readsamplesize);

    if (k >= limit)
        k = limit;

    /* with a single extra bit the prefix alone is the value */
    if (k == 1)
        return value;

    tail = show_bits(gb, k);

    /* multiply the prefix by 2^k - 1, as part of their strange algorithm */
    value = (value << k) - value;

    if (tail > 1) {
        value += tail - 1;
        skip_bits(gb, k);
    } else {
        /* tail values 0 and 1 only occupy k - 1 bits */
        skip_bits(gb, k - 1);
    }

    return value;
}
|
|
|
/**
 * Entropy-decode output_size prediction residuals from alac->gb.
 *
 * @param alac                decoder context providing the bit reader
 * @param output_buffer       receives output_size decoded residuals
 * @param output_size         number of residuals to produce
 * @param readsamplesize      bit width used for escape-coded values
 * @param rice_initialhistory starting value of the adaptive history
 * @param rice_kmodifier      upper limit passed to decode_scalar() for k
 * @param rice_historymult    multiplier used when updating the history
 * @param rice_kmodifier_mask unused; kept so existing callers still compile
 *
 * Runs of zeros are coded as a single length. That length comes straight
 * from the bitstream, so it is clamped to the space left in output_buffer
 * before the run is written.
 */
static void bastardized_rice_decompress(ALACContext *alac,
                                 int32_t *output_buffer,
                                 int output_size,
                                 int readsamplesize, /* arg_10 */
                                 int rice_initialhistory, /* arg424->b */
                                 int rice_kmodifier, /* arg424->d */
                                 int rice_historymult, /* arg424->c */
                                 int rice_kmodifier_mask /* arg424->e */
        )
{
    int output_count;
    unsigned int history = rice_initialhistory;
    int sign_modifier = 0;

    (void)rice_kmodifier_mask;

    for (output_count = 0; output_count < output_size; output_count++) {
        int32_t x;
        int32_t x_modified;
        int32_t final_val;

        /* standard rice encoding */
        int k; /* size of extra bits */

        /* derive k from the current history */
        k = 31 - count_leading_zeros((history >> 9) + 3);
        x = decode_scalar(&alac->gb, k, rice_kmodifier, readsamplesize);

        /* the lowest bit carries the sign, the rest the magnitude */
        x_modified = sign_modifier + x;
        final_val = (x_modified + 1) / 2;
        if (x_modified & 1) final_val *= -1;

        output_buffer[output_count] = final_val;

        sign_modifier = 0;

        /* now update the history */
        history += x_modified * rice_historymult
                   - ((history * rice_historymult) >> 9);

        if (x_modified > 0xffff)
            history = 0xffff;

        /* special case: there may be compressed blocks of 0 */
        if ((history < 128) && (output_count+1 < output_size)) {
            int block_size, coded_block_size, run_k;

            sign_modifier = 1;

            run_k = count_leading_zeros(history) + ((history + 16) >> 6 /* / 64 */) - 24;

            coded_block_size = decode_scalar(&alac->gb, run_k, rice_kmodifier, 16);
            block_size = coded_block_size;

            if (block_size > 0) {
                /* slots still free after the sample just written */
                const int room = output_size - output_count - 1;

                if (block_size > room) {
                    av_log(alac->avctx, AV_LOG_ERROR,
                           "invalid zero block size of %d %d %d\n",
                           block_size, output_size, output_count);
                    block_size = room;
                }

                memset(&output_buffer[output_count+1], 0,
                       block_size * sizeof(*output_buffer));
                output_count += block_size;
            }

            /* decided on the value as coded, not on the clamped one */
            if (coded_block_size > 0xffff)
                sign_modifier = 0;

            history = 0;
        }
    }
}
|
|
|
/**
 * Sign-extend the low `bits` bits of val to a full 32-bit value.
 *
 * @param val  value whose low `bits` bits hold a two's-complement number
 * @param bits width of that number, 1..32
 * @return the sign-extended value; val unchanged if bits is outside 1..31
 *
 * The left shift is done on an unsigned copy, because shifting a signed
 * value into or past the sign bit is undefined behaviour. Widths outside
 * 1..32 would need a shift count outside 0..31, which is also undefined,
 * so they are passed through untouched.
 */
static inline int32_t extend_sign32(int32_t val, int bits)
{
    unsigned int shift;

    if (bits <= 0 || bits >= 32)
        return val;

    shift = 32 - bits;

    /* the right shift of the signed result brings the sign bit back down */
    return (int32_t)((uint32_t)val << shift) >> shift;
}
|
|
|
/**
 * Return the sign of v as an integer.
 *
 * @return 1 if v is positive, -1 if v is negative, 0 if v is zero
 */
static inline int sign_only(int v)
{
    /* each comparison evaluates to 0 or 1, so the difference is -1, 0 or 1 */
    return (v > 0) - (v < 0);
}
|
|
|
/**
 * Rebuild samples from prediction residuals with an adaptive FIR predictor.
 *
 * @param error_buffer             output_size residuals
 * @param buffer_out               receives output_size reconstructed samples
 * @param output_size              number of samples in the frame
 * @param readsamplesize           bit width every sample is sign-extended to
 * @param predictor_coef_table     predictor_coef_num coefficients; adapted
 *                                 in place while decoding
 * @param predictor_coef_num       predictor order: 0 copies the residuals
 *                                 unchanged, 0x1f selects plain first-order
 *                                 delta decoding
 * @param predictor_quantitization right shift applied to the prediction sum
 *
 * Nothing is written when output_size <= 0, and the warm-up phase never
 * writes past output_size even if the frame is shorter than the predictor
 * order.
 */
static void predictor_decompress_fir_adapt(int32_t *error_buffer,
                                           int32_t *buffer_out,
                                           int output_size,
                                           int readsamplesize,
                                           int16_t *predictor_coef_table,
                                           int predictor_coef_num,
                                           int predictor_quantitization)
{
    int i;
    int rounding;

    if (output_size <= 0)
        return;

    /* first sample always copies */
    *buffer_out = *error_buffer;

    if (!predictor_coef_num) {
        if (output_size <= 1)
            return;

        memcpy(buffer_out + 1, error_buffer + 1,
               (output_size - 1) * sizeof(*buffer_out));
        return;
    }

    if (predictor_coef_num == 0x1f) { /* 11111 - max value of predictor_coef_num */
        /* second-best case scenario for fir decompression,
         * error describes a small difference from the previous sample only
         */
        for (i = 0; i < output_size - 1; i++)
            buffer_out[i + 1] =
                extend_sign32(buffer_out[i] + error_buffer[i + 1], readsamplesize);
        return;
    }

    /* nothing further happens for a negative order */
    if (predictor_coef_num < 0)
        return;

    /* read warm-up samples: plain deltas until there is enough history for
     * the predictor, but never beyond the end of the frame */
    for (i = 0; i < predictor_coef_num && i + 1 < output_size; i++)
        buffer_out[i + 1] =
            extend_sign32(buffer_out[i] + error_buffer[i + 1], readsamplesize);

    /* rounding term for the quantization shift; a shift of 0 needs none,
     * and 1 << -1 would be undefined */
    rounding = predictor_quantitization > 0
             ? 1 << (predictor_quantitization - 1)
             : 0;

    /* general case; buffer_out slides forward by one sample per iteration,
     * so buffer_out[0] is always the oldest sample in the window */
    for (i = predictor_coef_num + 1; i < output_size; i++) {
        int j;
        int sum = 0;
        int outval;
        int error_val = error_buffer[i];

        for (j = 0; j < predictor_coef_num; j++) {
            sum += (buffer_out[predictor_coef_num-j] - buffer_out[0]) *
                   predictor_coef_table[j];
        }

        outval = rounding + sum;
        outval = outval >> predictor_quantitization;
        outval = outval + buffer_out[0] + error_val;
        outval = extend_sign32(outval, readsamplesize);

        buffer_out[predictor_coef_num+1] = outval;

        /* nudge the coefficients in the direction of the residual's sign
         * until the residual is used up */
        if (error_val > 0) {
            int predictor_num = predictor_coef_num - 1;

            while (predictor_num >= 0 && error_val > 0) {
                int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num];
                int sign = sign_only(val);

                predictor_coef_table[predictor_num] -= sign;

                val *= sign; /* absolute value */

                error_val -= ((val >> predictor_quantitization) *
                              (predictor_coef_num - predictor_num));

                predictor_num--;
            }
        } else if (error_val < 0) {
            int predictor_num = predictor_coef_num - 1;

            while (predictor_num >= 0 && error_val < 0) {
                int val = buffer_out[0] - buffer_out[predictor_coef_num - predictor_num];
                int sign = - sign_only(val);

                predictor_coef_table[predictor_num] -= sign;

                val *= sign; /* neg value */

                error_val -= ((val >> predictor_quantitization) *
                              (predictor_coef_num - predictor_num));

                predictor_num--;
            }
        }

        buffer_out++;
    }
}
|
|
|
/**
 * Turn two decoded channel buffers into interleaved 16-bit output.
 *
 * @param buffer                 the two per-channel sample buffers
 * @param buffer_out             destination; sample i of the pair goes to
 *                               slots i*numchannels and i*numchannels + 1
 * @param numchannels            distance, in samples, between consecutive
 *                               output frames
 * @param numsamples             samples per channel; nothing is written
 *                               if <= 0
 * @param interlacing_shift      right shift applied to the weighted term
 * @param interlacing_leftweight weight of the mixing step; 0 means the
 *                               channels are copied through unchanged
 */
static void reconstruct_stereo_16(int32_t *buffer[MAX_CHANNELS],
                                  int16_t *buffer_out,
                                  int numchannels, int numsamples,
                                  uint8_t interlacing_shift,
                                  uint8_t interlacing_leftweight)
{
    const int32_t *chan0;
    const int32_t *chan1;
    int n;

    if (numsamples <= 0)
        return;

    chan0 = buffer[0];
    chan1 = buffer[1];

    if (!interlacing_leftweight) {
        /* basic interlacing: both channels were stored as-is */
        for (n = 0; n < numsamples; n++) {
            int16_t *frame = &buffer_out[n * numchannels];

            frame[0] = chan0[n];
            frame[1] = chan1[n];
        }
        return;
    }

    /* weighted interlacing: undo the mixing the stream was coded with */
    for (n = 0; n < numsamples; n++) {
        int16_t *frame = &buffer_out[n * numchannels];
        int32_t second = chan0[n] -
                         ((chan1[n] * interlacing_leftweight) >> interlacing_shift);
        int32_t first  = chan1[n] + second;

        frame[0] = first;
        frame[1] = second;
    }
}
|
|
|
/**
 * Decode one ALAC packet into interleaved PCM.
 *
 * On the first call the decoder parameters are parsed from the extradata
 * and the work buffers are allocated.
 *
 * @param avctx             codec context; priv_data is the ALACContext
 * @param outbuffer         destination for the decoded samples (written as
 *                          int16_t for 16-bit streams)
 * @param outputsize        set to the number of bytes produced, or to 0 on
 *                          every path that returns without decoding
 * @param inbuffer          packet data
 * @param input_buffer_size packet size in bytes
 * @return input_buffer_size in all cases (the packet is always consumed)
 *
 * The sample count stored in a frame header is checked against the size
 * of the work buffers before anything is decoded.
 */
static int alac_decode_frame(AVCodecContext *avctx,
                             void *outbuffer, int *outputsize,
                             const uint8_t *inbuffer, int input_buffer_size)
{
    ALACContext *alac = avctx->priv_data;

    int channels;
    int32_t outputsamples;
    int hassize;
    int readsamplesize;
    int wasted_bytes;
    int isnotcompressed;
    uint8_t interlacing_shift;
    uint8_t interlacing_leftweight;

    /* short-circuit null buffers */
    if (!inbuffer || !input_buffer_size) {
        *outputsize = 0;
        return input_buffer_size;
    }

    /* initialize from the extradata */
    if (!alac->context_initialized) {
        if (alac->avctx->extradata_size != ALAC_EXTRADATA_SIZE) {
            av_log(avctx, AV_LOG_ERROR, "alac: expected %d extradata bytes\n",
                ALAC_EXTRADATA_SIZE);
            *outputsize = 0;
            return input_buffer_size;
        }
        if (alac_set_info(alac)) {
            av_log(avctx, AV_LOG_ERROR, "alac: set_info failed\n");
            *outputsize = 0;
            return input_buffer_size;
        }
        alac->context_initialized = 1;
    }

    init_get_bits(&alac->gb, inbuffer, input_buffer_size * 8);

    channels = get_bits(&alac->gb, 3) + 1;
    if (channels > MAX_CHANNELS) {
        av_log(avctx, AV_LOG_ERROR, "channels > %d not supported\n",
               MAX_CHANNELS);
        *outputsize = 0;
        return input_buffer_size;
    }

    /* 2^result = something to do with output waiting.
     * perhaps matters if we read > 1 frame in a pass?
     */
    skip_bits(&alac->gb, 4);

    skip_bits(&alac->gb, 12); /* unknown, skip 12 bits */

    /* the output sample size is stored soon */
    hassize = get_bits1(&alac->gb);

    wasted_bytes = get_bits(&alac->gb, 2); /* unknown ? */

    /* whether the frame is compressed */
    isnotcompressed = get_bits1(&alac->gb);

    if (hassize) {
        /* now read the number of samples as a 32bit integer.
         * NOTE(review): get_bits() is presumably limited to short reads
         * (the wide-sample path below also splits at 16 bits), so the
         * count is assembled from two 16-bit halves — confirm the limit. */
        uint32_t coded_samples = (uint32_t)get_bits(&alac->gb, 16) << 16;
        coded_samples |= get_bits(&alac->gb, 16);

        /* the work buffers only hold setinfo_max_samples_per_frame
         * entries; a larger count would overflow them */
        if (coded_samples > alac->setinfo_max_samples_per_frame) {
            av_log(avctx, AV_LOG_ERROR, "outputsamples %u > %u\n",
                   (unsigned)coded_samples,
                   (unsigned)alac->setinfo_max_samples_per_frame);
            *outputsize = 0;
            return input_buffer_size;
        }
        outputsamples = coded_samples;
    } else
        outputsamples = alac->setinfo_max_samples_per_frame;

    *outputsize = outputsamples * alac->bytespersample;
    readsamplesize = alac->setinfo_sample_size - (wasted_bytes * 8) + channels - 1;

    if (!isnotcompressed) {
        /* so it is compressed */
        int16_t predictor_coef_table[MAX_CHANNELS][32];
        int predictor_coef_num[MAX_CHANNELS];
        int prediction_type[MAX_CHANNELS];
        int prediction_quantitization[MAX_CHANNELS];
        int ricemodifier[MAX_CHANNELS];
        int i, chan;

        interlacing_shift = get_bits(&alac->gb, 8);
        interlacing_leftweight = get_bits(&alac->gb, 8);

        for (chan = 0; chan < channels; chan++) {
            prediction_type[chan] = get_bits(&alac->gb, 4);
            prediction_quantitization[chan] = get_bits(&alac->gb, 4);

            ricemodifier[chan] = get_bits(&alac->gb, 3);
            /* 5 bits, so at most 31 coefficients per channel */
            predictor_coef_num[chan] = get_bits(&alac->gb, 5);

            /* read the predictor table */
            for (i = 0; i < predictor_coef_num[chan]; i++)
                predictor_coef_table[chan][i] = (int16_t)get_bits(&alac->gb, 16);
        }

        if (wasted_bytes)
            av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented, unhandling of wasted_bytes\n");

        for (chan = 0; chan < channels; chan++) {
            bastardized_rice_decompress(alac,
                                        alac->predicterror_buffer[chan],
                                        outputsamples,
                                        readsamplesize,
                                        alac->setinfo_rice_initialhistory,
                                        alac->setinfo_rice_kmodifier,
                                        ricemodifier[chan] * alac->setinfo_rice_historymult / 4,
                                        (1 << alac->setinfo_rice_kmodifier) - 1);

            if (prediction_type[chan] == 0) {
                /* adaptive fir */
                predictor_decompress_fir_adapt(alac->predicterror_buffer[chan],
                                               alac->outputsamples_buffer[chan],
                                               outputsamples,
                                               readsamplesize,
                                               predictor_coef_table[chan],
                                               predictor_coef_num[chan],
                                               prediction_quantitization[chan]);
            } else {
                av_log(avctx, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type[chan]);
                /* I think the only other prediction type (or perhaps this is
                 * just a boolean?) runs adaptive fir twice.. like:
                 * predictor_decompress_fir_adapt(predictor_error, tempout, ...)
                 * predictor_decompress_fir_adapt(predictor_error, outputsamples ...)
                 * little strange..
                 */
            }
        }
    } else {
        /* not compressed, easy case */
        if (alac->setinfo_sample_size <= 16) {
            int i, chan;
            for (chan = 0; chan < channels; chan++)
                for (i = 0; i < outputsamples; i++) {
                    int32_t audiobits;

                    audiobits = get_bits(&alac->gb, alac->setinfo_sample_size);
                    audiobits = extend_sign32(audiobits, readsamplesize);

                    alac->outputsamples_buffer[chan][i] = audiobits;
                }
        } else {
            int i, chan;
            for (chan = 0; chan < channels; chan++)
                for (i = 0; i < outputsamples; i++) {
                    int32_t audiobits;

                    audiobits = get_bits(&alac->gb, 16);
                    /* special case of sign extension..
                     * as we'll be ORing the low 16bits into this */
                    audiobits = audiobits << 16;
                    audiobits = audiobits >> (32 - alac->setinfo_sample_size);
                    audiobits |= get_bits(&alac->gb, alac->setinfo_sample_size - 16);

                    alac->outputsamples_buffer[chan][i] = audiobits;
                }
        }
        /* wasted_bytes = 0; */
        interlacing_shift = 0;
        interlacing_leftweight = 0;
    }

    switch(alac->setinfo_sample_size) {
    case 16:
        if (channels == 2) {
            reconstruct_stereo_16(alac->outputsamples_buffer,
                                  (int16_t*)outbuffer,
                                  alac->numchannels,
                                  outputsamples,
                                  interlacing_shift,
                                  interlacing_leftweight);
        } else {
            int i;
            for (i = 0; i < outputsamples; i++) {
                int16_t sample = alac->outputsamples_buffer[0][i];
                ((int16_t*)outbuffer)[i * alac->numchannels] = sample;
            }
        }
        break;
    case 20:
    case 24:
        // It is not clear if there exist any encoder that creates 24 bit ALAC
        // files. iTunes convert 24 bit raw files to 16 bit before encoding.
    case 32:
        av_log(avctx, AV_LOG_ERROR, "FIXME: unimplemented sample size %i\n", alac->setinfo_sample_size);
        break;
    default:
        break;
    }

    return input_buffer_size;
}
|
|
|
/**
 * Codec init callback: link the private context to avctx and cache the
 * sample format.
 *
 * Extradata parsing and buffer allocation are deferred to the first
 * decoded frame (context_initialized is cleared here).
 *
 * @return always 0
 */
static av_cold int alac_decode_init(AVCodecContext * avctx)
{
    ALACContext *ctx = avctx->priv_data;

    ctx->avctx               = avctx;
    ctx->context_initialized = 0;

    ctx->samplesize  = avctx->bits_per_sample;
    ctx->numchannels = avctx->channels;

    /* bytes occupied by one sample of every channel */
    ctx->bytespersample = (ctx->samplesize / 8) * ctx->numchannels;

    return 0;
}
|
|
|
/**
 * Codec close callback: release the per-channel work buffers.
 *
 * @return always 0
 */
static av_cold int alac_decode_close(AVCodecContext *avctx)
{
    ALACContext *ctx = avctx->priv_data;
    int ch = 0;

    while (ch < MAX_CHANNELS) {
        av_free(ctx->predicterror_buffer[ch]);
        av_free(ctx->outputsamples_buffer[ch]);
        ch++;
    }

    return 0;
}
|
|
|
AVCodec alac_decoder = { |
|
"alac", |
|
CODEC_TYPE_AUDIO, |
|
CODEC_ID_ALAC, |
|
sizeof(ALACContext), |
|
alac_decode_init, |
|
NULL, |
|
alac_decode_close, |
|
alac_decode_frame, |
|
};
|
|
|