Merge remote-tracking branch 'qatar/master'

* qatar/master:
  dnxhddec: optimise dnxhd_decode_dct_block()
  rtp: remove disabled code
  eac3enc: use different numbers of blocks per frame to allow higher bitrates
  dnxhd: add regression test for 10-bit
  dnxhd: 10-bit support
  dsputil: update per-arch init funcs for non-h264 high bit depth
  dsputil: template get_pixels() for different bit depths
  dsputil: create 16/32-bit dctcoef versions of some functions
  jfdctint: add 10-bit version
  mov: add clcp type track as Subtitle stream.
  mpeg4: add Mpeg4 Profiles names.
  mpeg4: decode Level Profile for MPEG4 Part 2.
  ffprobe: display bitstream level.
  imgconvert: remove unused glue and xglue macros

Conflicts:
	libavcodec/dsputil_template.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
pull/2/head
Michael Niedermayer 14 years ago
commit 4095fa9038
  1. 1
      ffprobe.c
  2. 179
      libavcodec/ac3enc.c
  3. 2
      libavcodec/ac3enc.h
  4. 2
      libavcodec/ac3enc_fixed.c
  5. 2
      libavcodec/ac3enc_float.c
  6. 24
      libavcodec/ac3enc_template.c
  7. 5
      libavcodec/alpha/dsputil_alpha.c
  8. 2
      libavcodec/arm/dsputil_init_arm.c
  9. 5
      libavcodec/arm/dsputil_init_armv6.c
  10. 2
      libavcodec/arm/dsputil_init_neon.c
  11. 2
      libavcodec/arm/dsputil_iwmmxt.c
  12. 17
      libavcodec/avcodec.h
  13. 10
      libavcodec/bfin/dsputil_bfin.c
  14. 2
      libavcodec/dct-test.c
  15. 202
      libavcodec/dnxhddata.c
  16. 2
      libavcodec/dnxhddata.h
  17. 159
      libavcodec/dnxhddec.c
  18. 240
      libavcodec/dnxhdenc.c
  19. 4
      libavcodec/dnxhdenc.h
  20. 72
      libavcodec/dsputil.c
  21. 11
      libavcodec/dsputil.h
  22. 129
      libavcodec/dsputil_template.c
  23. 30
      libavcodec/eac3enc.c
  24. 1
      libavcodec/h264.c
  25. 3
      libavcodec/imgconvert.c
  26. 413
      libavcodec/jfdctint.c
  27. 405
      libavcodec/jfdctint_template.c
  28. 5
      libavcodec/mlib/dsputil_mlib.c
  29. 39
      libavcodec/mpeg4videodec.c
  30. 3
      libavcodec/mpegvideo_enc.c
  31. 7
      libavcodec/ppc/dsputil_altivec.c
  32. 7
      libavcodec/ppc/dsputil_ppc.c
  33. 2
      libavcodec/ppc/h264_altivec.c
  34. 4
      libavcodec/ps2/dsputil_mmi.c
  35. 2
      libavcodec/sh4/dsputil_align.c
  36. 2
      libavcodec/sh4/dsputil_sh4.c
  37. 2
      libavcodec/sparc/dsputil_vis.c
  38. 3
      libavcodec/x86/dnxhd_mmx.c
  39. 2
      libavcodec/x86/dsputil_mmx.c
  40. 10
      libavcodec/x86/dsputilenc_mmx.c
  41. 2
      libavformat/mov.c
  42. 16
      libavformat/rtpdec.c
  43. 23
      libavformat/rtpproto.c
  44. 5
      tests/codec-regression.sh
  45. 4
      tests/ref/vsynth1/dnxhd_720p_10bit
  46. 4
      tests/ref/vsynth2/dnxhd_720p_10bit

@ -202,6 +202,7 @@ static void show_stream(AVFormatContext *fmt_ctx, int stream_idx)
}
printf("pix_fmt=%s\n", dec_ctx->pix_fmt != PIX_FMT_NONE ?
av_pix_fmt_descriptors[dec_ctx->pix_fmt].name : "unknown");
printf("level=%d\n", dec_ctx->level);
break;
case AVMEDIA_TYPE_AUDIO:

@ -186,7 +186,7 @@ void ff_ac3_adjust_frame_size(AC3EncodeContext *s)
s->frame_size = s->frame_size_min +
2 * (s->bits_written * s->sample_rate < s->samples_written * s->bit_rate);
s->bits_written += s->frame_size * 8;
s->samples_written += AC3_FRAME_SIZE;
s->samples_written += AC3_BLOCK_SIZE * s->num_blocks;
}
@ -198,7 +198,7 @@ void ff_ac3_compute_coupling_strategy(AC3EncodeContext *s)
/* set coupling use flags for each block/channel */
/* TODO: turn coupling on/off and adjust start band based on bit usage */
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = 1; ch <= s->fbw_channels; ch++)
block->channel_in_cpl[ch] = s->cpl_on;
@ -208,7 +208,7 @@ void ff_ac3_compute_coupling_strategy(AC3EncodeContext *s)
enabled for that block */
got_cpl_snr = 0;
num_cpl_blocks = 0;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
block->num_cpl_channels = 0;
for (ch = 1; ch <= s->fbw_channels; ch++)
@ -244,7 +244,7 @@ void ff_ac3_compute_coupling_strategy(AC3EncodeContext *s)
s->cpl_on = 0;
/* set bandwidth for each channel */
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (block->channel_in_cpl[ch])
@ -269,7 +269,7 @@ void ff_ac3_apply_rematrixing(AC3EncodeContext *s)
if (!s->rematrixing_enabled)
return;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
if (block->new_rematrixing_strategy)
flags = block->rematrixing_flags;
@ -318,7 +318,7 @@ static av_cold void exponent_init(AC3EncodeContext *s)
static void extract_exponents(AC3EncodeContext *s)
{
int ch = !s->cpl_on;
int chan_size = AC3_MAX_COEFS * AC3_MAX_BLOCKS * (s->channels - ch + 1);
int chan_size = AC3_MAX_COEFS * s->num_blocks * (s->channels - ch + 1);
AC3Block *block = &s->blocks[0];
s->ac3dsp.extract_exponents(block->exp[ch], block->fixed_coef[ch], chan_size);
@ -331,6 +331,15 @@ static void extract_exponents(AC3EncodeContext *s)
*/
#define EXP_DIFF_THRESHOLD 500
/**
* Table used to select exponent strategy based on exponent reuse block interval.
*/
static const uint8_t exp_strategy_reuse_tab[4][6] = {
{ EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15 },
{ EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15, EXP_D15 },
{ EXP_D25, EXP_D25, EXP_D15, EXP_D15, EXP_D15, EXP_D15 },
{ EXP_D45, EXP_D25, EXP_D25, EXP_D15, EXP_D15, EXP_D15 }
};
/**
* Calculate exponent strategies for all channels.
@ -349,7 +358,7 @@ static void compute_exp_strategy(AC3EncodeContext *s)
reused in the next frame */
exp_strategy[0] = EXP_NEW;
exp += AC3_MAX_COEFS;
for (blk = 1; blk < AC3_MAX_BLOCKS; blk++, exp += AC3_MAX_COEFS) {
for (blk = 1; blk < s->num_blocks; blk++, exp += AC3_MAX_COEFS) {
if (ch == CPL_CH) {
if (!s->blocks[blk-1].cpl_in_use) {
exp_strategy[blk] = EXP_NEW;
@ -373,23 +382,18 @@ static void compute_exp_strategy(AC3EncodeContext *s)
/* now select the encoding strategy type : if exponents are often
recoded, we use a coarse encoding */
blk = 0;
while (blk < AC3_MAX_BLOCKS) {
while (blk < s->num_blocks) {
blk1 = blk + 1;
while (blk1 < AC3_MAX_BLOCKS && exp_strategy[blk1] == EXP_REUSE)
while (blk1 < s->num_blocks && exp_strategy[blk1] == EXP_REUSE)
blk1++;
switch (blk1 - blk) {
case 1: exp_strategy[blk] = EXP_D45; break;
case 2:
case 3: exp_strategy[blk] = EXP_D25; break;
default: exp_strategy[blk] = EXP_D15; break;
}
exp_strategy[blk] = exp_strategy_reuse_tab[s->num_blks_code][blk1-blk-1];
blk = blk1;
}
}
if (s->lfe_on) {
ch = s->lfe_channel;
s->exp_strategy[ch][0] = EXP_D15;
for (blk = 1; blk < AC3_MAX_BLOCKS; blk++)
for (blk = 1; blk < s->num_blocks; blk++)
s->exp_strategy[ch][blk] = EXP_REUSE;
}
@ -487,7 +491,7 @@ static void encode_exponents(AC3EncodeContext *s)
cpl = (ch == CPL_CH);
blk = 0;
while (blk < AC3_MAX_BLOCKS) {
while (blk < s->num_blocks) {
AC3Block *block = &s->blocks[blk];
if (cpl && !block->cpl_in_use) {
exp += AC3_MAX_COEFS;
@ -500,7 +504,7 @@ static void encode_exponents(AC3EncodeContext *s)
/* count the number of EXP_REUSE blocks after the current block
and set exponent reference block numbers */
s->exp_ref_block[ch][blk] = blk;
while (blk1 < AC3_MAX_BLOCKS && exp_strategy[blk1] == EXP_REUSE) {
while (blk1 < s->num_blocks && exp_strategy[blk1] == EXP_REUSE) {
s->exp_ref_block[ch][blk1] = blk;
blk1++;
}
@ -536,7 +540,7 @@ static void group_exponents(AC3EncodeContext *s)
int exp0, exp1;
bit_count = 0;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
int exp_strategy = s->exp_strategy[ch][blk];
@ -625,30 +629,38 @@ static void count_frame_bits_fixed(AC3EncodeContext *s)
if (s->eac3) {
/* bitstream info header */
frame_bits += 35;
frame_bits += 1 + 1 + 1;
frame_bits += 1 + 1;
if (s->num_blocks != 0x6)
frame_bits++;
frame_bits++;
/* audio frame header */
frame_bits += 2;
if (s->num_blocks == 6)
frame_bits += 2;
frame_bits += 10;
/* exponent strategy */
if (s->use_frame_exp_strategy)
frame_bits += 5 * s->fbw_channels;
else
frame_bits += AC3_MAX_BLOCKS * 2 * s->fbw_channels;
frame_bits += s->num_blocks * 2 * s->fbw_channels;
if (s->lfe_on)
frame_bits += AC3_MAX_BLOCKS;
frame_bits += s->num_blocks;
/* converter exponent strategy */
frame_bits += s->fbw_channels * 5;
if (s->num_blks_code != 0x3)
frame_bits++;
else
frame_bits += s->fbw_channels * 5;
/* snr offsets */
frame_bits += 10;
/* block start info */
frame_bits++;
if (s->num_blocks != 1)
frame_bits++;
} else {
frame_bits += 49;
frame_bits += frame_bits_inc[s->channel_mode];
}
/* audio blocks */
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
if (!s->eac3) {
/* block switch flags */
frame_bits += s->fbw_channels;
@ -750,7 +762,7 @@ static void count_frame_bits(AC3EncodeContext *s)
/* coupling */
if (s->channel_mode > AC3_CHMODE_MONO) {
frame_bits++;
for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 1; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
frame_bits++;
if (block->new_cpl_strategy)
@ -762,7 +774,7 @@ static void count_frame_bits(AC3EncodeContext *s)
if (s->use_frame_exp_strategy) {
frame_bits += 5 * s->cpl_on;
} else {
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
for (blk = 0; blk < s->num_blocks; blk++)
frame_bits += 2 * s->blocks[blk].cpl_in_use;
}
}
@ -778,7 +790,7 @@ static void count_frame_bits(AC3EncodeContext *s)
}
/* audio blocks */
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
/* coupling strategy */
@ -865,7 +877,7 @@ static void bit_alloc_masking(AC3EncodeContext *s)
{
int blk, ch;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
/* We only need psd and mask for calculating bap.
@ -901,9 +913,9 @@ static void reset_block_bap(AC3EncodeContext *s)
ref_bap = s->bap_buffer;
for (ch = 0; ch <= s->channels; ch++) {
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
for (blk = 0; blk < s->num_blocks; blk++)
s->ref_bap[ch][blk] = ref_bap + AC3_MAX_COEFS * s->exp_ref_block[ch][blk];
ref_bap += AC3_MAX_COEFS * AC3_MAX_BLOCKS;
ref_bap += AC3_MAX_COEFS * s->num_blocks;
}
s->ref_bap_set = 1;
}
@ -936,7 +948,7 @@ static void count_mantissa_bits_update_ch(AC3EncodeContext *s, int ch,
{
int blk;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
if (ch == CPL_CH && !block->cpl_in_use)
continue;
@ -980,7 +992,7 @@ static int bit_alloc(AC3EncodeContext *s, int snr_offset)
snr_offset = (snr_offset - 240) << 2;
reset_block_bap(s);
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = !block->cpl_in_use; ch <= s->channels; ch++) {
@ -1194,7 +1206,7 @@ void ff_ac3_quantize_mantissas(AC3EncodeContext *s)
{
int blk, ch, ch0=0, got_cpl;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
AC3Mant m = { 0 };
@ -1557,7 +1569,7 @@ void ff_ac3_output_frame(AC3EncodeContext *s, unsigned char *frame)
s->output_frame_header(s);
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
for (blk = 0; blk < s->num_blocks; blk++)
output_audio_block(s, blk);
output_frame_end(s);
@ -1585,6 +1597,7 @@ static void dprint_options(AC3EncodeContext *s)
av_dlog(avctx, "channel_layout: %s\n", strbuf);
av_dlog(avctx, "sample_rate: %d\n", s->sample_rate);
av_dlog(avctx, "bit_rate: %d\n", s->bit_rate);
av_dlog(avctx, "blocks/frame: %d (code=%d)\n", s->num_blocks, s->num_blks_code);
if (s->cutoff)
av_dlog(avctx, "cutoff: %d\n", s->cutoff);
@ -1851,7 +1864,7 @@ av_cold int ff_ac3_encode_close(AVCodecContext *avctx)
av_freep(&s->qmant_buffer);
av_freep(&s->cpl_coord_exp_buffer);
av_freep(&s->cpl_coord_mant_buffer);
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
av_freep(&block->mdct_coef);
av_freep(&block->fixed_coef);
@ -1958,18 +1971,30 @@ static av_cold int validate_options(AC3EncodeContext *s)
/* validate bit rate */
if (s->eac3) {
int max_br, min_br, wpf, min_br_dist, min_br_code;
int num_blks_code, num_blocks, frame_samples;
/* calculate min/max bitrate */
max_br = 2048 * s->sample_rate / AC3_FRAME_SIZE * 16;
min_br = ((s->sample_rate + (AC3_FRAME_SIZE-1)) / AC3_FRAME_SIZE) * 16;
/* TODO: More testing with 3 and 2 blocks. All E-AC-3 samples I've
found use either 6 blocks or 1 block, even though 2 or 3 blocks
would work as far as the bit rate is concerned. */
for (num_blks_code = 3; num_blks_code >= 0; num_blks_code--) {
num_blocks = ((int[]){ 1, 2, 3, 6 })[num_blks_code];
frame_samples = AC3_BLOCK_SIZE * num_blocks;
max_br = 2048 * s->sample_rate / frame_samples * 16;
min_br = ((s->sample_rate + (frame_samples-1)) / frame_samples) * 16;
if (avctx->bit_rate <= max_br)
break;
}
if (avctx->bit_rate < min_br || avctx->bit_rate > max_br) {
av_log(avctx, AV_LOG_ERROR, "invalid bit rate. must be %d to %d "
"for this sample rate\n", min_br, max_br);
return AVERROR(EINVAL);
}
s->num_blks_code = num_blks_code;
s->num_blocks = num_blocks;
/* calculate words-per-frame for the selected bitrate */
wpf = (avctx->bit_rate / 16) * AC3_FRAME_SIZE / s->sample_rate;
wpf = (avctx->bit_rate / 16) * frame_samples / s->sample_rate;
av_assert1(wpf > 0 && wpf <= 2048);
/* find the closest AC-3 bitrate code to the selected bitrate.
@ -2001,6 +2026,8 @@ static av_cold int validate_options(AC3EncodeContext *s)
}
s->frame_size_code = i << 1;
s->frame_size_min = 2 * ff_ac3_frame_size_tab[s->frame_size_code][s->bit_alloc.sr_code];
s->num_blks_code = 0x3;
s->num_blocks = 6;
}
s->bit_rate = avctx->bit_rate;
s->frame_size = s->frame_size_min;
@ -2065,13 +2092,13 @@ static av_cold void set_bandwidth(AC3EncodeContext *s)
/* set number of coefficients for each channel */
for (ch = 1; ch <= s->fbw_channels; ch++) {
s->start_freq[ch] = 0;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
for (blk = 0; blk < s->num_blocks; blk++)
s->blocks[blk].end_freq[ch] = s->bandwidth_code * 3 + 73;
}
/* LFE channel always has 7 coefs */
if (s->lfe_on) {
s->start_freq[s->lfe_channel] = 0;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
for (blk = 0; blk < s->num_blocks; blk++)
s->blocks[blk].end_freq[ch] = 7;
}
@ -2108,7 +2135,7 @@ static av_cold void set_bandwidth(AC3EncodeContext *s)
s->start_freq[CPL_CH] = cpl_start_band * 12 + 37;
s->cpl_end_freq = cpl_end_band * 12 + 37;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
for (blk = 0; blk < s->num_blocks; blk++)
s->blocks[blk].end_freq[CPL_CH] = s->cpl_end_freq;
}
}
@ -2119,35 +2146,37 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
AVCodecContext *avctx = s->avctx;
int blk, ch;
int channels = s->channels + 1; /* includes coupling channel */
int channel_blocks = channels * s->num_blocks;
int total_coefs = AC3_MAX_COEFS * channel_blocks;
if (s->allocate_sample_buffers(s))
goto alloc_fail;
FF_ALLOC_OR_GOTO(avctx, s->bap_buffer, AC3_MAX_BLOCKS * channels *
AC3_MAX_COEFS * sizeof(*s->bap_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, AC3_MAX_BLOCKS * channels *
AC3_MAX_COEFS * sizeof(*s->bap1_buffer), alloc_fail);
FF_ALLOCZ_OR_GOTO(avctx, s->mdct_coef_buffer, AC3_MAX_BLOCKS * channels *
AC3_MAX_COEFS * sizeof(*s->mdct_coef_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->exp_buffer, AC3_MAX_BLOCKS * channels *
AC3_MAX_COEFS * sizeof(*s->exp_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->grouped_exp_buffer, AC3_MAX_BLOCKS * channels *
128 * sizeof(*s->grouped_exp_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->psd_buffer, AC3_MAX_BLOCKS * channels *
AC3_MAX_COEFS * sizeof(*s->psd_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->band_psd_buffer, AC3_MAX_BLOCKS * channels *
64 * sizeof(*s->band_psd_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->mask_buffer, AC3_MAX_BLOCKS * channels *
64 * sizeof(*s->mask_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->qmant_buffer, AC3_MAX_BLOCKS * channels *
AC3_MAX_COEFS * sizeof(*s->qmant_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->bap_buffer, total_coefs *
sizeof(*s->bap_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->bap1_buffer, total_coefs *
sizeof(*s->bap1_buffer), alloc_fail);
FF_ALLOCZ_OR_GOTO(avctx, s->mdct_coef_buffer, total_coefs *
sizeof(*s->mdct_coef_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->exp_buffer, total_coefs *
sizeof(*s->exp_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->grouped_exp_buffer, channel_blocks * 128 *
sizeof(*s->grouped_exp_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->psd_buffer, total_coefs *
sizeof(*s->psd_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->band_psd_buffer, channel_blocks * 64 *
sizeof(*s->band_psd_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->mask_buffer, channel_blocks * 64 *
sizeof(*s->mask_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->qmant_buffer, total_coefs *
sizeof(*s->qmant_buffer), alloc_fail);
if (s->cpl_enabled) {
FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_exp_buffer, AC3_MAX_BLOCKS * channels *
16 * sizeof(*s->cpl_coord_exp_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_mant_buffer, AC3_MAX_BLOCKS * channels *
16 * sizeof(*s->cpl_coord_mant_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_exp_buffer, channel_blocks * 16 *
sizeof(*s->cpl_coord_exp_buffer), alloc_fail);
FF_ALLOC_OR_GOTO(avctx, s->cpl_coord_mant_buffer, channel_blocks * 16 *
sizeof(*s->cpl_coord_mant_buffer), alloc_fail);
}
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
FF_ALLOCZ_OR_GOTO(avctx, block->mdct_coef, channels * sizeof(*block->mdct_coef),
alloc_fail);
@ -2183,23 +2212,23 @@ static av_cold int allocate_buffers(AC3EncodeContext *s)
}
/* arrangement: channel, block, coeff */
block->exp[ch] = &s->exp_buffer [AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)];
block->mdct_coef[ch] = &s->mdct_coef_buffer [AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)];
block->exp[ch] = &s->exp_buffer [AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
block->mdct_coef[ch] = &s->mdct_coef_buffer [AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
}
}
if (!s->fixed_point) {
FF_ALLOCZ_OR_GOTO(avctx, s->fixed_coef_buffer, AC3_MAX_BLOCKS * channels *
AC3_MAX_COEFS * sizeof(*s->fixed_coef_buffer), alloc_fail);
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
FF_ALLOCZ_OR_GOTO(avctx, s->fixed_coef_buffer, total_coefs *
sizeof(*s->fixed_coef_buffer), alloc_fail);
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, channels *
sizeof(*block->fixed_coef), alloc_fail);
for (ch = 0; ch < channels; ch++)
block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (AC3_MAX_BLOCKS * ch + blk)];
block->fixed_coef[ch] = &s->fixed_coef_buffer[AC3_MAX_COEFS * (s->num_blocks * ch + blk)];
}
} else {
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
FF_ALLOCZ_OR_GOTO(avctx, block->fixed_coef, channels *
sizeof(*block->fixed_coef), alloc_fail);
@ -2226,14 +2255,14 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx)
s->eac3 = avctx->codec_id == CODEC_ID_EAC3;
avctx->frame_size = AC3_FRAME_SIZE;
ff_ac3_common_init();
ret = validate_options(s);
if (ret)
return ret;
avctx->frame_size = AC3_BLOCK_SIZE * s->num_blocks;
s->bitstream_mode = avctx->audio_service_type;
if (s->bitstream_mode == AV_AUDIO_SERVICE_TYPE_KARAOKE)
s->bitstream_mode = 0x7;

@ -152,6 +152,8 @@ typedef struct AC3EncodeContext {
int bit_rate; ///< target bit rate, in bits-per-second
int sample_rate; ///< sampling frequency, in Hz
int num_blks_code; ///< number of blocks code (numblkscod)
int num_blocks; ///< number of blocks per frame
int frame_size_min; ///< minimum frame size in case rounding is necessary
int frame_size; ///< current frame size in bytes
int frame_size_code; ///< frame size code (frmsizecod)

@ -93,7 +93,7 @@ static void scale_coefficients(AC3EncodeContext *s)
{
int blk, ch;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = 1; ch <= s->channels; ch++) {
s->ac3dsp.ac3_rshift_int32(block->mdct_coef[ch], AC3_MAX_COEFS,

@ -103,7 +103,7 @@ static int normalize_samples(AC3EncodeContext *s)
*/
static void scale_coefficients(AC3EncodeContext *s)
{
int chan_size = AC3_MAX_COEFS * AC3_MAX_BLOCKS;
int chan_size = AC3_MAX_COEFS * s->num_blocks;
s->ac3dsp.float_to_fixed24(s->fixed_coef_buffer + chan_size,
s->mdct_coef_buffer + chan_size,
chan_size * s->channels);

@ -79,13 +79,13 @@ static void deinterleave_input_samples(AC3EncodeContext *s,
int sinc;
/* copy last 256 samples of previous frame to the start of the current frame */
memcpy(&s->planar_samples[ch][0], &s->planar_samples[ch][AC3_FRAME_SIZE],
memcpy(&s->planar_samples[ch][0], &s->planar_samples[ch][AC3_BLOCK_SIZE * s->num_blocks],
AC3_BLOCK_SIZE * sizeof(s->planar_samples[0][0]));
/* deinterleave */
sinc = s->channels;
sptr = samples + s->channel_map[ch];
for (i = AC3_BLOCK_SIZE; i < AC3_FRAME_SIZE+AC3_BLOCK_SIZE; i++) {
for (i = AC3_BLOCK_SIZE; i < AC3_BLOCK_SIZE * (s->num_blocks + 1); i++) {
s->planar_samples[ch][i] = *sptr;
sptr += sinc;
}
@ -103,7 +103,7 @@ static void apply_mdct(AC3EncodeContext *s)
int blk, ch;
for (ch = 0; ch < s->channels; ch++) {
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
const SampleType *input_samples = &s->planar_samples[ch][blk * AC3_BLOCK_SIZE];
@ -159,7 +159,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
cpl_start = FFMIN(256, cpl_start + num_cpl_coefs) - num_cpl_coefs;
/* calculate coupling channel from fbw channels */
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
CoefType *cpl_coef = &block->mdct_coef[CPL_CH][cpl_start];
if (!block->cpl_in_use)
@ -188,7 +188,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
while (i < s->cpl_end_freq) {
int band_size = s->cpl_band_sizes[bnd];
for (ch = CPL_CH; ch <= s->fbw_channels; ch++) {
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
if (!block->cpl_in_use || (ch > CPL_CH && !block->channel_in_cpl[ch]))
continue;
@ -203,7 +203,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
}
/* determine which blocks to send new coupling coordinates for */
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
AC3Block *block0 = blk ? &s->blocks[blk-1] : NULL;
int new_coords = 0;
@ -261,7 +261,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
coordinates in successive blocks */
for (bnd = 0; bnd < s->num_cpl_bands; bnd++) {
blk = 0;
while (blk < AC3_MAX_BLOCKS) {
while (blk < s->num_blocks) {
int blk1;
CoefSumType energy_cpl;
AC3Block *block = &s->blocks[blk];
@ -273,7 +273,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
energy_cpl = energy[blk][CPL_CH][bnd];
blk1 = blk+1;
while (!s->blocks[blk1].new_cpl_coords && blk1 < AC3_MAX_BLOCKS) {
while (!s->blocks[blk1].new_cpl_coords && blk1 < s->num_blocks) {
if (s->blocks[blk1].cpl_in_use)
energy_cpl += energy[blk1][CPL_CH][bnd];
blk1++;
@ -285,7 +285,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
continue;
energy_ch = energy[blk][ch][bnd];
blk1 = blk+1;
while (!s->blocks[blk1].new_cpl_coords && blk1 < AC3_MAX_BLOCKS) {
while (!s->blocks[blk1].new_cpl_coords && blk1 < s->num_blocks) {
if (s->blocks[blk1].cpl_in_use)
energy_ch += energy[blk1][ch][bnd];
blk1++;
@ -297,7 +297,7 @@ static void apply_channel_coupling(AC3EncodeContext *s)
}
/* calculate exponents/mantissas for coupling coordinates */
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
if (!block->cpl_in_use || !block->new_cpl_coords)
continue;
@ -362,7 +362,7 @@ static void compute_rematrixing_strategy(AC3EncodeContext *s)
if (s->channel_mode != AC3_CHMODE_STEREO)
return;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
block = &s->blocks[blk];
block->new_rematrixing_strategy = !blk;
@ -440,7 +440,7 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, unsigned char *frame,
scale_coefficients(s);
clip_coefficients(&s->dsp, s->blocks[0].mdct_coef[1],
AC3_MAX_COEFS * AC3_MAX_BLOCKS * s->channels);
AC3_MAX_COEFS * s->num_blocks * s->channels);
s->cpl_on = s->cpl_enabled;
ff_ac3_compute_coupling_strategy(s);

@ -270,7 +270,7 @@ static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
@ -321,7 +321,8 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
c->put_pixels_clamped = put_pixels_clamped_mvi_asm;
c->add_pixels_clamped = add_pixels_clamped_mvi_asm;
c->get_pixels = get_pixels_mvi;
if (!high_bit_depth)
c->get_pixels = get_pixels_mvi;
c->diff_pixels = diff_pixels_mvi;
c->sad[0] = pix_abs16x16_mvi_asm;
c->sad[1] = pix_abs8x8_mvi;

@ -75,7 +75,7 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, DCTELEM *block)
void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
ff_put_pixels_clamped = c->put_pixels_clamped;
ff_add_pixels_clamped = c->add_pixels_clamped;

@ -72,7 +72,7 @@ int ff_pix_sum_armv6(uint8_t *pix, int line_size);
void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 &&
(avctx->idct_algo == FF_IDCT_AUTO ||
@ -106,8 +106,9 @@ void av_cold ff_dsputil_init_armv6(DSPContext* c, AVCodecContext *avctx)
c->avg_pixels_tab[1][0] = ff_avg_pixels8_armv6;
}
if (!high_bit_depth)
c->get_pixels = ff_get_pixels_armv6;
c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
c->get_pixels = ff_get_pixels_armv6;
c->diff_pixels = ff_diff_pixels_armv6;
c->pix_abs[0][0] = ff_pix_abs16_armv6;

@ -175,7 +175,7 @@ void ff_apply_window_int16_neon(int16_t *dst, const int16_t *src,
void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (!avctx->lowres && avctx->bits_per_raw_sample <= 8) {
if (avctx->idct_algo == FF_IDCT_AUTO ||

@ -155,7 +155,7 @@ static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
void ff_dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
{
int mm_flags = AV_CPU_FLAG_IWMMXT; /* multimedia extension flags */
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (avctx->dsp_mask) {
if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)

@ -2278,6 +2278,23 @@ typedef struct AVCodecContext {
#define FF_PROFILE_VC1_COMPLEX 2
#define FF_PROFILE_VC1_ADVANCED 3
#define FF_PROFILE_MPEG4_SIMPLE 0
#define FF_PROFILE_MPEG4_SIMPLE_SCALABLE 1
#define FF_PROFILE_MPEG4_CORE 2
#define FF_PROFILE_MPEG4_MAIN 3
#define FF_PROFILE_MPEG4_N_BIT 4
#define FF_PROFILE_MPEG4_SCALABLE_TEXTURE 5
#define FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION 6
#define FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE 7
#define FF_PROFILE_MPEG4_HYBRID 8
#define FF_PROFILE_MPEG4_ADVANCED_REAL_TIME 9
#define FF_PROFILE_MPEG4_CORE_SCALABLE 10
#define FF_PROFILE_MPEG4_ADVANCED_CODING 11
#define FF_PROFILE_MPEG4_ADVANCED_CORE 12
#define FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE 13
#define FF_PROFILE_MPEG4_SIMPLE_STUDIO 14
#define FF_PROFILE_MPEG4_ADVANCED_SIMPLE 15
/**
* level
* - encoding: Set by user.

@ -197,14 +197,14 @@ static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_si
void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
c->get_pixels = ff_bfin_get_pixels;
c->diff_pixels = ff_bfin_diff_pixels;
c->put_pixels_clamped = ff_bfin_put_pixels_clamped;
c->add_pixels_clamped = ff_bfin_add_pixels_clamped;
if (!high_bit_depth)
c->get_pixels = ff_bfin_get_pixels;
c->clear_blocks = bfin_clear_blocks;
c->pix_sum = ff_bfin_pix_sum;
c->pix_norm1 = ff_bfin_pix_norm1;
@ -253,10 +253,10 @@ void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
/* c->put_no_rnd_pixels_tab[0][3] = ff_bfin_put_pixels16_xy2_nornd; */
}
if (avctx->dct_algo == FF_DCT_AUTO)
c->fdct = ff_bfin_fdct;
if (avctx->bits_per_raw_sample <= 8) {
if (avctx->dct_algo == FF_DCT_AUTO)
c->fdct = ff_bfin_fdct;
if (avctx->idct_algo == FF_IDCT_VP3) {
c->idct_permutation_type = FF_NO_IDCT_PERM;
c->idct = ff_bfin_vp3_idct;

@ -88,7 +88,7 @@ static const struct algo fdct_tab[] = {
{ "REF-DBL", ff_ref_fdct, NO_PERM },
{ "FAAN", ff_faandct, FAAN_SCALE },
{ "IJG-AAN-INT", fdct_ifast, SCALE_PERM },
{ "IJG-LLM-INT", ff_jpeg_fdct_islow, NO_PERM },
{ "IJG-LLM-INT", ff_jpeg_fdct_islow_8, NO_PERM },
#if HAVE_MMX
{ "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX },

@ -22,6 +22,28 @@
#include "avcodec.h"
#include "dnxhddata.h"
static const uint8_t dnxhd_1235_luma_weight[] = {
0, 32, 32, 32, 33, 35, 38, 39,
32, 33, 32, 33, 36, 36, 39, 42,
32, 32, 33, 36, 35, 37, 41, 43,
31, 33, 34, 36, 36, 40, 42, 48,
32, 34, 36, 37, 39, 42, 46, 51,
36, 37, 37, 39, 41, 46, 51, 55,
37, 39, 41, 41, 47, 50, 55, 56,
41, 42, 41, 44, 50, 53, 60, 60
};
static const uint8_t dnxhd_1235_chroma_weight[] = {
0, 32, 33, 34, 39, 41, 54, 59,
33, 34, 35, 38, 43, 49, 58, 84,
34, 37, 39, 44, 46, 55, 74, 87,
40, 42, 47, 48, 58, 70, 87, 86,
43, 50, 56, 63, 72, 94, 91, 82,
55, 63, 65, 75, 93, 89, 85, 73,
61, 67, 82, 81, 83, 90, 79, 73,
74, 84, 75, 78, 90, 85, 73, 73
};
static const uint8_t dnxhd_1237_luma_weight[] = {
0, 32, 33, 34, 34, 36, 37, 36,
36, 37, 38, 38, 38, 39, 41, 44,
@ -132,6 +154,28 @@ static const uint8_t dnxhd_1243_chroma_weight[] = {
46, 45, 46, 47, 47, 48, 47, 47,
};
static const uint8_t dnxhd_1250_luma_weight[] = {
0, 32, 35, 35, 36, 36, 41, 43,
32, 34, 35, 36, 37, 39, 43, 47,
33, 34, 36, 38, 38, 42, 42, 50,
34, 36, 38, 38, 41, 40, 47, 54,
35, 38, 39, 40, 39, 45, 49, 58,
38, 39, 40, 39, 46, 47, 54, 60,
38, 39, 41, 46, 46, 48, 57, 62,
40, 41, 44, 45, 49, 54, 63, 63
};
static const uint8_t dnxhd_1250_chroma_weight[] = {
0, 32, 35, 36, 40, 42, 51, 51,
35, 36, 39, 39, 43, 51, 52, 55,
36, 41, 41, 43, 51, 53, 54, 56,
43, 44, 45, 50, 54, 54, 55, 57,
45, 48, 50, 51, 55, 58, 59, 58,
49, 52, 49, 57, 58, 62, 58, 60,
51, 51, 56, 58, 62, 61, 59, 62,
52, 52, 60, 61, 59, 59, 63, 63
};
static const uint8_t dnxhd_1251_luma_weight[] = {
0, 32, 32, 34, 34, 34, 34, 35,
35, 35, 36, 37, 36, 36, 35, 36,
@ -604,6 +648,146 @@ static const uint8_t dnxhd_1235_1241_run[62] = {
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62,
};
static const uint8_t dnxhd_1250_dc_codes[14] = {
10, 62, 11, 12, 13, 0, 1, 2, 3, 4, 14, 30, 126, 127
};
static const uint8_t dnxhd_1250_dc_bits[14] = {
4, 6, 4, 4, 4, 3, 3, 3, 3, 3, 4, 5, 7, 7
};
static const uint16_t dnxhd_1250_ac_codes[257] = {
0, 1, 4, 10, 11, 24, 25, 26,
54, 55, 56, 57, 116, 117, 118, 119,
240, 241, 242, 243, 244, 245, 492, 493,
494, 495, 496, 497, 498, 998, 999, 1000,
1001, 1002, 1003, 1004, 1005, 1006, 2014, 2015,
2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023,
2024, 2025, 4052, 4053, 4054, 4055, 4056, 4057,
4058, 4059, 4060, 4061, 4062, 4063, 4064, 4065,
4066, 4067, 8136, 8137, 8138, 8139, 8140, 8141,
8142, 8143, 8144, 8145, 8146, 8147, 8148, 8149,
8150, 8151, 8152, 8153, 8154, 8155, 8156, 16314,
16315, 16316, 16317, 16318, 16319, 16320, 16321, 16322,
16323, 16324, 16325, 16326, 16327, 16328, 16329, 16330,
16331, 16332, 16333, 16334, 16335, 16336, 16337, 16338,
32678, 32679, 32680, 32681, 32682, 32683, 32684, 32685,
32686, 32687, 32688, 32689, 32690, 32691, 32692, 32693,
32694, 32695, 32696, 32697, 32698, 32699, 32700, 32701,
32702, 32703, 32704, 32705, 32706, 32707, 32708, 32709,
32710, 32711, 32712, 65426, 65427, 65428, 65429, 65430,
65431, 65432, 65433, 65434, 65435, 65436, 65437, 65438,
65439, 65440, 65441, 65442, 65443, 65444, 65445, 65446,
65447, 65448, 65449, 65450, 65451, 65452, 65453, 65454,
65455, 65456, 65457, 65458, 65459, 65460, 65461, 65462,
65463, 65464, 65465, 65466, 65467, 65468, 65469, 65470,
65471, 65472, 65473, 65474, 65475, 65476, 65477, 65478,
65479, 65480, 65481, 65482, 65483, 65484, 65485, 65486,
65487, 65488, 65489, 65490, 65491, 65492, 65493, 65494,
65495, 65496, 65497, 65498, 65499, 65500, 65501, 65502,
65503, 65504, 65505, 65506, 65507, 65508, 65509, 65510,
65511, 65512, 65513, 65514, 65515, 65516, 65517, 65518,
65519, 65520, 65521, 65522, 65523, 65524, 65525, 65526,
65527, 65528, 65529, 65530, 65531, 65532, 65533, 65534,
65535
};
static const uint8_t dnxhd_1250_ac_bits[257] = {
2, 2, 3, 4, 4, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10,
10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
16
};
static const uint8_t dnxhd_1250_ac_level[257] = {
1, 1, 2, 3, 0, 4, 5, 2, 6, 7, 8, 3, 9, 10, 11, 4,
12, 13, 14, 15, 16, 5, 17, 18, 19, 20, 21, 22, 6, 23, 24, 25,
26, 27, 28, 29, 7, 8, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
9, 10, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 11,
12, 13, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 1, 2,
3, 4, 5, 14, 15, 16, 17, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 18, 19, 20, 21,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 55, 56, 22, 23, 24,
25, 26, 27, 54, 57, 58, 59, 60, 61, 62, 63, 64, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64
};
static const uint8_t dnxhd_1250_ac_run_flag[257] = {
0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1,
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1
};
static const uint8_t dnxhd_1250_ac_index_flag[257] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1
};
static const uint16_t dnxhd_1250_run_codes[62] = {
0, 4, 5, 12, 26, 27, 28, 58,
118, 119, 120, 242, 486, 487, 976, 977,
978, 979, 980, 981, 982, 983, 984, 985,
986, 987, 988, 989, 990, 991, 992, 993,
994, 995, 996, 997, 998, 999, 1000, 1001,
1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009,
1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017,
1018, 1019, 1020, 1021, 1022, 1023
};
static const uint8_t dnxhd_1250_run_bits[62] = {
1, 3, 3, 4, 5, 5, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
};
static const uint8_t dnxhd_1250_run[62] = {
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62
};
static const uint8_t dnxhd_1251_dc_codes[12] = {
0, 12, 13, 1, 2, 3, 4, 5, 14, 30, 62, 63,
};
@ -878,6 +1062,13 @@ static const uint8_t dnxhd_1252_ac_index_flag[257] = {
};
const CIDEntry ff_dnxhd_cid_table[] = {
{ 1235, 1920, 1080, 0, 917504, 917504, 6, 10,
dnxhd_1235_luma_weight, dnxhd_1235_chroma_weight,
dnxhd_1235_1241_dc_codes, dnxhd_1235_1241_dc_bits,
dnxhd_1235_1241_ac_codes, dnxhd_1235_1241_ac_bits, dnxhd_1235_1241_ac_level,
dnxhd_1235_1241_ac_run_flag, dnxhd_1235_1241_ac_index_flag,
dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1235_1241_run,
{ 175, 185, 365, 440 } },
{ 1237, 1920, 1080, 0, 606208, 606208, 4, 8,
dnxhd_1237_luma_weight, dnxhd_1237_chroma_weight,
dnxhd_1237_dc_codes, dnxhd_1237_dc_bits,
@ -913,6 +1104,13 @@ const CIDEntry ff_dnxhd_cid_table[] = {
dnxhd_1238_ac_run_flag, dnxhd_1238_ac_index_flag,
dnxhd_1235_1238_1241_run_codes, dnxhd_1235_1238_1241_run_bits, dnxhd_1238_run,
{ 185, 220 } },
{ 1250, 1280, 720, 0, 458752, 458752, 6, 10,
dnxhd_1250_luma_weight, dnxhd_1250_chroma_weight,
dnxhd_1250_dc_codes, dnxhd_1250_dc_bits,
dnxhd_1250_ac_codes, dnxhd_1250_ac_bits, dnxhd_1250_ac_level,
dnxhd_1250_ac_run_flag, dnxhd_1250_ac_index_flag,
dnxhd_1250_run_codes, dnxhd_1250_run_bits, dnxhd_1250_run,
{ 90, 180, 220 } },
{ 1251, 1280, 720, 0, 458752, 458752, 4, 8,
dnxhd_1251_luma_weight, dnxhd_1251_chroma_weight,
dnxhd_1251_dc_codes, dnxhd_1251_dc_bits,
@ -945,7 +1143,7 @@ int ff_dnxhd_get_cid_table(int cid)
return -1;
}
int ff_dnxhd_find_cid(AVCodecContext *avctx)
int ff_dnxhd_find_cid(AVCodecContext *avctx, int bit_depth)
{
int i, j;
int mbs = avctx->bit_rate/1000000;
@ -955,7 +1153,7 @@ int ff_dnxhd_find_cid(AVCodecContext *avctx)
const CIDEntry *cid = &ff_dnxhd_cid_table[i];
if (cid->width == avctx->width && cid->height == avctx->height &&
cid->interlaced == !!(avctx->flags & CODEC_FLAG_INTERLACED_DCT) &&
cid->bit_depth == 8) { // until 10 bit is supported
cid->bit_depth == bit_depth) {
for (j = 0; j < sizeof(cid->bit_rates); j++) {
if (cid->bit_rates[j] == mbs)
return cid->cid;

@ -46,6 +46,6 @@ typedef struct {
extern const CIDEntry ff_dnxhd_cid_table[];
int ff_dnxhd_get_cid_table(int cid);
int ff_dnxhd_find_cid(AVCodecContext *avctx);
int ff_dnxhd_find_cid(AVCodecContext *avctx, int bit_depth);
#endif /* AVCODEC_DNXHDDATA_H */

@ -1,6 +1,9 @@
/*
* VC3/DNxHD decoder.
* Copyright (c) 2007 SmartJog S.A., Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
* Copyright (c) 2011 MirriAd Ltd
*
* 10 bit support added by MirriAd Ltd, Joseph Artsimovich <joseph@mirriad.com>
*
* This file is part of FFmpeg.
*
@ -28,7 +31,7 @@
#include "dnxhddata.h"
#include "dsputil.h"
typedef struct {
typedef struct DNXHDContext {
AVCodecContext *avctx;
AVFrame picture;
GetBitContext gb;
@ -43,17 +46,22 @@ typedef struct {
DECLARE_ALIGNED(16, DCTELEM, blocks)[8][64];
ScanTable scantable;
const CIDEntry *cid_table;
int bit_depth; // 8, 10 or 0 if not initialized at all.
void (*decode_dct_block)(struct DNXHDContext *ctx, DCTELEM *block,
int n, int qscale);
} DNXHDContext;
#define DNXHD_VLC_BITS 9
#define DNXHD_DC_VLC_BITS 7
static void dnxhd_decode_dct_block_8(DNXHDContext *ctx, DCTELEM *block, int n, int qscale);
static void dnxhd_decode_dct_block_10(DNXHDContext *ctx, DCTELEM *block, int n, int qscale);
static av_cold int dnxhd_decode_init(AVCodecContext *avctx)
{
DNXHDContext *ctx = avctx->priv_data;
ctx->avctx = avctx;
dsputil_init(&ctx->dsp, avctx);
avctx->coded_frame = &ctx->picture;
avcodec_get_frame_defaults(&ctx->picture);
ctx->picture.type = AV_PICTURE_TYPE_I;
@ -79,7 +87,7 @@ static int dnxhd_init_vlc(DNXHDContext *ctx, int cid)
init_vlc(&ctx->ac_vlc, DNXHD_VLC_BITS, 257,
ctx->cid_table->ac_bits, 1, 1,
ctx->cid_table->ac_codes, 2, 2, 0);
init_vlc(&ctx->dc_vlc, DNXHD_DC_VLC_BITS, ctx->cid_table->bit_depth+4,
init_vlc(&ctx->dc_vlc, DNXHD_DC_VLC_BITS, ctx->bit_depth + 4,
ctx->cid_table->dc_bits, 1, 1,
ctx->cid_table->dc_codes, 1, 1, 0);
init_vlc(&ctx->run_vlc, DNXHD_VLC_BITS, 62,
@ -117,8 +125,21 @@ static int dnxhd_decode_header(DNXHDContext *ctx, const uint8_t *buf, int buf_si
av_dlog(ctx->avctx, "width %d, heigth %d\n", ctx->width, ctx->height);
if (buf[0x21] & 0x40) {
av_log(ctx->avctx, AV_LOG_ERROR, "10 bit per component\n");
return -1;
ctx->avctx->pix_fmt = PIX_FMT_YUV422P10;
ctx->avctx->bits_per_raw_sample = 10;
if (ctx->bit_depth != 10) {
dsputil_init(&ctx->dsp, ctx->avctx);
ctx->bit_depth = 10;
ctx->decode_dct_block = dnxhd_decode_dct_block_10;
}
} else {
ctx->avctx->pix_fmt = PIX_FMT_YUV422P;
ctx->avctx->bits_per_raw_sample = 8;
if (ctx->bit_depth != 8) {
dsputil_init(&ctx->dsp, ctx->avctx);
ctx->bit_depth = 8;
ctx->decode_dct_block = dnxhd_decode_dct_block_8;
}
}
cid = AV_RB32(buf + 0x28);
@ -158,79 +179,103 @@ static int dnxhd_decode_header(DNXHDContext *ctx, const uint8_t *buf, int buf_si
return 0;
}
static int dnxhd_decode_dc(DNXHDContext *ctx)
static av_always_inline void dnxhd_decode_dct_block(DNXHDContext *ctx,
DCTELEM *block, int n,
int qscale,
int index_bits,
int level_bias,
int level_shift)
{
int len;
len = get_vlc2(&ctx->gb, ctx->dc_vlc.table, DNXHD_DC_VLC_BITS, 1);
return len ? get_xbits(&ctx->gb, len) : 0;
}
static void dnxhd_decode_dct_block(DNXHDContext *ctx, DCTELEM *block, int n, int qscale)
{
int i, j, index, index2;
int i, j, index1, index2, len;
int level, component, sign;
const uint8_t *weigth_matrix;
const uint8_t *weight_matrix;
OPEN_READER(bs, &ctx->gb);
if (n&2) {
component = 1 + (n&1);
weigth_matrix = ctx->cid_table->chroma_weight;
weight_matrix = ctx->cid_table->chroma_weight;
} else {
component = 0;
weigth_matrix = ctx->cid_table->luma_weight;
weight_matrix = ctx->cid_table->luma_weight;
}
ctx->last_dc[component] += dnxhd_decode_dc(ctx);
UPDATE_CACHE(bs, &ctx->gb);
GET_VLC(len, bs, &ctx->gb, ctx->dc_vlc.table, DNXHD_DC_VLC_BITS, 1);
if (len) {
level = GET_CACHE(bs, &ctx->gb);
LAST_SKIP_BITS(bs, &ctx->gb, len);
sign = ~level >> 31;
level = (NEG_USR32(sign ^ level, len) ^ sign) - sign;
ctx->last_dc[component] += level;
}
block[0] = ctx->last_dc[component];
//av_log(ctx->avctx, AV_LOG_DEBUG, "dc %d\n", block[0]);
for (i = 1; ; i++) {
index = get_vlc2(&ctx->gb, ctx->ac_vlc.table, DNXHD_VLC_BITS, 2);
//av_log(ctx->avctx, AV_LOG_DEBUG, "index %d\n", index);
level = ctx->cid_table->ac_level[index];
UPDATE_CACHE(bs, &ctx->gb);
GET_VLC(index1, bs, &ctx->gb, ctx->ac_vlc.table,
DNXHD_VLC_BITS, 2);
//av_log(ctx->avctx, AV_LOG_DEBUG, "index %d\n", index1);
level = ctx->cid_table->ac_level[index1];
if (!level) { /* EOB */
//av_log(ctx->avctx, AV_LOG_DEBUG, "EOB\n");
return;
break;
}
sign = get_sbits(&ctx->gb, 1);
if (ctx->cid_table->ac_index_flag[index]) {
level += get_bits(&ctx->gb, ctx->cid_table->index_bits)<<6;
sign = SHOW_SBITS(bs, &ctx->gb, 1);
SKIP_BITS(bs, &ctx->gb, 1);
if (ctx->cid_table->ac_index_flag[index1]) {
level += SHOW_UBITS(bs, &ctx->gb, index_bits) << 6;
SKIP_BITS(bs, &ctx->gb, index_bits);
}
if (ctx->cid_table->ac_run_flag[index]) {
index2 = get_vlc2(&ctx->gb, ctx->run_vlc.table, DNXHD_VLC_BITS, 2);
if (ctx->cid_table->ac_run_flag[index1]) {
UPDATE_CACHE(bs, &ctx->gb);
GET_VLC(index2, bs, &ctx->gb, ctx->run_vlc.table,
DNXHD_VLC_BITS, 2);
i += ctx->cid_table->run[index2];
}
if (i > 63) {
av_log(ctx->avctx, AV_LOG_ERROR, "ac tex damaged %d, %d\n", n, i);
return;
break;
}
j = ctx->scantable.permutated[i];
//av_log(ctx->avctx, AV_LOG_DEBUG, "j %d\n", j);
//av_log(ctx->avctx, AV_LOG_DEBUG, "level %d, weigth %d\n", level, weigth_matrix[i]);
level = (2*level+1) * qscale * weigth_matrix[i];
if (ctx->cid_table->bit_depth == 10) {
if (weigth_matrix[i] != 8)
level += 8;
level >>= 4;
} else {
if (weigth_matrix[i] != 32)
level += 32;
level >>= 6;
}
//av_log(ctx->avctx, AV_LOG_DEBUG, "level %d, weight %d\n", level, weight_matrix[i]);
level = (2*level+1) * qscale * weight_matrix[i];
if (weight_matrix[i] != level_bias)
level += level_bias;
level >>= level_shift;
//av_log(NULL, AV_LOG_DEBUG, "i %d, j %d, end level %d\n", i, j, level);
block[j] = (level^sign) - sign;
}
CLOSE_READER(bs, &ctx->gb);
}
static void dnxhd_decode_dct_block_8(DNXHDContext *ctx, DCTELEM *block,
int n, int qscale)
{
dnxhd_decode_dct_block(ctx, block, n, qscale, 4, 32, 6);
}
static void dnxhd_decode_dct_block_10(DNXHDContext *ctx, DCTELEM *block,
int n, int qscale)
{
dnxhd_decode_dct_block(ctx, block, n, qscale, 6, 8, 4);
}
static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y)
{
int shift1 = ctx->bit_depth == 10;
int dct_linesize_luma = ctx->picture.linesize[0];
int dct_linesize_chroma = ctx->picture.linesize[1];
uint8_t *dest_y, *dest_u, *dest_v;
int dct_offset;
int dct_y_offset, dct_x_offset;
int qscale, i;
qscale = get_bits(&ctx->gb, 11);
@ -239,7 +284,7 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y)
for (i = 0; i < 8; i++) {
ctx->dsp.clear_block(ctx->blocks[i]);
dnxhd_decode_dct_block(ctx, ctx->blocks[i], i, qscale);
ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale);
}
if (ctx->picture.interlaced_frame) {
@ -247,9 +292,9 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y)
dct_linesize_chroma <<= 1;
}
dest_y = ctx->picture.data[0] + ((y * dct_linesize_luma) << 4) + (x << 4);
dest_u = ctx->picture.data[1] + ((y * dct_linesize_chroma) << 4) + (x << 3);
dest_v = ctx->picture.data[2] + ((y * dct_linesize_chroma) << 4) + (x << 3);
dest_y = ctx->picture.data[0] + ((y * dct_linesize_luma) << 4) + (x << (4 + shift1));
dest_u = ctx->picture.data[1] + ((y * dct_linesize_chroma) << 4) + (x << (3 + shift1));
dest_v = ctx->picture.data[2] + ((y * dct_linesize_chroma) << 4) + (x << (3 + shift1));
if (ctx->cur_field) {
dest_y += ctx->picture.linesize[0];
@ -257,18 +302,19 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y)
dest_v += ctx->picture.linesize[2];
}
dct_offset = dct_linesize_luma << 3;
ctx->dsp.idct_put(dest_y, dct_linesize_luma, ctx->blocks[0]);
ctx->dsp.idct_put(dest_y + 8, dct_linesize_luma, ctx->blocks[1]);
ctx->dsp.idct_put(dest_y + dct_offset, dct_linesize_luma, ctx->blocks[4]);
ctx->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize_luma, ctx->blocks[5]);
dct_y_offset = dct_linesize_luma << 3;
dct_x_offset = 8 << shift1;
ctx->dsp.idct_put(dest_y, dct_linesize_luma, ctx->blocks[0]);
ctx->dsp.idct_put(dest_y + dct_x_offset, dct_linesize_luma, ctx->blocks[1]);
ctx->dsp.idct_put(dest_y + dct_y_offset, dct_linesize_luma, ctx->blocks[4]);
ctx->dsp.idct_put(dest_y + dct_y_offset + dct_x_offset, dct_linesize_luma, ctx->blocks[5]);
if (!(ctx->avctx->flags & CODEC_FLAG_GRAY)) {
dct_offset = dct_linesize_chroma << 3;
ctx->dsp.idct_put(dest_u, dct_linesize_chroma, ctx->blocks[2]);
ctx->dsp.idct_put(dest_v, dct_linesize_chroma, ctx->blocks[3]);
ctx->dsp.idct_put(dest_u + dct_offset, dct_linesize_chroma, ctx->blocks[6]);
ctx->dsp.idct_put(dest_v + dct_offset, dct_linesize_chroma, ctx->blocks[7]);
dct_y_offset = dct_linesize_chroma << 3;
ctx->dsp.idct_put(dest_u, dct_linesize_chroma, ctx->blocks[2]);
ctx->dsp.idct_put(dest_v, dct_linesize_chroma, ctx->blocks[3]);
ctx->dsp.idct_put(dest_u + dct_y_offset, dct_linesize_chroma, ctx->blocks[6]);
ctx->dsp.idct_put(dest_v + dct_y_offset, dct_linesize_chroma, ctx->blocks[7]);
}
return 0;
@ -280,7 +326,7 @@ static int dnxhd_decode_macroblocks(DNXHDContext *ctx, const uint8_t *buf, int b
for (y = 0; y < ctx->mb_height; y++) {
ctx->last_dc[0] =
ctx->last_dc[1] =
ctx->last_dc[2] = 1<<(ctx->cid_table->bit_depth+2); // for levels +2^(bitdepth-1)
ctx->last_dc[2] = 1 << (ctx->bit_depth + 2); // for levels +2^(bitdepth-1)
init_get_bits(&ctx->gb, buf + ctx->mb_scan_index[y], (buf_size - ctx->mb_scan_index[y]) << 3);
for (x = 0; x < ctx->mb_width; x++) {
//START_TIMER;
@ -313,7 +359,6 @@ static int dnxhd_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
first_field = 1;
}
avctx->pix_fmt = PIX_FMT_YUV422P;
if (av_image_check_size(ctx->width, ctx->height, 0, avctx))
return -1;
avcodec_set_dimensions(avctx, ctx->width, ctx->height);

@ -1,8 +1,10 @@
/*
* VC3/DNxHD encoder
* Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
* Copyright (c) 2011 MirriAd Ltd
*
* VC-3 encoder funded by the British Broadcasting Corporation
* 10 bit support added by MirriAd Ltd, Joseph Artsimovich <joseph@mirriad.com>
*
* This file is part of FFmpeg.
*
@ -32,6 +34,7 @@
#include "dnxhdenc.h"
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
#define DNX10BIT_QMAT_SHIFT 18 // The largest value that will not lead to overflow for 10bit samples.
static const AVOption options[]={
{"nitris_compat", "encode with Avid Nitris compatibility", offsetof(DNXHDEncContext, nitris_compat), FF_OPT_TYPE_INT, {.dbl = 0}, 0, 1, VE},
@ -41,7 +44,7 @@ static const AVClass class = { "dnxhd", av_default_item_name, options, LIBAVUTIL
#define LAMBDA_FRAC_BITS 10
static void dnxhd_get_pixels_8x4(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
static void dnxhd_8bit_get_pixels_8x4_sym(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{
int i;
for (i = 0; i < 4; i++) {
@ -58,6 +61,43 @@ static void dnxhd_get_pixels_8x4(DCTELEM *restrict block, const uint8_t *pixels,
memcpy(block + 24, block - 32, sizeof(*block) * 8);
}
static av_always_inline void dnxhd_10bit_get_pixels_8x4_sym(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{
int i;
block += 32;
for (i = 0; i < 4; i++) {
memcpy(block + i * 8, pixels + i * line_size, 8 * sizeof(*block));
memcpy(block - (i+1) * 8, pixels + i * line_size, 8 * sizeof(*block));
}
}
static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, DCTELEM *block,
int n, int qscale, int *overflow)
{
const uint8_t *scantable= ctx->intra_scantable.scantable;
const int *qmat = ctx->q_intra_matrix[qscale];
int last_non_zero = 0;
ctx->dsp.fdct(block);
// Divide by 4 with rounding, to compensate scaling of DCT coefficients
block[0] = (block[0] + 2) >> 2;
for (int i = 1; i < 64; ++i) {
int j = scantable[i];
int sign = block[j] >> 31;
int level = (block[j] ^ sign) - sign;
level = level * qmat[j] >> DNX10BIT_QMAT_SHIFT;
block[j] = (level ^ sign) - sign;
if (level)
last_non_zero = i;
}
return last_non_zero;
}
static int dnxhd_init_vlc(DNXHDEncContext *ctx)
{
int i, j, level, run;
@ -118,31 +158,55 @@ static int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
// init first elem to 1 to avoid div by 0 in convert_matrix
uint16_t weight_matrix[64] = {1,}; // convert_matrix needs uint16_t*
int qscale, i;
const uint8_t *luma_weight_table = ctx->cid_table->luma_weight;
const uint8_t *chroma_weight_table = ctx->cid_table->chroma_weight;
FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l, (ctx->m.avctx->qmax+1) * 64 * sizeof(int), fail);
FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c, (ctx->m.avctx->qmax+1) * 64 * sizeof(int), fail);
FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail);
FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail);
for (i = 1; i < 64; i++) {
int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
weight_matrix[j] = ctx->cid_table->luma_weight[i];
}
ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix,
ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
for (i = 1; i < 64; i++) {
int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
weight_matrix[j] = ctx->cid_table->chroma_weight[i];
}
ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix,
ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
for (i = 0; i < 64; i++) {
ctx->qmatrix_l [qscale] [i] <<= 2; ctx->qmatrix_c [qscale] [i] <<= 2;
ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2;
ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2;
if (ctx->cid_table->bit_depth == 8) {
for (i = 1; i < 64; i++) {
int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
weight_matrix[j] = ctx->cid_table->luma_weight[i];
}
ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix,
ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
for (i = 1; i < 64; i++) {
int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
weight_matrix[j] = ctx->cid_table->chroma_weight[i];
}
ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix,
ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);
for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
for (i = 0; i < 64; i++) {
ctx->qmatrix_l [qscale] [i] <<= 2; ctx->qmatrix_c [qscale] [i] <<= 2;
ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2;
ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2;
}
}
} else {
// 10-bit
for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
for (i = 1; i < 64; i++) {
int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
// The quantization formula from the VC-3 standard is:
// quantized = sign(block[i]) * floor(abs(block[i]/s) * p / (qscale * weight_table[i]))
// Where p is 32 for 8-bit samples and 8 for 10-bit ones.
// The s factor compensates scaling of DCT coefficients done by the DCT routines,
// and therefore is not present in standard. It's 8 for 8-bit samples and 4 for 10-bit ones.
// We want values of ctx->qtmatrix_l and ctx->qtmatrix_r to be:
// ((1 << DNX10BIT_QMAT_SHIFT) * (p / s)) / (qscale * weight_table[i])
// For 10-bit samples, p / s == 2
ctx->qmatrix_l[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) / (qscale * luma_weight_table[i]);
ctx->qmatrix_c[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) / (qscale * chroma_weight_table[i]);
}
}
}
return 0;
fail:
return -1;
@ -165,10 +229,22 @@ static int dnxhd_init_rc(DNXHDEncContext *ctx)
static int dnxhd_encode_init(AVCodecContext *avctx)
{
DNXHDEncContext *ctx = avctx->priv_data;
int i, index;
int i, index, bit_depth;
switch (avctx->pix_fmt) {
case PIX_FMT_YUV422P:
bit_depth = 8;
break;
case PIX_FMT_YUV422P10:
bit_depth = 10;
break;
default:
av_log(avctx, AV_LOG_ERROR, "pixel format is incompatible with DNxHD\n");
return -1;
}
ctx->cid = ff_dnxhd_find_cid(avctx);
if (!ctx->cid || avctx->pix_fmt != PIX_FMT_YUV422P) {
ctx->cid = ff_dnxhd_find_cid(avctx, bit_depth);
if (!ctx->cid) {
av_log(avctx, AV_LOG_ERROR, "video parameters incompatible with DNxHD\n");
return -1;
}
@ -181,15 +257,25 @@ static int dnxhd_encode_init(AVCodecContext *avctx)
ctx->m.mb_intra = 1;
ctx->m.h263_aic = 1;
ctx->get_pixels_8x4_sym = dnxhd_get_pixels_8x4;
avctx->bits_per_raw_sample = ctx->cid_table->bit_depth;
dsputil_init(&ctx->m.dsp, avctx);
ff_dct_common_init(&ctx->m);
if (!ctx->m.dct_quantize)
ctx->m.dct_quantize = dct_quantize_c;
if (ctx->cid_table->bit_depth == 10) {
ctx->m.dct_quantize = dnxhd_10bit_dct_quantize;
ctx->get_pixels_8x4_sym = dnxhd_10bit_get_pixels_8x4_sym;
ctx->block_width_l2 = 4;
} else {
ctx->get_pixels_8x4_sym = dnxhd_8bit_get_pixels_8x4_sym;
ctx->block_width_l2 = 3;
}
#if HAVE_MMX
ff_dnxhd_init_mmx(ctx);
#endif
if (!ctx->m.dct_quantize)
ctx->m.dct_quantize = dct_quantize_c;
ctx->m.mb_height = (avctx->height + 15) / 16;
ctx->m.mb_width = (avctx->width + 15) / 16;
@ -255,7 +341,7 @@ static int dnxhd_write_header(AVCodecContext *avctx, uint8_t *buf)
AV_WB16(buf + 0x1a, avctx->width); // SPL
AV_WB16(buf + 0x1d, avctx->height>>ctx->interlaced); // NAL
buf[0x21] = 0x38; // FIXME 8 bit per comp
buf[0x21] = ctx->cid_table->bit_depth == 10 ? 0x58 : 0x38;
buf[0x22] = 0x88 + (ctx->interlaced<<2);
AV_WB32(buf + 0x28, ctx->cid); // CID
buf[0x2c] = ctx->interlaced ? 0 : 0x80;
@ -321,15 +407,27 @@ static av_always_inline void dnxhd_unquantize_c(DNXHDEncContext *ctx, DCTELEM *b
if (level) {
if (level < 0) {
level = (1-2*level) * qscale * weight_matrix[i];
if (weight_matrix[i] != 32)
level += 32;
level >>= 6;
if (ctx->cid_table->bit_depth == 10) {
if (weight_matrix[i] != 8)
level += 8;
level >>= 4;
} else {
if (weight_matrix[i] != 32)
level += 32;
level >>= 6;
}
level = -level;
} else {
level = (2*level+1) * qscale * weight_matrix[i];
if (weight_matrix[i] != 32)
level += 32;
level >>= 6;
if (ctx->cid_table->bit_depth == 10) {
if (weight_matrix[i] != 8)
level += 8;
level >>= 4;
} else {
if (weight_matrix[i] != 32)
level += 32;
level >>= 6;
}
}
block[j] = level;
}
@ -364,22 +462,24 @@ static av_always_inline int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, DCTELEM *bl
static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
{
const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize) + (mb_x << 4);
const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
const int bs = ctx->block_width_l2;
const int bw = 1 << bs;
const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize) + (mb_x << bs+1);
const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs);
DSPContext *dsp = &ctx->m.dsp;
dsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize);
dsp->get_pixels(ctx->blocks[1], ptr_y + 8, ctx->m.linesize);
dsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize);
dsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize);
dsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize);
dsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize);
dsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize);
dsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize);
if (mb_y+1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
if (ctx->interlaced) {
ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize);
ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize);
ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
} else {
dsp->clear_block(ctx->blocks[4]);
dsp->clear_block(ctx->blocks[5]);
@ -387,10 +487,10 @@ static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, in
dsp->clear_block(ctx->blocks[7]);
}
} else {
dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize);
dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize);
dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize);
dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize);
dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize);
}
}
@ -417,7 +517,7 @@ static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, int jobnr, i
ctx->m.last_dc[0] =
ctx->m.last_dc[1] =
ctx->m.last_dc[2] = 1024;
ctx->m.last_dc[2] = 1 << (ctx->cid_table->bit_depth + 2);
for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
unsigned mb = mb_y * ctx->m.mb_width + mb_x;
@ -440,6 +540,8 @@ static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, int jobnr, i
diff = block[0] - ctx->m.last_dc[n];
if (diff < 0) nbits = av_log2_16bit(-2*diff);
else nbits = av_log2_16bit( 2*diff);
assert(nbits < ctx->cid_table->bit_depth + 4);
dc_bits += ctx->cid_table->dc_bits[nbits] + nbits;
ctx->m.last_dc[n] = block[0];
@ -465,7 +567,7 @@ static int dnxhd_encode_thread(AVCodecContext *avctx, void *arg, int jobnr, int
ctx->m.last_dc[0] =
ctx->m.last_dc[1] =
ctx->m.last_dc[2] = 1024;
ctx->m.last_dc[2] = 1 << (ctx->cid_table->bit_depth + 2);
for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
unsigned mb = mb_y * ctx->m.mb_width + mb_x;
int qscale = ctx->mb_qscale[mb];
@ -515,13 +617,39 @@ static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg, int jobnr, int
DNXHDEncContext *ctx = avctx->priv_data;
int mb_y = jobnr, mb_x;
ctx = ctx->thread[threadnr];
for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
unsigned mb = mb_y * ctx->m.mb_width + mb_x;
uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize) + (mb_x<<4);
int sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
int varc = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)(sum*sum))>>8)+128)>>8;
ctx->mb_cmp[mb].value = varc;
ctx->mb_cmp[mb].mb = mb;
if (ctx->cid_table->bit_depth == 8) {
uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize);
for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x, pix += 16) {
unsigned mb = mb_y * ctx->m.mb_width + mb_x;
int sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
int varc = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)(sum*sum))>>8)+128)>>8;
ctx->mb_cmp[mb].value = varc;
ctx->mb_cmp[mb].mb = mb;
}
} else { // 10-bit
int const linesize = ctx->m.linesize >> 1;
for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x) {
uint16_t *pix = (uint16_t*)ctx->thread[0]->src[0] + ((mb_y << 4) * linesize) + (mb_x << 4);
unsigned mb = mb_y * ctx->m.mb_width + mb_x;
int sum = 0;
int sqsum = 0;
int mean, sqmean;
// Macroblocks are 16x16 pixels, unlike DCT blocks which are 8x8.
for (int i = 0; i < 16; ++i) {
for (int j = 0; j < 16; ++j) {
// Turn 16-bit pixels into 10-bit ones.
int const sample = (unsigned)pix[j] >> 6;
sum += sample;
sqsum += sample * sample;
// 2^10 * 2^10 * 16 * 16 = 2^28, which is less than INT_MAX
}
pix += linesize;
}
mean = sum >> 8; // 16*16 == 2^8
sqmean = sqsum >> 8;
ctx->mb_cmp[mb].value = sqmean - mean * mean;
ctx->mb_cmp[mb].mb = mb;
}
}
return 0;
}
@ -871,7 +999,7 @@ AVCodec ff_dnxhd_encoder = {
dnxhd_encode_picture,
dnxhd_encode_end,
.capabilities = CODEC_CAP_SLICE_THREADS,
.pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_NONE},
.pix_fmts = (const enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_YUV422P10, PIX_FMT_NONE},
.long_name = NULL_IF_CONFIG_SMALL("VC3/DNxHD"),
.priv_class = &class,
};

@ -52,8 +52,12 @@ typedef struct DNXHDEncContext {
struct DNXHDEncContext *thread[MAX_THREADS];
// Because our samples are either 8 or 16 bits for 8-bit and 10-bit
// encoding respectively, these refer either to bytes or to two-byte words.
unsigned dct_y_offset;
unsigned dct_uv_offset;
unsigned block_width_l2;
int interlaced;
int cur_field;

@ -306,25 +306,6 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
return s;
}
static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{
int i;
/* read the pixels */
for(i=0;i<8;i++) {
block[0] = pixels[0];
block[1] = pixels[1];
block[2] = pixels[2];
block[3] = pixels[3];
block[4] = pixels[4];
block[5] = pixels[5];
block[6] = pixels[6];
block[7] = pixels[7];
pixels += line_size;
block += 8;
}
}
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
const uint8_t *s2, int stride){
int i;
@ -2836,17 +2817,22 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
ff_check_alignment();
#if CONFIG_ENCODERS
if(avctx->dct_algo==FF_DCT_FASTINT) {
c->fdct = fdct_ifast;
c->fdct248 = fdct_ifast248;
}
else if(avctx->dct_algo==FF_DCT_FAAN) {
c->fdct = ff_faandct;
c->fdct248 = ff_faandct248;
}
else {
c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
c->fdct248 = ff_fdct248_islow;
if (avctx->bits_per_raw_sample == 10) {
c->fdct = ff_jpeg_fdct_islow_10;
c->fdct248 = ff_fdct248_islow_10;
} else {
if(avctx->dct_algo==FF_DCT_FASTINT) {
c->fdct = fdct_ifast;
c->fdct248 = fdct_ifast248;
}
else if(avctx->dct_algo==FF_DCT_FAAN) {
c->fdct = ff_faandct;
c->fdct248 = ff_faandct248;
}
else {
c->fdct = ff_jpeg_fdct_islow_8; //slow/accurate/default
c->fdct248 = ff_fdct248_islow_8;
}
}
#endif //CONFIG_ENCODERS
@ -2910,7 +2896,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
}
}
c->get_pixels = get_pixels_c;
c->diff_pixels = diff_pixels_c;
c->put_pixels_clamped = ff_put_pixels_clamped_c;
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
@ -3138,13 +3123,14 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
#define BIT_DEPTH_FUNCS(depth)\
#define BIT_DEPTH_FUNCS(depth, dct)\
c->get_pixels = FUNCC(get_pixels ## dct , depth);\
c->draw_edges = FUNCC(draw_edges , depth);\
c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
c->clear_block = FUNCC(clear_block , depth);\
c->clear_blocks = FUNCC(clear_blocks , depth);\
c->add_pixels8 = FUNCC(add_pixels8 , depth);\
c->add_pixels4 = FUNCC(add_pixels4 , depth);\
c->clear_block = FUNCC(clear_block ## dct , depth);\
c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
\
@ -3178,15 +3164,23 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
switch (avctx->bits_per_raw_sample) {
case 9:
BIT_DEPTH_FUNCS(9);
if (c->dct_bits == 32) {
BIT_DEPTH_FUNCS(9, _32);
} else {
BIT_DEPTH_FUNCS(9, _16);
}
break;
case 10:
BIT_DEPTH_FUNCS(10);
if (c->dct_bits == 32) {
BIT_DEPTH_FUNCS(10, _32);
} else {
BIT_DEPTH_FUNCS(10, _16);
}
break;
default:
av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
case 8:
BIT_DEPTH_FUNCS(8);
BIT_DEPTH_FUNCS(8, _16);
break;
}

@ -40,8 +40,10 @@ typedef short DCTELEM;
void fdct_ifast (DCTELEM *data);
void fdct_ifast248 (DCTELEM *data);
void ff_jpeg_fdct_islow (DCTELEM *data);
void ff_fdct248_islow (DCTELEM *data);
void ff_jpeg_fdct_islow_8(DCTELEM *data);
void ff_jpeg_fdct_islow_10(DCTELEM *data);
void ff_fdct248_islow_8(DCTELEM *data);
void ff_fdct248_islow_10(DCTELEM *data);
void j_rev_dct (DCTELEM *data);
void j_rev_dct4 (DCTELEM *data);
@ -217,6 +219,11 @@ void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int lin
* DSPContext.
*/
typedef struct DSPContext {
/**
* Size of DCT coefficients.
*/
int dct_bits;
/* pixel ops : interface with DCT */
void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);

@ -192,43 +192,89 @@ void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, i
}
}
static void FUNCC(add_pixels8)(uint8_t *restrict p_pixels, DCTELEM *p_block, int line_size)
{
int i;
pixel *restrict pixels = (pixel *restrict)p_pixels;
dctcoef *block = (dctcoef*)p_block;
line_size >>= sizeof(pixel)-1;
for(i=0;i<8;i++) {
pixels[0] += block[0];
pixels[1] += block[1];
pixels[2] += block[2];
pixels[3] += block[3];
pixels[4] += block[4];
pixels[5] += block[5];
pixels[6] += block[6];
pixels[7] += block[7];
pixels += line_size;
block += 8;
}
#define DCTELEM_FUNCS(dctcoef, suffix) \
static void FUNCC(get_pixels ## suffix)(DCTELEM *restrict _block, \
const uint8_t *_pixels, \
int line_size) \
{ \
const pixel *pixels = (const pixel *) _pixels; \
dctcoef *restrict block = (dctcoef *) _block; \
int i; \
\
/* read the pixels */ \
for(i=0;i<8;i++) { \
block[0] = pixels[0]; \
block[1] = pixels[1]; \
block[2] = pixels[2]; \
block[3] = pixels[3]; \
block[4] = pixels[4]; \
block[5] = pixels[5]; \
block[6] = pixels[6]; \
block[7] = pixels[7]; \
pixels += line_size / sizeof(pixel); \
block += 8; \
} \
} \
\
static void FUNCC(add_pixels8 ## suffix)(uint8_t *restrict _pixels, \
DCTELEM *_block, \
int line_size) \
{ \
int i; \
pixel *restrict pixels = (pixel *restrict)_pixels; \
dctcoef *block = (dctcoef*)_block; \
line_size /= sizeof(pixel); \
\
for(i=0;i<8;i++) { \
pixels[0] += block[0]; \
pixels[1] += block[1]; \
pixels[2] += block[2]; \
pixels[3] += block[3]; \
pixels[4] += block[4]; \
pixels[5] += block[5]; \
pixels[6] += block[6]; \
pixels[7] += block[7]; \
pixels += line_size; \
block += 8; \
} \
} \
\
static void FUNCC(add_pixels4 ## suffix)(uint8_t *restrict _pixels, \
DCTELEM *_block, \
int line_size) \
{ \
int i; \
pixel *restrict pixels = (pixel *restrict)_pixels; \
dctcoef *block = (dctcoef*)_block; \
line_size /= sizeof(pixel); \
\
for(i=0;i<4;i++) { \
pixels[0] += block[0]; \
pixels[1] += block[1]; \
pixels[2] += block[2]; \
pixels[3] += block[3]; \
pixels += line_size; \
block += 4; \
} \
} \
\
static void FUNCC(clear_block ## suffix)(DCTELEM *block) \
{ \
memset(block, 0, sizeof(dctcoef)*64); \
} \
\
/** \
* memset(blocks, 0, sizeof(DCTELEM)*6*64) \
*/ \
static void FUNCC(clear_blocks ## suffix)(DCTELEM *blocks) \
{ \
memset(blocks, 0, sizeof(dctcoef)*6*64); \
}
static void FUNCC(add_pixels4)(uint8_t *restrict p_pixels, DCTELEM *p_block, int line_size)
{
int i;
pixel *restrict pixels = (pixel *restrict)p_pixels;
dctcoef *block = (dctcoef*)p_block;
line_size >>= sizeof(pixel)-1;
for(i=0;i<4;i++) {
pixels[0] += block[0];
pixels[1] += block[1];
pixels[2] += block[2];
pixels[3] += block[3];
pixels += line_size;
block += 4;
}
}
DCTELEM_FUNCS(DCTELEM, _16)
#if BIT_DEPTH > 8
DCTELEM_FUNCS(dctcoef, _32)
#endif
#define PIXOP2(OPNAME, OP) \
static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
@ -1232,16 +1278,3 @@ void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
FUNCC(avg_pixels16)(dst, src, stride, 16);
}
static void FUNCC(clear_block)(DCTELEM *block)
{
memset(block, 0, sizeof(dctcoef)*64);
}
/**
* memset(blocks, 0, sizeof(DCTELEM)*6*64)
*/
static void FUNCC(clear_blocks)(DCTELEM *blocks)
{
memset(blocks, 0, sizeof(dctcoef)*6*64);
}

@ -63,6 +63,11 @@ void ff_eac3_get_frame_exp_strategy(AC3EncodeContext *s)
{
int ch;
if (s->num_blocks < 6) {
s->use_frame_exp_strategy = 0;
return;
}
s->use_frame_exp_strategy = 1;
for (ch = !s->cpl_on; ch <= s->fbw_channels; ch++) {
int expstr = eac3_frame_expstr_index_tab[s->exp_strategy[ch][0]-1]
@ -89,7 +94,7 @@ void ff_eac3_set_cpl_states(AC3EncodeContext *s)
/* set first cpl coords */
for (ch = 1; ch <= s->fbw_channels; ch++)
first_cpl_coords[ch] = 1;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (block->channel_in_cpl[ch]) {
@ -104,7 +109,7 @@ void ff_eac3_set_cpl_states(AC3EncodeContext *s)
}
/* set first cpl leak */
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 0; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
if (block->cpl_in_use) {
block->new_cpl_leak = 2;
@ -130,7 +135,7 @@ void ff_eac3_output_frame_header(AC3EncodeContext *s)
put_bits(&s->pb, 2, s->bit_alloc.sr_code); /* sample rate code */
} else {
put_bits(&s->pb, 2, s->bit_alloc.sr_code); /* sample rate code */
put_bits(&s->pb, 2, 0x3); /* number of blocks = 6 */
put_bits(&s->pb, 2, s->num_blks_code); /* number of blocks */
}
put_bits(&s->pb, 3, s->channel_mode); /* audio coding mode */
put_bits(&s->pb, 1, s->lfe_on); /* LFE channel indicator */
@ -141,11 +146,15 @@ void ff_eac3_output_frame_header(AC3EncodeContext *s)
/* TODO: mixing metadata */
put_bits(&s->pb, 1, 0); /* no info metadata */
/* TODO: info metadata */
if (s->num_blocks != 6)
put_bits(&s->pb, 1, !(s->avctx->frame_number % 6)); /* converter sync flag */
put_bits(&s->pb, 1, 0); /* no additional bit stream info */
/* frame header */
if (s->num_blocks == 6) {
put_bits(&s->pb, 1, !s->use_frame_exp_strategy);/* exponent strategy syntax */
put_bits(&s->pb, 1, 0); /* aht enabled = no */
}
put_bits(&s->pb, 2, 0); /* snr offset strategy = 1 */
put_bits(&s->pb, 1, 0); /* transient pre-noise processing enabled = no */
put_bits(&s->pb, 1, 0); /* block switch syntax enabled = no */
@ -158,7 +167,7 @@ void ff_eac3_output_frame_header(AC3EncodeContext *s)
/* coupling strategy use flags */
if (s->channel_mode > AC3_CHMODE_MONO) {
put_bits(&s->pb, 1, s->blocks[0].cpl_in_use);
for (blk = 1; blk < AC3_MAX_BLOCKS; blk++) {
for (blk = 1; blk < s->num_blocks; blk++) {
AC3Block *block = &s->blocks[blk];
put_bits(&s->pb, 1, block->new_cpl_strategy);
if (block->new_cpl_strategy)
@ -170,26 +179,31 @@ void ff_eac3_output_frame_header(AC3EncodeContext *s)
for (ch = !s->cpl_on; ch <= s->fbw_channels; ch++)
put_bits(&s->pb, 5, s->frame_exp_strategy[ch]);
} else {
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
for (blk = 0; blk < s->num_blocks; blk++)
for (ch = !s->blocks[blk].cpl_in_use; ch <= s->fbw_channels; ch++)
put_bits(&s->pb, 2, s->exp_strategy[ch][blk]);
}
if (s->lfe_on) {
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++)
for (blk = 0; blk < s->num_blocks; blk++)
put_bits(&s->pb, 1, s->exp_strategy[s->lfe_channel][blk]);
}
/* E-AC-3 to AC-3 converter exponent strategy (unfortunately not optional...) */
/* E-AC-3 to AC-3 converter exponent strategy (not optional when num blocks == 6) */
if (s->num_blocks != 6) {
put_bits(&s->pb, 1, 0);
} else {
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (s->use_frame_exp_strategy)
put_bits(&s->pb, 5, s->frame_exp_strategy[ch]);
else
put_bits(&s->pb, 5, 0);
}
}
/* snr offsets */
put_bits(&s->pb, 6, s->coarse_snr_offset);
put_bits(&s->pb, 4, s->fine_snr_offset[1]);
/* block start info */
put_bits(&s->pb, 1, 0);
if (s->num_blocks > 1)
put_bits(&s->pb, 1, 0);
}

@ -3707,6 +3707,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
dsputil_init(&s->dsp, s->avctx);
} else {
av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);

@ -42,9 +42,6 @@
#include "x86/dsputil_mmx.h"
#endif
#define xglue(x, y) x ## y
#define glue(x, y) xglue(x, y)
#define FF_COLOR_RGB 0 /**< RGB color space */
#define FF_COLOR_GRAY 1 /**< gray color space */
#define FF_COLOR_YUV 2 /**< YUV color space. 16 <= Y <= 235, 16 <= U, V <= 240 */

@ -1,402 +1,25 @@
/*
* jfdctint.c
*
* This file is part of the Independent JPEG Group's software.
*
* The authors make NO WARRANTY or representation, either express or implied,
* with respect to this software, its quality, accuracy, merchantability, or
* fitness for a particular purpose. This software is provided "AS IS", and
* you, its user, assume the entire risk as to its quality and accuracy.
*
* This software is copyright (C) 1991-1996, Thomas G. Lane.
* All Rights Reserved except as specified below.
*
* Permission is hereby granted to use, copy, modify, and distribute this
* software (or portions thereof) for any purpose, without fee, subject to
* these conditions:
* (1) If any part of the source code for this software is distributed, then
* this README file must be included, with this copyright and no-warranty
* notice unaltered; and any additions, deletions, or changes to the original
* files must be clearly indicated in accompanying documentation.
* (2) If only executable code is distributed, then the accompanying
* documentation must state that "this software is based in part on the work
* of the Independent JPEG Group".
* (3) Permission for use of this software is granted only if the user accepts
* full responsibility for any undesirable consequences; the authors accept
* NO LIABILITY for damages of any kind.
*
* These conditions apply to any software derived from or based on the IJG
* code, not just to the unmodified library. If you use our work, you ought
* to acknowledge us.
*
* Permission is NOT granted for the use of any IJG author's name or company
* name in advertising or publicity relating to this software or products
* derived from it. This software may be referred to only as "the Independent
* JPEG Group's software".
*
* We specifically permit and encourage the use of this software as the basis
* of commercial products, provided that all warranty or liability claims are
* assumed by the product vendor.
*
* This file contains a slow-but-accurate integer implementation of the
* forward DCT (Discrete Cosine Transform).
*
* A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
* on each column. Direct algorithms are also available, but they are
* much more complex and seem not to be any faster when reduced to code.
*
* This implementation is based on an algorithm described in
* C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
* Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
* Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
* The primary algorithm described there uses 11 multiplies and 29 adds.
* We use their alternate method with 12 multiplies and 32 adds.
* The advantage of this method is that no data path contains more than one
* multiplication; this allows a very simple and accurate implementation in
* scaled fixed-point arithmetic, with a minimal number of shifts.
*/
/**
* @file
* Independent JPEG Group's slow & accurate dct.
*/
#include <stdlib.h>
#include <stdio.h>
#include "libavutil/common.h"
#include "dsputil.h"
#define DCTSIZE 8
#define BITS_IN_JSAMPLE 8
#define GLOBAL(x) x
#define RIGHT_SHIFT(x, n) ((x) >> (n))
#define MULTIPLY16C16(var,const) ((var)*(const))
#if 1 //def USE_ACCURATE_ROUNDING
#define DESCALE(x,n) RIGHT_SHIFT((x) + (1 << ((n) - 1)), n)
#else
#define DESCALE(x,n) RIGHT_SHIFT(x, n)
#endif
/*
* This module is specialized to the case DCTSIZE = 8.
*/
#if DCTSIZE != 8
Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
#endif
/*
* The poop on this scaling stuff is as follows:
*
* Each 1-D DCT step produces outputs which are a factor of sqrt(N)
* larger than the true DCT outputs. The final outputs are therefore
* a factor of N larger than desired; since N=8 this can be cured by
* a simple right shift at the end of the algorithm. The advantage of
* this arrangement is that we save two multiplications per 1-D DCT,
* because the y0 and y4 outputs need not be divided by sqrt(N).
* In the IJG code, this factor of 8 is removed by the quantization step
* (in jcdctmgr.c), NOT in this module.
* This file is part of Libav.
*
* We have to do addition and subtraction of the integer inputs, which
* is no problem, and multiplication by fractional constants, which is
* a problem to do in integer arithmetic. We multiply all the constants
* by CONST_SCALE and convert them to integer constants (thus retaining
* CONST_BITS bits of precision in the constants). After doing a
* multiplication we have to divide the product by CONST_SCALE, with proper
* rounding, to produce the correct output. This division can be done
* cheaply as a right shift of CONST_BITS bits. We postpone shifting
* as long as possible so that partial sums can be added together with
* full fractional precision.
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* The outputs of the first pass are scaled up by PASS1_BITS bits so that
* they are represented to better-than-integral precision. These outputs
* require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
* with the recommended scaling. (For 12-bit sample data, the intermediate
* array is int32_t anyway.)
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* To avoid overflow of the 32-bit intermediate results in pass 2, we must
* have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
* shows that the values given below are the most effective.
*/
#if BITS_IN_JSAMPLE == 8
#define CONST_BITS 13
#define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */
#else
#define CONST_BITS 13
#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
#endif
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
* causing a lot of useless floating-point operations at run time.
* To get around this we use the following pre-calculated constants.
* If you change CONST_BITS you may want to add appropriate values.
* (With a reasonable C compiler, you can just rely on the FIX() macro...)
*/
#if CONST_BITS == 13
#define FIX_0_298631336 ((int32_t) 2446) /* FIX(0.298631336) */
#define FIX_0_390180644 ((int32_t) 3196) /* FIX(0.390180644) */
#define FIX_0_541196100 ((int32_t) 4433) /* FIX(0.541196100) */
#define FIX_0_765366865 ((int32_t) 6270) /* FIX(0.765366865) */
#define FIX_0_899976223 ((int32_t) 7373) /* FIX(0.899976223) */
#define FIX_1_175875602 ((int32_t) 9633) /* FIX(1.175875602) */
#define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */
#define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */
#define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */
#define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */
#define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */
#define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */
#else
#define FIX_0_298631336 FIX(0.298631336)
#define FIX_0_390180644 FIX(0.390180644)
#define FIX_0_541196100 FIX(0.541196100)
#define FIX_0_765366865 FIX(0.765366865)
#define FIX_0_899976223 FIX(0.899976223)
#define FIX_1_175875602 FIX(1.175875602)
#define FIX_1_501321110 FIX(1.501321110)
#define FIX_1_847759065 FIX(1.847759065)
#define FIX_1_961570560 FIX(1.961570560)
#define FIX_2_053119869 FIX(2.053119869)
#define FIX_2_562915447 FIX(2.562915447)
#define FIX_3_072711026 FIX(3.072711026)
#endif
/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
* For 8-bit samples with the recommended scaling, all the variable
* and constant values involved are no more than 16 bits wide, so a
* 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
* For 12-bit samples, a full 32-bit multiplication will be needed.
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#if BITS_IN_JSAMPLE == 8 && CONST_BITS<=13 && PASS1_BITS<=2
#define MULTIPLY(var,const) MULTIPLY16C16(var,const)
#else
#define MULTIPLY(var,const) ((var) * (const))
#endif
static av_always_inline void row_fdct(DCTELEM * data){
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int tmp10, tmp11, tmp12, tmp13;
int z1, z2, z3, z4, z5;
DCTELEM *dataptr;
int ctr;
/* Pass 1: process rows. */
/* Note results are scaled up by sqrt(8) compared to a true DCT; */
/* furthermore, we scale the results by 2**PASS1_BITS. */
dataptr = data;
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
tmp0 = dataptr[0] + dataptr[7];
tmp7 = dataptr[0] - dataptr[7];
tmp1 = dataptr[1] + dataptr[6];
tmp6 = dataptr[1] - dataptr[6];
tmp2 = dataptr[2] + dataptr[5];
tmp5 = dataptr[2] - dataptr[5];
tmp3 = dataptr[3] + dataptr[4];
tmp4 = dataptr[3] - dataptr[4];
/* Even part per LL&M figure 1 --- note that published figure is faulty;
* rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
*/
tmp10 = tmp0 + tmp3;
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
CONST_BITS-PASS1_BITS);
dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
CONST_BITS-PASS1_BITS);
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
* cK represents cos(K*pi/16).
* i0..i3 in the paper are tmp4..tmp7 here.
*/
z1 = tmp4 + tmp7;
z2 = tmp5 + tmp6;
z3 = tmp4 + tmp6;
z4 = tmp5 + tmp7;
z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
z3 += z5;
z4 += z5;
dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
dataptr += DCTSIZE; /* advance pointer to next row */
}
}
/*
* Perform the forward DCT on one block of samples.
*/
GLOBAL(void)
ff_jpeg_fdct_islow (DCTELEM * data)
{
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int tmp10, tmp11, tmp12, tmp13;
int z1, z2, z3, z4, z5;
DCTELEM *dataptr;
int ctr;
row_fdct(data);
/* Pass 2: process columns.
* We remove the PASS1_BITS scaling, but leave the results scaled up
* by an overall factor of 8.
*/
dataptr = data;
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
/* Even part per LL&M figure 1 --- note that published figure is faulty;
* rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
*/
tmp10 = tmp0 + tmp3;
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
CONST_BITS+PASS1_BITS);
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
CONST_BITS+PASS1_BITS);
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
* cK represents cos(K*pi/16).
* i0..i3 in the paper are tmp4..tmp7 here.
*/
z1 = tmp4 + tmp7;
z2 = tmp5 + tmp6;
z3 = tmp4 + tmp6;
z4 = tmp5 + tmp7;
z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
z3 += z5;
z4 += z5;
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3,
CONST_BITS+PASS1_BITS);
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4,
CONST_BITS+PASS1_BITS);
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3,
CONST_BITS+PASS1_BITS);
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4,
CONST_BITS+PASS1_BITS);
dataptr++; /* advance pointer to next column */
}
}
/*
* The secret of DCT2-4-8 is really simple -- you do the usual 1-DCT
* on the rows and then, instead of doing even and odd, part on the colums
* you do even part two times.
*/
GLOBAL(void)
ff_fdct248_islow (DCTELEM * data)
{
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int tmp10, tmp11, tmp12, tmp13;
int z1;
DCTELEM *dataptr;
int ctr;
row_fdct(data);
/* Pass 2: process columns.
* We remove the PASS1_BITS scaling, but leave the results scaled up
* by an overall factor of 8.
*/
dataptr = data;
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
tmp10 = tmp0 + tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
tmp13 = tmp0 - tmp3;
dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
CONST_BITS+PASS1_BITS);
dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
CONST_BITS+PASS1_BITS);
tmp10 = tmp4 + tmp7;
tmp11 = tmp5 + tmp6;
tmp12 = tmp5 - tmp6;
tmp13 = tmp4 - tmp7;
dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
CONST_BITS+PASS1_BITS);
dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
CONST_BITS+PASS1_BITS);
#define BIT_DEPTH 8
#include "jfdctint_template.c"
#undef BIT_DEPTH
dataptr++; /* advance pointer to next column */
}
}
#define BIT_DEPTH 10
#include "jfdctint_template.c"
#undef BIT_DEPTH

@ -0,0 +1,405 @@
/*
* jfdctint.c
*
* This file is part of the Independent JPEG Group's software.
*
* The authors make NO WARRANTY or representation, either express or implied,
* with respect to this software, its quality, accuracy, merchantability, or
* fitness for a particular purpose. This software is provided "AS IS", and
* you, its user, assume the entire risk as to its quality and accuracy.
*
* This software is copyright (C) 1991-1996, Thomas G. Lane.
* All Rights Reserved except as specified below.
*
* Permission is hereby granted to use, copy, modify, and distribute this
* software (or portions thereof) for any purpose, without fee, subject to
* these conditions:
* (1) If any part of the source code for this software is distributed, then
* this README file must be included, with this copyright and no-warranty
* notice unaltered; and any additions, deletions, or changes to the original
* files must be clearly indicated in accompanying documentation.
* (2) If only executable code is distributed, then the accompanying
* documentation must state that "this software is based in part on the work
* of the Independent JPEG Group".
* (3) Permission for use of this software is granted only if the user accepts
* full responsibility for any undesirable consequences; the authors accept
* NO LIABILITY for damages of any kind.
*
* These conditions apply to any software derived from or based on the IJG
* code, not just to the unmodified library. If you use our work, you ought
* to acknowledge us.
*
* Permission is NOT granted for the use of any IJG author's name or company
* name in advertising or publicity relating to this software or products
* derived from it. This software may be referred to only as "the Independent
* JPEG Group's software".
*
* We specifically permit and encourage the use of this software as the basis
* of commercial products, provided that all warranty or liability claims are
* assumed by the product vendor.
*
* This file contains a slow-but-accurate integer implementation of the
* forward DCT (Discrete Cosine Transform).
*
* A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
* on each column. Direct algorithms are also available, but they are
* much more complex and seem not to be any faster when reduced to code.
*
* This implementation is based on an algorithm described in
* C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
* Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
* Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
* The primary algorithm described there uses 11 multiplies and 29 adds.
* We use their alternate method with 12 multiplies and 32 adds.
* The advantage of this method is that no data path contains more than one
* multiplication; this allows a very simple and accurate implementation in
* scaled fixed-point arithmetic, with a minimal number of shifts.
*/
/**
* @file
* Independent JPEG Group's slow & accurate dct.
*/
#include "libavutil/common.h"
#include "dsputil.h"
#include "bit_depth_template.c"
#define DCTSIZE 8
#define BITS_IN_JSAMPLE BIT_DEPTH
#define GLOBAL(x) x
#define RIGHT_SHIFT(x, n) ((x) >> (n))
#define MULTIPLY16C16(var,const) ((var)*(const))
#if 1 //def USE_ACCURATE_ROUNDING
#define DESCALE(x,n) RIGHT_SHIFT((x) + (1 << ((n) - 1)), n)
#else
#define DESCALE(x,n) RIGHT_SHIFT(x, n)
#endif
/*
* This module is specialized to the case DCTSIZE = 8.
*/
#if DCTSIZE != 8
#error "Sorry, this code only copes with 8x8 DCTs."
#endif
/*
* The poop on this scaling stuff is as follows:
*
* Each 1-D DCT step produces outputs which are a factor of sqrt(N)
* larger than the true DCT outputs. The final outputs are therefore
* a factor of N larger than desired; since N=8 this can be cured by
* a simple right shift at the end of the algorithm. The advantage of
* this arrangement is that we save two multiplications per 1-D DCT,
* because the y0 and y4 outputs need not be divided by sqrt(N).
* In the IJG code, this factor of 8 is removed by the quantization step
* (in jcdctmgr.c), NOT in this module.
*
* We have to do addition and subtraction of the integer inputs, which
* is no problem, and multiplication by fractional constants, which is
* a problem to do in integer arithmetic. We multiply all the constants
* by CONST_SCALE and convert them to integer constants (thus retaining
* CONST_BITS bits of precision in the constants). After doing a
* multiplication we have to divide the product by CONST_SCALE, with proper
* rounding, to produce the correct output. This division can be done
* cheaply as a right shift of CONST_BITS bits. We postpone shifting
* as long as possible so that partial sums can be added together with
* full fractional precision.
*
* The outputs of the first pass are scaled up by PASS1_BITS bits so that
* they are represented to better-than-integral precision. These outputs
* require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
* with the recommended scaling. (For 12-bit sample data, the intermediate
* array is int32_t anyway.)
*
* To avoid overflow of the 32-bit intermediate results in pass 2, we must
* have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
* shows that the values given below are the most effective.
*/
#undef CONST_BITS
#undef PASS1_BITS
#undef OUT_SHIFT
#if BITS_IN_JSAMPLE == 8
#define CONST_BITS 13
#define PASS1_BITS 4 /* set this to 2 if 16x16 multiplies are faster */
#define OUT_SHIFT PASS1_BITS
#else
#define CONST_BITS 13
#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
#define OUT_SHIFT (PASS1_BITS + 1)
#endif
/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
* causing a lot of useless floating-point operations at run time.
* To get around this we use the following pre-calculated constants.
* If you change CONST_BITS you may want to add appropriate values.
* (With a reasonable C compiler, you can just rely on the FIX() macro...)
*/
#if CONST_BITS == 13
#define FIX_0_298631336 ((int32_t) 2446) /* FIX(0.298631336) */
#define FIX_0_390180644 ((int32_t) 3196) /* FIX(0.390180644) */
#define FIX_0_541196100 ((int32_t) 4433) /* FIX(0.541196100) */
#define FIX_0_765366865 ((int32_t) 6270) /* FIX(0.765366865) */
#define FIX_0_899976223 ((int32_t) 7373) /* FIX(0.899976223) */
#define FIX_1_175875602 ((int32_t) 9633) /* FIX(1.175875602) */
#define FIX_1_501321110 ((int32_t) 12299) /* FIX(1.501321110) */
#define FIX_1_847759065 ((int32_t) 15137) /* FIX(1.847759065) */
#define FIX_1_961570560 ((int32_t) 16069) /* FIX(1.961570560) */
#define FIX_2_053119869 ((int32_t) 16819) /* FIX(2.053119869) */
#define FIX_2_562915447 ((int32_t) 20995) /* FIX(2.562915447) */
#define FIX_3_072711026 ((int32_t) 25172) /* FIX(3.072711026) */
#else
#define FIX_0_298631336 FIX(0.298631336)
#define FIX_0_390180644 FIX(0.390180644)
#define FIX_0_541196100 FIX(0.541196100)
#define FIX_0_765366865 FIX(0.765366865)
#define FIX_0_899976223 FIX(0.899976223)
#define FIX_1_175875602 FIX(1.175875602)
#define FIX_1_501321110 FIX(1.501321110)
#define FIX_1_847759065 FIX(1.847759065)
#define FIX_1_961570560 FIX(1.961570560)
#define FIX_2_053119869 FIX(2.053119869)
#define FIX_2_562915447 FIX(2.562915447)
#define FIX_3_072711026 FIX(3.072711026)
#endif
/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
* For 8-bit samples with the recommended scaling, all the variable
* and constant values involved are no more than 16 bits wide, so a
* 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
* For 12-bit samples, a full 32-bit multiplication will be needed.
*/
#if BITS_IN_JSAMPLE == 8 && CONST_BITS<=13 && PASS1_BITS<=2
#define MULTIPLY(var,const) MULTIPLY16C16(var,const)
#else
#define MULTIPLY(var,const) ((var) * (const))
#endif
static av_always_inline void FUNC(row_fdct)(DCTELEM *data)
{
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int tmp10, tmp11, tmp12, tmp13;
int z1, z2, z3, z4, z5;
DCTELEM *dataptr;
int ctr;
/* Pass 1: process rows. */
/* Note results are scaled up by sqrt(8) compared to a true DCT; */
/* furthermore, we scale the results by 2**PASS1_BITS. */
dataptr = data;
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
tmp0 = dataptr[0] + dataptr[7];
tmp7 = dataptr[0] - dataptr[7];
tmp1 = dataptr[1] + dataptr[6];
tmp6 = dataptr[1] - dataptr[6];
tmp2 = dataptr[2] + dataptr[5];
tmp5 = dataptr[2] - dataptr[5];
tmp3 = dataptr[3] + dataptr[4];
tmp4 = dataptr[3] - dataptr[4];
/* Even part per LL&M figure 1 --- note that published figure is faulty;
* rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
*/
tmp10 = tmp0 + tmp3;
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
CONST_BITS-PASS1_BITS);
dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
CONST_BITS-PASS1_BITS);
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
* cK represents cos(K*pi/16).
* i0..i3 in the paper are tmp4..tmp7 here.
*/
z1 = tmp4 + tmp7;
z2 = tmp5 + tmp6;
z3 = tmp4 + tmp6;
z4 = tmp5 + tmp7;
z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
z3 += z5;
z4 += z5;
dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
dataptr += DCTSIZE; /* advance pointer to next row */
}
}
/*
* Perform the forward DCT on one block of samples.
*/
GLOBAL(void)
FUNC(ff_jpeg_fdct_islow)(DCTELEM *data)
{
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int tmp10, tmp11, tmp12, tmp13;
int z1, z2, z3, z4, z5;
DCTELEM *dataptr;
int ctr;
FUNC(row_fdct)(data);
/* Pass 2: process columns.
* We remove the PASS1_BITS scaling, but leave the results scaled up
* by an overall factor of 8.
*/
dataptr = data;
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
tmp7 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
tmp6 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
/* Even part per LL&M figure 1 --- note that published figure is faulty;
* rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
*/
tmp10 = tmp0 + tmp3;
tmp13 = tmp0 - tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
dataptr[DCTSIZE*0] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
dataptr[DCTSIZE*4] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
dataptr[DCTSIZE*2] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
CONST_BITS + OUT_SHIFT);
dataptr[DCTSIZE*6] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
CONST_BITS + OUT_SHIFT);
/* Odd part per figure 8 --- note paper omits factor of sqrt(2).
* cK represents cos(K*pi/16).
* i0..i3 in the paper are tmp4..tmp7 here.
*/
z1 = tmp4 + tmp7;
z2 = tmp5 + tmp6;
z3 = tmp4 + tmp6;
z4 = tmp5 + tmp7;
z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
z1 = MULTIPLY(z1, - FIX_0_899976223); /* sqrt(2) * (c7-c3) */
z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
z3 += z5;
z4 += z5;
dataptr[DCTSIZE*7] = DESCALE(tmp4 + z1 + z3, CONST_BITS + OUT_SHIFT);
dataptr[DCTSIZE*5] = DESCALE(tmp5 + z2 + z4, CONST_BITS + OUT_SHIFT);
dataptr[DCTSIZE*3] = DESCALE(tmp6 + z2 + z3, CONST_BITS + OUT_SHIFT);
dataptr[DCTSIZE*1] = DESCALE(tmp7 + z1 + z4, CONST_BITS + OUT_SHIFT);
dataptr++; /* advance pointer to next column */
}
}
/*
* The secret of DCT2-4-8 is really simple -- you do the usual 1-DCT
* on the rows and then, instead of doing even and odd, part on the colums
* you do even part two times.
*/
GLOBAL(void)
FUNC(ff_fdct248_islow)(DCTELEM *data)
{
int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
int tmp10, tmp11, tmp12, tmp13;
int z1;
DCTELEM *dataptr;
int ctr;
FUNC(row_fdct)(data);
/* Pass 2: process columns.
* We remove the PASS1_BITS scaling, but leave the results scaled up
* by an overall factor of 8.
*/
dataptr = data;
for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1];
tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1];
tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
tmp10 = tmp0 + tmp3;
tmp11 = tmp1 + tmp2;
tmp12 = tmp1 - tmp2;
tmp13 = tmp0 - tmp3;
dataptr[DCTSIZE*0] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
dataptr[DCTSIZE*4] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
dataptr[DCTSIZE*2] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
CONST_BITS+OUT_SHIFT);
dataptr[DCTSIZE*6] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
CONST_BITS+OUT_SHIFT);
tmp10 = tmp4 + tmp7;
tmp11 = tmp5 + tmp6;
tmp12 = tmp5 - tmp6;
tmp13 = tmp4 - tmp7;
dataptr[DCTSIZE*1] = DESCALE(tmp10 + tmp11, OUT_SHIFT);
dataptr[DCTSIZE*5] = DESCALE(tmp10 - tmp11, OUT_SHIFT);
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
dataptr[DCTSIZE*3] = DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
CONST_BITS + OUT_SHIFT);
dataptr[DCTSIZE*7] = DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
CONST_BITS + OUT_SHIFT);
dataptr++; /* advance pointer to next column */
}
}

@ -421,13 +421,14 @@ static void ff_fdct_mlib(DCTELEM *data)
void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
c->get_pixels = get_pixels_mlib;
c->diff_pixels = diff_pixels_mlib;
c->add_pixels_clamped = add_pixels_clamped_mlib;
if (!high_bit_depth) {
c->get_pixels = get_pixels_mlib;
c->put_pixels_tab[0][0] = put_pixels16_mlib;
c->put_pixels_tab[0][1] = put_pixels16_x2_mlib;
c->put_pixels_tab[0][2] = put_pixels16_y2_mlib;

@ -1527,6 +1527,22 @@ static int mpeg4_decode_gop_header(MpegEncContext * s, GetBitContext *gb){
return 0;
}
static int mpeg4_decode_profile_level(MpegEncContext * s, GetBitContext *gb){
int profile_and_level_indication;
profile_and_level_indication = get_bits(gb, 8);
s->avctx->profile = (profile_and_level_indication & 0xf0) >> 4;
s->avctx->level = (profile_and_level_indication & 0x0f);
// for Simple profile, level 0
if (s->avctx->profile == 0 && s->avctx->level == 8) {
s->avctx->level = 0;
}
return 0;
}
static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
int width, height, vo_ver_id;
@ -2181,6 +2197,9 @@ int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb)
else if(startcode == GOP_STARTCODE){
mpeg4_decode_gop_header(s, gb);
}
else if(startcode == VOS_STARTCODE){
mpeg4_decode_profile_level(s, gb);
}
else if(startcode == VOP_STARTCODE){
break;
}
@ -2241,6 +2260,25 @@ static av_cold int decode_init(AVCodecContext *avctx)
return 0;
}
static const AVProfile mpeg4_video_profiles[] = {
{ FF_PROFILE_MPEG4_SIMPLE, "Simple Profile" },
{ FF_PROFILE_MPEG4_SIMPLE_SCALABLE, "Simple Scalable Profile" },
{ FF_PROFILE_MPEG4_CORE, "Core Profile" },
{ FF_PROFILE_MPEG4_MAIN, "Main Profile" },
{ FF_PROFILE_MPEG4_N_BIT, "N-bit Profile" },
{ FF_PROFILE_MPEG4_SCALABLE_TEXTURE, "Scalable Texture Profile" },
{ FF_PROFILE_MPEG4_SIMPLE_FACE_ANIMATION, "Simple Face Animation Profile" },
{ FF_PROFILE_MPEG4_BASIC_ANIMATED_TEXTURE, "Basic Animated Texture Profile" },
{ FF_PROFILE_MPEG4_HYBRID, "Hybrid Profile" },
{ FF_PROFILE_MPEG4_ADVANCED_REAL_TIME, "Advanced Real Time Simple Profile" },
{ FF_PROFILE_MPEG4_CORE_SCALABLE, "Code Scalable Profile" },
{ FF_PROFILE_MPEG4_ADVANCED_CODING, "Advanced Coding Profile" },
{ FF_PROFILE_MPEG4_ADVANCED_CORE, "Advanced Core Profile" },
{ FF_PROFILE_MPEG4_ADVANCED_SCALABLE_TEXTURE, "Advanced Scalable Texture Profile" },
{ FF_PROFILE_MPEG4_SIMPLE_STUDIO, "Simple Studio Profile" },
{ FF_PROFILE_MPEG4_ADVANCED_SIMPLE, "Advanced Simple Profile" },
};
AVCodec ff_mpeg4_decoder = {
"mpeg4",
AVMEDIA_TYPE_VIDEO,
@ -2255,6 +2293,7 @@ AVCodec ff_mpeg4_decoder = {
.max_lowres= 3,
.long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2"),
.pix_fmts= ff_hwaccel_pixfmt_list_420,
.profiles = NULL_IF_CONFIG_SMALL(mpeg4_video_profiles),
.update_thread_context= ONLY_IF_THREADS_ENABLED(ff_mpeg_update_thread_context)
};

@ -69,7 +69,8 @@ void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][6
for(qscale=qmin; qscale<=qmax; qscale++){
int i;
if (dsp->fdct == ff_jpeg_fdct_islow
if (dsp->fdct == ff_jpeg_fdct_islow_8 ||
dsp->fdct == ff_jpeg_fdct_islow_10
#ifdef FAAN_POSTSCALE
|| dsp->fdct == ff_faandct
#endif

@ -1373,7 +1373,7 @@ static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int l
void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
c->pix_abs[0][1] = sad16_x2_altivec;
c->pix_abs[0][2] = sad16_y2_altivec;
@ -1387,11 +1387,10 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
c->sse[0]= sse16_altivec;
c->pix_sum = pix_sum_altivec;
c->diff_pixels = diff_pixels_altivec;
c->get_pixels = get_pixels_altivec;
if (!high_bit_depth)
c->clear_block = clear_block_altivec;
c->add_bytes= add_bytes_altivec;
if (!high_bit_depth) {
c->get_pixels = get_pixels_altivec;
c->clear_block = clear_block_altivec;
c->put_pixels_tab[0][0] = put_pixels16_altivec;
/* the two functions do the same thing, so use the same code */
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;

@ -145,7 +145,7 @@ static void prefetch_ppc(void *mem, int stride, int h)
void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
// Common optimizations whether AltiVec is available or not
c->prefetch = prefetch_ppc;
@ -172,8 +172,9 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
c->gmc1 = gmc1_altivec;
#if CONFIG_ENCODERS
if (avctx->dct_algo == FF_DCT_AUTO ||
avctx->dct_algo == FF_DCT_ALTIVEC) {
if (avctx->bits_per_raw_sample <= 8 &&
(avctx->dct_algo == FF_DCT_AUTO ||
avctx->dct_algo == FF_DCT_ALTIVEC)) {
c->fdct = fdct_altivec;
}
#endif //CONFIG_ENCODERS

@ -967,7 +967,7 @@ H264_WEIGHT( 8, 8)
H264_WEIGHT( 8, 4)
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
if (!high_bit_depth) {

@ -142,7 +142,7 @@ static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_siz
void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
{
const int idct_algo= avctx->idct_algo;
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (!high_bit_depth) {
c->clear_blocks = clear_blocks_mmi;
@ -152,9 +152,9 @@ void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
c->put_pixels_tab[0][0] = put_pixels16_mmi;
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi;
}
c->get_pixels = get_pixels_mmi;
}
if (avctx->bits_per_raw_sample <= 8 &&
(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2)) {

@ -333,7 +333,7 @@ DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)
void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (!high_bit_depth) {
c->put_pixels_tab[0][0] = put_rnd_pixels16_o;

@ -92,7 +92,7 @@ static void idct_add(uint8_t *dest, int line_size, DCTELEM *block)
void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx)
{
const int idct_algo= avctx->idct_algo;
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
dsputil_init_align(c,avctx);
if (!high_bit_depth)

@ -3953,7 +3953,7 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
{
/* VIS-specific optimizations */
int accel = vis_level ();
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (accel & ACCEL_SPARC_VIS) {
if (avctx->bits_per_raw_sample <= 8 &&

@ -53,6 +53,7 @@ static void get_pixels_8x4_sym_sse2(DCTELEM *block, const uint8_t *pixels, int l
void ff_dnxhd_init_mmx(DNXHDEncContext *ctx)
{
if (av_get_cpu_flags() & AV_CPU_FLAG_SSE2) {
ctx->get_pixels_8x4_sym = get_pixels_8x4_sym_sse2;
if (ctx->cid_table->bit_depth == 8)
ctx->get_pixels_8x4_sym = get_pixels_8x4_sym_sse2;
}
}

@ -2341,7 +2341,7 @@ void ff_vector_clip_int32_sse41 (int32_t *dst, const int32_t *src, int32_t min
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
{
int mm_flags = av_get_cpu_flags();
const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
const int bit_depth = avctx->bits_per_raw_sample;
if (avctx->dsp_mask) {

@ -1098,10 +1098,12 @@ static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int si
void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
{
int mm_flags = av_get_cpu_flags();
int bit_depth = avctx->bits_per_raw_sample;
if (mm_flags & AV_CPU_FLAG_MMX) {
const int dct_algo = avctx->dct_algo;
if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
if (avctx->bits_per_raw_sample <= 8 &&
(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX)) {
if(mm_flags & AV_CPU_FLAG_SSE2){
c->fdct = ff_fdct_sse2;
}else if(mm_flags & AV_CPU_FLAG_MMX2){
@ -1111,7 +1113,8 @@ void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
}
c->get_pixels = get_pixels_mmx;
if (bit_depth <= 8)
c->get_pixels = get_pixels_mmx;
c->diff_pixels = diff_pixels_mmx;
c->pix_sum = pix_sum16_mmx;
@ -1158,7 +1161,8 @@ void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
}
if(mm_flags & AV_CPU_FLAG_SSE2){
c->get_pixels = get_pixels_sse2;
if (bit_depth <= 8)
c->get_pixels = get_pixels_sse2;
c->sum_abs_dctelem= sum_abs_dctelem_sse2;
#if HAVE_YASM && HAVE_ALIGNED_STACK
c->hadamard8_diff[0]= ff_hadamard8_diff16_sse2;

@ -456,7 +456,7 @@ static int mov_read_hdlr(MOVContext *c, AVIOContext *pb, MOVAtom atom)
st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
else if(type == MKTAG('m','1','a',' '))
st->codec->codec_id = CODEC_ID_MP2;
else if(type == MKTAG('s','u','b','p'))
else if((type == MKTAG('s','u','b','p')) || (type == MKTAG('c','l','c','p')))
st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
avio_rb32(pb); /* component manufacture */

@ -218,22 +218,6 @@ static int rtp_valid_packet_in_sequence(RTPStatistics *s, uint16_t seq)
return 1;
}
#if 0
/**
* This function is currently unused; without a valid local ntp time, I don't see how we could calculate the
* difference between the arrival and sent timestamp. As a result, the jitter and transit statistics values
* never change. I left this in in case someone else can see a way. (rdm)
*/
static void rtcp_update_jitter(RTPStatistics *s, uint32_t sent_timestamp, uint32_t arrival_timestamp)
{
uint32_t transit= arrival_timestamp - sent_timestamp;
int d;
s->transit= transit;
d= FFABS(transit - s->transit);
s->jitter += d - ((s->jitter + 8)>>4);
}
#endif
int rtp_check_and_send_back_rr(RTPDemuxContext *s, int count)
{
AVIOContext *pb;

@ -225,20 +225,6 @@ static int rtp_read(URLContext *h, uint8_t *buf, int size)
int len, n;
struct pollfd p[2] = {{s->rtp_fd, POLLIN, 0}, {s->rtcp_fd, POLLIN, 0}};
#if 0
for(;;) {
from_len = sizeof(from);
len = recvfrom (s->rtp_fd, buf, size, 0,
(struct sockaddr *)&from, &from_len);
if (len < 0) {
if (ff_neterrno() == AVERROR(EAGAIN) ||
ff_neterrno() == AVERROR(EINTR))
continue;
return AVERROR(EIO);
}
break;
}
#else
for(;;) {
if (url_interrupt_cb())
return AVERROR_EXIT;
@ -277,7 +263,6 @@ static int rtp_read(URLContext *h, uint8_t *buf, int size)
return AVERROR(EIO);
}
}
#endif
return len;
}
@ -296,14 +281,6 @@ static int rtp_write(URLContext *h, const uint8_t *buf, int size)
}
ret = ffurl_write(hd, buf, size);
#if 0
{
struct timespec ts;
ts.tv_sec = 0;
ts.tv_nsec = 10 * 1000000;
nanosleep(&ts, NULL);
}
#endif
return ret;
}

@ -235,6 +235,11 @@ do_video_encoding dnxhd-720p-rd.dnxhd "-threads 4 -mbd rd -s hd720 -b 90M -pix_f
do_video_decoding "" "-s cif -pix_fmt yuv420p"
fi
if [ -n "$do_dnxhd_720p_10bit" ] ; then
do_video_encoding dnxhd-720p-10bit.dnxhd "-s hd720 -b 90M -pix_fmt yuv422p10 -vframes 5 -an"
do_video_decoding "" "-s cif -pix_fmt yuv420p"
fi
if [ -n "$do_svq1" ] ; then
do_video_encoding svq1.mov "-an -vcodec svq1 -qscale 3 -pix_fmt yuv410p"
do_video_decoding "" "-pix_fmt yuv420p"

@ -0,0 +1,4 @@
cb29b6ae4e1562d95f9311991fef98df *./tests/data/vsynth1/dnxhd-720p-10bit.dnxhd
2293760 ./tests/data/vsynth1/dnxhd-720p-10bit.dnxhd
2f45bb1af7da5dd3dca870ac87237b7d *./tests/data/dnxhd_720p_10bit.vsynth1.out.yuv
stddev: 6.27 PSNR: 32.18 MAXDIFF: 64 bytes: 760320/ 7603200

@ -0,0 +1,4 @@
8648511257afb816b5b911706ca391db *./tests/data/vsynth2/dnxhd-720p-10bit.dnxhd
2293760 ./tests/data/vsynth2/dnxhd-720p-10bit.dnxhd
391b6f5aa7c7b488b479cb43d420b860 *./tests/data/dnxhd_720p_10bit.vsynth2.out.yuv
stddev: 1.35 PSNR: 45.46 MAXDIFF: 23 bytes: 760320/ 7603200
Loading…
Cancel
Save