@ -161,6 +161,10 @@ typedef struct DiracContext {
unsigned num_x ; /* number of horizontal slices */
unsigned num_y ; /* number of vertical slices */
uint8_t * thread_buf ; /* Per-thread buffer for coefficient storage */
int threads_num_buf ; /* Current # of buffers allocated */
int thread_buf_size ; /* Each thread has a buffer this size */
struct {
unsigned width ;
unsigned height ;
@ -370,6 +374,10 @@ static av_cold int dirac_decode_init(AVCodecContext *avctx)
s - > avctx = avctx ;
s - > frame_number = - 1 ;
s - > thread_buf = NULL ;
s - > threads_num_buf = - 1 ;
s - > thread_buf_size = - 1 ;
ff_diracdsp_init ( & s - > diracdsp ) ;
ff_mpegvideoencdsp_init ( & s - > mpvencdsp , avctx ) ;
ff_videodsp_init ( & s - > vdsp , 8 ) ;
@ -403,6 +411,8 @@ static av_cold int dirac_decode_end(AVCodecContext *avctx)
for ( i = 0 ; i < MAX_FRAMES ; i + + )
av_frame_free ( & s - > all_frames [ i ] . avframe ) ;
av_freep ( & s - > thread_buf ) ;
return 0 ;
}
@ -760,46 +770,108 @@ static int decode_lowdelay_slice(AVCodecContext *avctx, void *arg)
return 0 ;
}
typedef struct SliceCoeffs {
int left ;
int top ;
int tot_h ;
int tot_v ;
int tot ;
} SliceCoeffs ;
static int subband_coeffs ( DiracContext * s , int x , int y , int p ,
SliceCoeffs c [ MAX_DWT_LEVELS ] )
{
int level , coef = 0 ;
for ( level = 0 ; level < s - > wavelet_depth ; level + + ) {
SliceCoeffs * o = & c [ level ] ;
SubBand * b = & s - > plane [ p ] . band [ level ] [ 3 ] ; /* orientation doens't matter */
o - > top = b - > height * y / s - > num_y ;
o - > left = b - > width * x / s - > num_x ;
o - > tot_h = ( ( b - > width * ( x + 1 ) ) / s - > num_x ) - o - > left ;
o - > tot_v = ( ( b - > height * ( y + 1 ) ) / s - > num_y ) - o - > top ;
o - > tot = o - > tot_h * o - > tot_v ;
coef + = o - > tot * ( 4 - ! ! level ) ;
}
return coef ;
}
/**
* VC - 2 Specification - >
* 13.5 .3 hq_slice ( sx , sy )
*/
static int decode_hq_slice ( AVCodecContext * avctx , void * arg )
static int decode_hq_slice ( DiracContext * s , DiracSlice * slice , uint8_t * tmp_buf )
{
int i , quant , level , orientation , quant_idx ;
uint8_t quants [ MAX_DWT_LEVELS ] [ 4 ] ;
DiracContext * s = avctx - > priv_data ;
DiracSlice * slice = arg ;
int i , level , orientation , quant_idx ;
int qfactor [ MAX_DWT_LEVELS ] [ 4 ] , qoffset [ MAX_DWT_LEVELS ] [ 4 ] ;
GetBitContext * gb = & slice - > gb ;
SliceCoeffs coeffs_num [ MAX_DWT_LEVELS ] ;
skip_bits_long ( gb , 8 * s - > highquality . prefix_bytes ) ;
quant_idx = get_bits ( gb , 8 ) ;
if ( quant_idx > DIRAC_MAX_QUANT_INDEX ) {
av_log ( s - > avctx , AV_LOG_ERROR , " Invalid quantization index - %i \n " , quant_idx ) ;
return AVERROR_INVALIDDATA ;
}
/* Slice quantization (slice_quantizers() in the specs) */
for ( level = 0 ; level < s - > wavelet_depth ; level + + ) {
for ( orientation = ! ! level ; orientation < 4 ; orientation + + ) {
quant = FFMAX ( quant_idx - s - > lowdelay . quant [ level ] [ orientation ] , 0 ) ;
quants [ level ] [ orientation ] = quant ;
const int quant = FFMAX ( quant_idx - s - > lowdelay . quant [ level ] [ orientation ] , 0 ) ;
qfactor [ level ] [ orientation ] = ff_dirac_qscale_tab [ quant ] ;
qoffset [ level ] [ orientation ] = ff_dirac_qoffset_intra_tab [ quant ] + 2 ;
}
}
/* Luma + 2 Chroma planes */
for ( i = 0 ; i < 3 ; i + + ) {
int64_t length = s - > highquality . size_scaler * get_bits ( gb , 8 ) ;
int64_t bits_left = 8 * length ;
int64_t bits_end = get_bits_count ( gb ) + bits_left ;
int c , coef_num , coef_par , off = 0 ;
int64_t length = s - > highquality . size_scaler * get_bits ( gb , 8 ) ;
int64_t start = get_bits_count ( gb ) ;
int64_t bits_end = start + 8 * length ;
if ( bits_end > = INT_MAX ) {
av_log ( s - > avctx , AV_LOG_ERROR , " end too far away \n " ) ;
return AVERROR_INVALIDDATA ;
}
coef_num = subband_coeffs ( s , slice - > slice_x , slice - > slice_y , i , coeffs_num ) ;
if ( s - > pshift ) {
int32_t * dst = ( int32_t * ) tmp_buf ;
for ( c = 0 ; c < coef_num ; c + + )
dst [ c ] = dirac_get_se_golomb ( gb ) ;
coef_par = c ;
} else {
int16_t * dst = ( int16_t * ) tmp_buf ;
for ( c = 0 ; c < coef_num ; c + + )
dst [ c ] = dirac_get_se_golomb ( gb ) ;
coef_par = c ;
}
if ( coef_num > coef_par ) {
const int start_b = coef_par * ( 4 > > s - > pshift ) ;
const int end_b = coef_num * ( 4 > > s - > pshift ) ;
memset ( & tmp_buf [ start_b ] , 0 , end_b - start_b ) ;
}
for ( level = 0 ; level < s - > wavelet_depth ; level + + ) {
const SliceCoeffs * c = & coeffs_num [ level ] ;
for ( orientation = ! ! level ; orientation < 4 ; orientation + + ) {
decode_subband ( s , gb , quants [ level ] [ orientation ] , slice - > slice_x , slice - > slice_y , bits_end ,
& s - > plane [ i ] . band [ level ] [ orientation ] , NULL ) ;
const SubBand * b1 = & s - > plane [ i ] . band [ level ] [ orientation ] ;
uint8_t * buf = b1 - > ibuf + c - > top * b1 - > stride + ( c - > left < < ( s - > pshift + 1 ) ) ;
/* Change to c->tot_h <= 4 for AVX2 dequantization */
const int qfunc = s - > pshift + 2 * ( c - > tot_h < = 2 ) ;
s - > diracdsp . dequant_subband [ qfunc ] ( & tmp_buf [ off ] , buf , b1 - > stride ,
qfactor [ level ] [ orientation ] ,
qoffset [ level ] [ orientation ] ,
c - > tot_v , c - > tot_h ) ;
off + = c - > tot < < ( s - > pshift + 1 ) ;
}
}
skip_bits_long ( gb , bits_end - get_bits_count ( gb ) ) ;
}
@ -811,8 +883,9 @@ static int decode_hq_slice_row(AVCodecContext *avctx, void *arg, int jobnr, int
int i ;
DiracContext * s = avctx - > priv_data ;
DiracSlice * slices = ( ( DiracSlice * ) arg ) + s - > num_x * jobnr ;
uint8_t * thread_buf = & s - > thread_buf [ s - > thread_buf_size * threadnr ] ;
for ( i = 0 ; i < s - > num_x ; i + + )
decode_hq_slice ( avctx , & slices [ i ] ) ;
decode_hq_slice ( s , & slices [ i ] , thread_buf ) ;
return 0 ;
}
@ -824,15 +897,32 @@ static int decode_lowdelay(DiracContext *s)
{
AVCodecContext * avctx = s - > avctx ;
int slice_x , slice_y , bufsize ;
int64_t bytes = 0 ;
int64_t coef_buf_size , bytes = 0 ;
const uint8_t * buf ;
DiracSlice * slices ;
SliceCoeffs tmp [ MAX_DWT_LEVELS ] ;
int slice_num = 0 ;
slices = av_mallocz_array ( s - > num_x , s - > num_y * sizeof ( DiracSlice ) ) ;
if ( ! slices )
return AVERROR ( ENOMEM ) ;
/* 8 becacuse that's how much the golomb reader could overread junk data
* from another plane / slice at most , and 512 because SIMD */
coef_buf_size = subband_coeffs ( s , s - > num_x - 1 , s - > num_y - 1 , 0 , tmp ) + 8 ;
coef_buf_size = ( coef_buf_size < < ( 1 + s - > pshift ) ) + 512 ;
if ( s - > threads_num_buf ! = avctx - > thread_count | |
s - > thread_buf_size ! = coef_buf_size ) {
s - > threads_num_buf = avctx - > thread_count ;
s - > thread_buf_size = coef_buf_size ;
s - > thread_buf = av_realloc_f ( s - > thread_buf , avctx - > thread_count , s - > thread_buf_size ) ;
if ( ! s - > thread_buf ) {
av_log ( s - > avctx , AV_LOG_ERROR , " thread buffer allocation failure \n " ) ;
return AVERROR ( ENOMEM ) ;
}
}
align_get_bits ( & s - > gb ) ;
/*[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) */
buf = s - > gb . buffer + get_bits_count ( & s - > gb ) / 8 ;
@ -848,7 +938,7 @@ static int decode_lowdelay(DiracContext *s)
if ( bytes < = bufsize / 8 )
bytes + = buf [ bytes ] * s - > highquality . size_scaler + 1 ;
}
if ( bytes > = INT_MAX ) {
if ( bytes > = INT_MAX | | bytes * 8 > bufsize ) {
av_log ( s - > avctx , AV_LOG_ERROR , " too many bytes \n " ) ;
av_free ( slices ) ;
return AVERROR_INVALIDDATA ;
@ -867,6 +957,12 @@ static int decode_lowdelay(DiracContext *s)
bufsize = 0 ;
}
}
if ( s - > num_x * s - > num_y ! = slice_num ) {
av_log ( s - > avctx , AV_LOG_ERROR , " too few slices \n " ) ;
return AVERROR_INVALIDDATA ;
}
avctx - > execute2 ( avctx , decode_hq_slice_row , slices , NULL , s - > num_y ) ;
} else {
for ( slice_y = 0 ; bufsize > 0 & & slice_y < s - > num_y ; slice_y + + ) {