@ -644,16 +644,34 @@ static void nvenc_override_rate_control(AVCodecContext *avctx)
static av_cold int nvenc_recalc_surfaces ( AVCodecContext * avctx )
static av_cold int nvenc_recalc_surfaces ( AVCodecContext * avctx )
{
{
NvencContext * ctx = avctx - > priv_data ;
NvencContext * ctx = avctx - > priv_data ;
int nb_surfaces = 0 ;
// default minimum of 4 surfaces
// multiply by 2 for number of NVENCs on gpu (hardcode to 2)
// another multiply by 2 to avoid blocking next PBB group
int nb_surfaces = FFMAX ( 4 , ctx - > encode_config . frameIntervalP * 2 * 2 ) ;
// lookahead enabled
if ( ctx - > rc_lookahead > 0 ) {
if ( ctx - > rc_lookahead > 0 ) {
nb_surfaces = ctx - > rc_lookahead + ( ( ctx - > encode_config . frameIntervalP > 0 ) ? ctx - > encode_config . frameIntervalP : 0 ) + 1 + 4 ;
// +1 is to account for lkd_bound calculation later
if ( ctx - > nb_surfaces < nb_surfaces ) {
// +4 is to allow sufficient pipelining with lookahead
nb_surfaces = FFMAX ( 1 , FFMAX ( nb_surfaces , ctx - > rc_lookahead + ctx - > encode_config . frameIntervalP + 1 + 4 ) ) ;
if ( nb_surfaces > ctx - > nb_surfaces & & ctx - > nb_surfaces > 0 )
{
av_log ( avctx , AV_LOG_WARNING ,
av_log ( avctx , AV_LOG_WARNING ,
" Defined rc_lookahead requires more surfaces, "
" Defined rc_lookahead requires more surfaces, "
" increasing used surfaces %d -> %d \n " , ctx - > nb_surfaces , nb_surfaces ) ;
" increasing used surfaces %d -> %d \n " , ctx - > nb_surfaces , nb_surfaces ) ;
ctx - > nb_surfaces = nb_surfaces ;
}
}
ctx - > nb_surfaces = FFMAX ( nb_surfaces , ctx - > nb_surfaces ) ;
} else {
if ( ctx - > encode_config . frameIntervalP > 1 & & ctx - > nb_surfaces < nb_surfaces & & ctx - > nb_surfaces > 0 )
{
av_log ( avctx , AV_LOG_WARNING ,
" Defined b-frame requires more surfaces, "
" increasing used surfaces %d -> %d \n " , ctx - > nb_surfaces , nb_surfaces ) ;
ctx - > nb_surfaces = FFMAX ( ctx - > nb_surfaces , nb_surfaces ) ;
}
else if ( ctx - > nb_surfaces < = 0 )
ctx - > nb_surfaces = nb_surfaces ;
// otherwise use user specified value
}
}
ctx - > nb_surfaces = FFMAX ( 1 , FFMIN ( MAX_REGISTERED_FRAMES , ctx - > nb_surfaces ) ) ;
ctx - > nb_surfaces = FFMAX ( 1 , FFMIN ( MAX_REGISTERED_FRAMES , ctx - > nb_surfaces ) ) ;
@ -1086,6 +1104,7 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
NvencContext * ctx = avctx - > priv_data ;
NvencContext * ctx = avctx - > priv_data ;
NvencDynLoadFunctions * dl_fn = & ctx - > nvenc_dload_funcs ;
NvencDynLoadFunctions * dl_fn = & ctx - > nvenc_dload_funcs ;
NV_ENCODE_API_FUNCTION_LIST * p_nvenc = & dl_fn - > nvenc_funcs ;
NV_ENCODE_API_FUNCTION_LIST * p_nvenc = & dl_fn - > nvenc_funcs ;
NvencSurface * tmp_surface = & ctx - > surfaces [ idx ] ;
NVENCSTATUS nv_status ;
NVENCSTATUS nv_status ;
NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 } ;
NV_ENC_CREATE_BITSTREAM_BUFFER allocOut = { 0 } ;
@ -1121,8 +1140,6 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
ctx - > surfaces [ idx ] . height = allocSurf . height ;
ctx - > surfaces [ idx ] . height = allocSurf . height ;
}
}
ctx - > surfaces [ idx ] . lockCount = 0 ;
/* 1MB is large enough to hold most output frames.
/* 1MB is large enough to hold most output frames.
* NVENC increases this automatically if it is not enough . */
* NVENC increases this automatically if it is not enough . */
allocOut . size = 1024 * 1024 ;
allocOut . size = 1024 * 1024 ;
@ -1141,6 +1158,8 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
ctx - > surfaces [ idx ] . output_surface = allocOut . bitstreamBuffer ;
ctx - > surfaces [ idx ] . output_surface = allocOut . bitstreamBuffer ;
ctx - > surfaces [ idx ] . size = allocOut . size ;
ctx - > surfaces [ idx ] . size = allocOut . size ;
av_fifo_generic_write ( ctx - > unused_surface_queue , & tmp_surface , sizeof ( tmp_surface ) , NULL ) ;
return 0 ;
return 0 ;
}
}
@ -1156,6 +1175,11 @@ static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
ctx - > timestamp_list = av_fifo_alloc ( ctx - > nb_surfaces * sizeof ( int64_t ) ) ;
ctx - > timestamp_list = av_fifo_alloc ( ctx - > nb_surfaces * sizeof ( int64_t ) ) ;
if ( ! ctx - > timestamp_list )
if ( ! ctx - > timestamp_list )
return AVERROR ( ENOMEM ) ;
return AVERROR ( ENOMEM ) ;
ctx - > unused_surface_queue = av_fifo_alloc ( ctx - > nb_surfaces * sizeof ( NvencSurface * ) ) ;
if ( ! ctx - > unused_surface_queue )
return AVERROR ( ENOMEM ) ;
ctx - > output_surface_queue = av_fifo_alloc ( ctx - > nb_surfaces * sizeof ( NvencSurface * ) ) ;
ctx - > output_surface_queue = av_fifo_alloc ( ctx - > nb_surfaces * sizeof ( NvencSurface * ) ) ;
if ( ! ctx - > output_surface_queue )
if ( ! ctx - > output_surface_queue )
return AVERROR ( ENOMEM ) ;
return AVERROR ( ENOMEM ) ;
@ -1222,6 +1246,7 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
av_fifo_freep ( & ctx - > timestamp_list ) ;
av_fifo_freep ( & ctx - > timestamp_list ) ;
av_fifo_freep ( & ctx - > output_surface_ready_queue ) ;
av_fifo_freep ( & ctx - > output_surface_ready_queue ) ;
av_fifo_freep ( & ctx - > output_surface_queue ) ;
av_fifo_freep ( & ctx - > output_surface_queue ) ;
av_fifo_freep ( & ctx - > unused_surface_queue ) ;
if ( ctx - > surfaces & & avctx - > pix_fmt = = AV_PIX_FMT_CUDA ) {
if ( ctx - > surfaces & & avctx - > pix_fmt = = AV_PIX_FMT_CUDA ) {
for ( i = 0 ; i < ctx - > nb_surfaces ; + + i ) {
for ( i = 0 ; i < ctx - > nb_surfaces ; + + i ) {
@ -1305,16 +1330,14 @@ av_cold int ff_nvenc_encode_init(AVCodecContext *avctx)
/*
 * get_free_frame: return a surface from the encoder context that is not
 * currently claimed, or NULL if there is none.
 *
 * NOTE(review): this span is diff residue. Context lines appear twice and two
 * different bodies are interleaved without +/- markers:
 *   - a linear scan over ctx->surfaces[0 .. nb_surfaces-1] that claims the
 *     first entry whose lockCount is zero (sets it to 1) and returns its
 *     address;
 *   - a pop from ctx->unused_surface_queue that returns NULL when the FIFO
 *     holds no data, otherwise reads one NvencSurface pointer and returns it.
 * Presumably the scan is the removed variant and the FIFO pop is the added
 * one; confirm against the original patch before relying on either.
 */
static NvencSurface * get_free_frame ( NvencContext * ctx )
static NvencSurface * get_free_frame ( NvencContext * ctx )
{
{
int i ;
NvencSurface * tmp_surf ;
// Scan variant: look for the first surface whose lockCount is zero.
for ( i = 0 ; i < ctx - > nb_surfaces ; i + + ) {
if ( ! ctx - > surfaces [ i ] . lockCount ) {
// Mark the surface as claimed before handing it to the caller.
ctx - > surfaces [ i ] . lockCount = 1 ;
return & ctx - > surfaces [ i ] ;
}
}
// FIFO variant: nothing to hand out when the unused-surface queue has no data.
if ( ! ( av_fifo_size ( ctx - > unused_surface_queue ) > 0 ) )
// queue empty
return NULL ;
return NULL ;
// Pop one surface pointer (sizeof ( tmp_surf ) bytes) from the queue.
av_fifo_generic_read ( ctx - > unused_surface_queue , & tmp_surf , sizeof ( tmp_surf ) , NULL ) ;
return tmp_surf ;
}
}
static int nvenc_copy_frame ( AVCodecContext * avctx , NvencSurface * nv_surface ,
static int nvenc_copy_frame ( AVCodecContext * avctx , NvencSurface * nv_surface ,
@ -1712,7 +1735,6 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
}
}
if ( res ) {
if ( res ) {
inSurf - > lockCount = 0 ;
return res ;
return res ;
}
}
@ -1790,8 +1812,7 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
if ( res )
if ( res )
return res ;
return res ;
av_assert0 ( tmpoutsurf - > lockCount ) ;
av_fifo_generic_write ( ctx - > unused_surface_queue , & tmpoutsurf , sizeof ( tmpoutsurf ) , NULL ) ;
tmpoutsurf - > lockCount - - ;
* got_packet = 1 ;
* got_packet = 1 ;
} else {
} else {