@ -34,8 +34,14 @@
# include "avcodec.h"
# include "decode.h"
# include "hwaccel.h"
# include "nvdec.h"
# include "internal.h"
/*
 * Fallback for builds where NVDECAPI_CHECK_VERSION(9, 0) evaluates to false:
 * provide the YUV444 surface format identifiers as plain integer macros so
 * that the code further down, which assigns them to cuinfo.OutputFormat,
 * still compiles.
 * NOTE(review): the values 2 and 3 presumably mirror the enumerators declared
 * by the newer SDK headers - confirm against the cudaVideoSurfaceFormat enum.
 */
# if !NVDECAPI_CHECK_VERSION(9, 0)
# define cudaVideoSurfaceFormat_YUV444 2
# define cudaVideoSurfaceFormat_YUV444_16Bit 3
# endif
typedef struct CuvidContext
{
AVClass * avclass ;
@ -106,6 +112,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
CUVIDDECODECAPS * caps = NULL ;
CUVIDDECODECREATEINFO cuinfo ;
int surface_fmt ;
int chroma_444 ;
int old_width = avctx - > width ;
int old_height = avctx - > height ;
@ -148,17 +155,19 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
cuinfo . target_rect . right = cuinfo . ulTargetWidth ;
cuinfo . target_rect . bottom = cuinfo . ulTargetHeight ;
chroma_444 = format - > chroma_format = = cudaVideoChromaFormat_444 ;
switch ( format - > bit_depth_luma_minus8 ) {
case 0 : // 8-bit
pix_fmts [ 1 ] = AV_PIX_FMT_NV12 ;
pix_fmts [ 1 ] = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12 ;
caps = & ctx - > caps8 ;
break ;
case 2 : // 10-bit
pix_fmts [ 1 ] = AV_PIX_FMT_P010 ;
pix_fmts [ 1 ] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010 ;
caps = & ctx - > caps10 ;
break ;
case 4 : // 12-bit
pix_fmts [ 1 ] = AV_PIX_FMT_P016 ;
pix_fmts [ 1 ] = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016 ;
caps = & ctx - > caps12 ;
break ;
default :
@ -261,12 +270,6 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
return 0 ;
}
if ( format - > chroma_format ! = cudaVideoChromaFormat_420 ) {
av_log ( avctx , AV_LOG_ERROR , " Chroma formats other than 420 are not supported \n " ) ;
ctx - > internal_error = AVERROR ( EINVAL ) ;
return 0 ;
}
ctx - > chroma_format = format - > chroma_format ;
cuinfo . CodecType = ctx - > codec_type = format - > codec ;
@ -280,8 +283,15 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
case AV_PIX_FMT_P016 :
cuinfo . OutputFormat = cudaVideoSurfaceFormat_P016 ;
break ;
case AV_PIX_FMT_YUV444P :
cuinfo . OutputFormat = cudaVideoSurfaceFormat_YUV444 ;
break ;
case AV_PIX_FMT_YUV444P16 :
cuinfo . OutputFormat = cudaVideoSurfaceFormat_YUV444_16Bit ;
break ;
default :
av_log ( avctx , AV_LOG_ERROR , " Output formats other than NV12, P010 or P016 are not supported \n " ) ;
av_log ( avctx , AV_LOG_ERROR , " Unsupported output format: %s \n " ,
av_get_pix_fmt_name ( avctx - > sw_pix_fmt ) ) ;
ctx - > internal_error = AVERROR ( EINVAL ) ;
return 0 ;
}
@ -490,6 +500,7 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
return ret ;
if ( av_fifo_size ( ctx - > frame_queue ) ) {
const AVPixFmtDescriptor * pixdesc ;
CuvidParsedFrame parsed_frame ;
CUVIDPROCPARAMS params ;
unsigned int pitch = 0 ;
@ -520,7 +531,10 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
goto error ;
}
for ( i = 0 ; i < 2 ; i + + ) {
pixdesc = av_pix_fmt_desc_get ( avctx - > sw_pix_fmt ) ;
for ( i = 0 ; i < pixdesc - > nb_components ; i + + ) {
int height = avctx - > height > > ( i ? pixdesc - > log2_chroma_h : 0 ) ;
CUDA_MEMCPY2D cpy = {
. srcMemoryType = CU_MEMORYTYPE_DEVICE ,
. dstMemoryType = CU_MEMORYTYPE_DEVICE ,
@ -530,14 +544,14 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
. dstPitch = frame - > linesize [ i ] ,
. srcY = offset ,
. WidthInBytes = FFMIN ( pitch , frame - > linesize [ i ] ) ,
. Height = avctx - > height > > ( i ? 1 : 0 ) ,
. Height = height ,
} ;
ret = CHECK_CU ( ctx - > cudl - > cuMemcpy2DAsync ( & cpy , device_hwctx - > stream ) ) ;
if ( ret < 0 )
goto error ;
offset + = avctx - > height ;
offset + = height ;
}
ret = CHECK_CU ( ctx - > cudl - > cuStreamSynchronize ( device_hwctx - > stream ) ) ;
@ -545,7 +559,10 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
goto error ;
} else if ( avctx - > pix_fmt = = AV_PIX_FMT_NV12 | |
avctx - > pix_fmt = = AV_PIX_FMT_P010 | |
avctx - > pix_fmt = = AV_PIX_FMT_P016 ) {
avctx - > pix_fmt = = AV_PIX_FMT_P016 | |
avctx - > pix_fmt = = AV_PIX_FMT_YUV444P | |
avctx - > pix_fmt = = AV_PIX_FMT_YUV444P16 ) {
unsigned int offset = 0 ;
AVFrame * tmp_frame = av_frame_alloc ( ) ;
if ( ! tmp_frame ) {
av_log ( avctx , AV_LOG_ERROR , " av_frame_alloc failed \n " ) ;
@ -553,15 +570,24 @@ static int cuvid_output_frame(AVCodecContext *avctx, AVFrame *frame)
goto error ;
}
pixdesc = av_pix_fmt_desc_get ( avctx - > sw_pix_fmt ) ;
tmp_frame - > format = AV_PIX_FMT_CUDA ;
tmp_frame - > hw_frames_ctx = av_buffer_ref ( ctx - > hwframe ) ;
tmp_frame - > data [ 0 ] = ( uint8_t * ) mapped_frame ;
tmp_frame - > linesize [ 0 ] = pitch ;
tmp_frame - > data [ 1 ] = ( uint8_t * ) ( mapped_frame + avctx - > height * pitch ) ;
tmp_frame - > linesize [ 1 ] = pitch ;
tmp_frame - > width = avctx - > width ;
tmp_frame - > height = avctx - > height ;
/*
 * Every plane of the mapped frame is addressed with the same pitch.
 * Note that the following logic would therefore not work for three-plane
 * YUV420, because the pitch value is different for the chroma planes.
 */
for ( i = 0 ; i < pixdesc - > nb_components ; i + + ) {
tmp_frame - > data [ i ] = ( uint8_t * ) mapped_frame + offset ;
tmp_frame - > linesize [ i ] = pitch ;
offset + = pitch * ( avctx - > height > > ( i ? pixdesc - > log2_chroma_h : 0 ) ) ;
}
ret = ff_get_buffer ( avctx , frame , 0 ) ;
if ( ret < 0 ) {
av_log ( avctx , AV_LOG_ERROR , " ff_get_buffer failed \n " ) ;