@@ -41,11 +41,23 @@
 #define CHECK_CU(x) FF_CUDA_CHECK_DL(cuda_cu, cu, x)
 #endif
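/* Per-queue state: the fence tracks the queue's last submission, and
 * buf_deps holds the buffer references that submission depends on. */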
typedef struct VulkanQueueCtx {
    VkFence fence;
    VkQueue queue;
    int was_synchronous;

    /* Buffer dependencies */
    AVBufferRef **buf_deps;
    int nb_buf_deps;
    int buf_deps_alloc_size;
} VulkanQueueCtx;
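/* Execution context: one command pool with a command buffer per queue;
 * asynchronous submissions rotate through the queues round-robin via
 * cur_queue_idx (see submit_exec_ctx() below). */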
typedef struct VulkanExecCtx {
    VkCommandPool pool;
    VkCommandBuffer buf;
    VkQueue queue;
    VkFence fence;
    VkCommandBuffer *bufs;
    VulkanQueueCtx *queues;
    int nb_queues;
    int cur_queue_idx;
} VulkanExecCtx;

typedef struct VulkanDevicePriv {
@@ -60,8 +72,9 @@ typedef struct VulkanDevicePriv {
    /* Debug callback */
    VkDebugUtilsMessengerEXT debug_ctx;

    /* Image uploading */
    VulkanExecCtx cmd;
    /* Image transfers */
    VulkanExecCtx upload_ctx;
    VulkanExecCtx download_ctx;

    /* Extensions */
    uint64_t extensions;
@@ -89,6 +102,16 @@ typedef struct AVVkFrameInternal {
 #endif
} AVVkFrameInternal;

#define GET_QUEUE_COUNT(hwctx, graph, comp, tx) (                   \
    graph ?  hwctx->nb_graphics_queues :                            \
    comp  ? (hwctx->nb_comp_queues ?                                \
             hwctx->nb_comp_queues : hwctx->nb_graphics_queues) :   \
    tx    ? (hwctx->nb_tx_queues ? hwctx->nb_tx_queues :            \
            (hwctx->nb_comp_queues ?                                \
             hwctx->nb_comp_queues : hwctx->nb_graphics_queues)) :  \
    0                                                               \
)
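/* The queue count falls back down the capability chain: transfer queues
 * fall back to compute queues, which fall back to graphics queues. For
 * example, GET_QUEUE_COUNT(hwctx, 0, 0, 1) with nb_tx_queues == 0 and
 * nb_comp_queues == 4 yields 4. */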
#define VK_LOAD_PFN(inst, name) PFN_##name pfn_##name = (PFN_##name)           \
                                              vkGetInstanceProcAddr(inst, #name)
@@ -709,7 +732,7 @@ fail:
}

static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
                           int queue_family_index)
                           int queue_family_index, int num_queues)
{
    VkResult ret;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
@@ -722,21 +745,20 @@ static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
    VkCommandBufferAllocateInfo cbuf_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = 1,
        .commandBufferCount = num_queues,
    };

    VkFenceCreateInfo fence_spawn = {
        .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
    };

    cmd->nb_queues = num_queues;

    ret = vkCreateFence(hwctx->act_dev, &fence_spawn,
                        hwctx->alloc, &cmd->fence);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    cmd->queues = av_mallocz(num_queues * sizeof(*cmd->queues));
    if (!cmd->queues)
        return AVERROR(ENOMEM);

    cmd->bufs = av_mallocz(num_queues * sizeof(*cmd->bufs));
    if (!cmd->bufs)
        return AVERROR(ENOMEM);

    /* Create command pool */
    ret = vkCreateCommandPool(hwctx->act_dev, &cqueue_create,
                              hwctx->alloc, &cmd->pool);
    if (ret != VK_SUCCESS) {
@@ -747,15 +769,19 @@ static int create_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
    cbuf_create.commandPool = cmd->pool;

    ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, &cmd->buf);
    /* Allocate command buffer */
    ret = vkAllocateCommandBuffers(hwctx->act_dev, &cbuf_create, cmd->bufs);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    vkGetDeviceQueue(hwctx->act_dev, cqueue_create.queueFamilyIndex, 0,
                     &cmd->queue);
    for (int i = 0; i < num_queues; i++) {
        VulkanQueueCtx *q = &cmd->queues[i];
        vkGetDeviceQueue(hwctx->act_dev, queue_family_index, i, &q->queue);
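        /* A fresh queue starts out marked synchronous: it has no pending
         * submission whose fence would need waiting on. */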
        q->was_synchronous = 1;
    }

    return 0;
}
@ -764,12 +790,154 @@ static void free_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
{
AVVulkanDeviceContext * hwctx = ctx - > hwctx ;
if ( cmd - > fence )
vkDestroyFence ( hwctx - > act_dev , cmd - > fence , hwctx - > alloc ) ;
if ( cmd - > buf )
vkFreeCommandBuffers ( hwctx - > act_dev , cmd - > pool , 1 , & cmd - > buf ) ;
/* Make sure all queues have finished executing */
for ( int i = 0 ; i < cmd - > nb_queues ; i + + ) {
VulkanQueueCtx * q = & cmd - > queues [ i ] ;
if ( q - > fence & & ! q - > was_synchronous ) {
vkWaitForFences ( hwctx - > act_dev , 1 , & q - > fence , VK_TRUE , UINT64_MAX ) ;
vkResetFences ( hwctx - > act_dev , 1 , & q - > fence ) ;
}
/* Free the fence */
if ( q - > fence )
vkDestroyFence ( hwctx - > act_dev , q - > fence , hwctx - > alloc ) ;
/* Free buffer dependencies */
for ( int j = 0 ; j < q - > nb_buf_deps ; j + + )
av_buffer_unref ( & q - > buf_deps [ j ] ) ;
av_free ( q - > buf_deps ) ;
}
if ( cmd - > bufs )
vkFreeCommandBuffers ( hwctx - > act_dev , cmd - > pool , cmd - > nb_queues , cmd - > bufs ) ;
if ( cmd - > pool )
vkDestroyCommandPool ( hwctx - > act_dev , cmd - > pool , hwctx - > alloc ) ;
av_freep ( & cmd - > bufs ) ;
av_freep ( & cmd - > queues ) ;
}
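/* Fetch the command buffer belonging to the currently active queue. */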
static VkCommandBuffer get_buf_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
{
    return cmd->bufs[cmd->cur_queue_idx];
}

static void unref_exec_ctx_deps(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
{
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];

    for (int j = 0; j < q->nb_buf_deps; j++)
        av_buffer_unref(&q->buf_deps[j]);
    q->nb_buf_deps = 0;
}
static int wait_start_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd)
{
    VkResult ret;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];
    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };
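    /* If the previous submission on this queue was asynchronous and is still
     * in flight, wait for it before reusing the queue's command buffer. */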
    /* Create the fence and don't wait for it initially */
    if (!q->fence) {
        VkFenceCreateInfo fence_spawn = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        };
        ret = vkCreateFence(hwctx->act_dev, &fence_spawn, hwctx->alloc,
                            &q->fence);
        if (ret != VK_SUCCESS) {
            av_log(ctx, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
                   vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    } else if (!q->was_synchronous) {
        vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        vkResetFences(hwctx->act_dev, 1, &q->fence);
    }

    /* Discard queue dependencies */
    unref_exec_ctx_deps(ctx, cmd);

    ret = vkBeginCommandBuffer(cmd->bufs[cmd->cur_queue_idx], &cmd_start);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
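/* Ref every buffer the pending submission touches so the buffers outlive
 * the GPU work; the references are released once the queue's fence has
 * been waited on. */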
static int add_buf_dep_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
                                AVBufferRef * const *deps, int nb_deps)
{
    AVBufferRef **dst;
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];

    if (!deps || !nb_deps)
        return 0;

    dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
                          (q->nb_buf_deps + nb_deps) * sizeof(*dst));
    if (!dst)
        goto err;

    q->buf_deps = dst;

    for (int i = 0; i < nb_deps; i++) {
        q->buf_deps[q->nb_buf_deps] = av_buffer_ref(deps[i]);
        if (!q->buf_deps[q->nb_buf_deps])
            goto err;
        q->nb_buf_deps++;
    }

    return 0;

err:
    unref_exec_ctx_deps(ctx, cmd);
    return AVERROR(ENOMEM);
}
static int submit_exec_ctx(AVHWDeviceContext *ctx, VulkanExecCtx *cmd,
                           VkSubmitInfo *s_info, int synchronous)
{
    VkResult ret;
    VulkanQueueCtx *q = &cmd->queues[cmd->cur_queue_idx];

    ret = vkEndCommandBuffer(cmd->bufs[cmd->cur_queue_idx]);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               vk_ret2str(ret));
        unref_exec_ctx_deps(ctx, cmd);
        return AVERROR_EXTERNAL;
    }

    s_info->pCommandBuffers = &cmd->bufs[cmd->cur_queue_idx];
    s_info->commandBufferCount = 1;

    ret = vkQueueSubmit(q->queue, 1, s_info, q->fence);
    if (ret != VK_SUCCESS) {
        unref_exec_ctx_deps(ctx, cmd);
        return AVERROR_EXTERNAL;
    }

    q->was_synchronous = synchronous;

    if (synchronous) {
        AVVulkanDeviceContext *hwctx = ctx->hwctx;
        vkWaitForFences(hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        vkResetFences(hwctx->act_dev, 1, &q->fence);
        unref_exec_ctx_deps(ctx, cmd);
    } else { /* Rotate queues */
        cmd->cur_queue_idx = (cmd->cur_queue_idx + 1) % cmd->nb_queues;
    }

    return 0;
}
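/* Sketch of the usage pattern the functions above establish (as used by
 * prepare_frame() and transfer_image_buf() below):
 *
 *     if ((err = wait_start_exec_ctx(ctx, cmd)))              // wait + begin
 *         return err;
 *     vkCmdPipelineBarrier(get_buf_exec_ctx(ctx, cmd), ...);  // record
 *     if ((err = add_buf_dep_exec_ctx(ctx, cmd, deps, nb_deps)))
 *         return err;                                         // keep refs alive
 *     return submit_exec_ctx(ctx, cmd, &s_info, 0);           // async submit
 */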
static void vulkan_device_free(AVHWDeviceContext *ctx)

@@ -937,7 +1105,8 @@ if (n >= queue_num) {
    p->qfs[p->num_qfs++] = hwctx->queue_family_comp_index;

    /* Create exec context - if there's something invalid this will error out */
    err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index);
    err = create_exec_ctx(ctx, &p->cmd, hwctx->queue_family_tx_index,
                          GET_QUEUE_COUNT(hwctx, 0, 0, 1));
    if (err)
        return err;
@@ -1257,26 +1426,17 @@ enum PrepMode {

static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
                         AVVkFrame *frame, enum PrepMode pmode)
{
    VkResult ret;
    int err;
    uint32_t dst_qf;
    VkImageLayout new_layout;
    VkAccessFlags new_access;
    AVHWDeviceContext *ctx = hwfc->device_ctx;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
    VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    VkSubmitInfo s_info = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .commandBufferCount   = 1,
        .pCommandBuffers      = &ectx->buf,
        .pSignalSemaphores    = frame->sem,
        .signalSemaphoreCount = planes,
    };
@@ -1306,9 +1466,8 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
        break;
    }

    ret = vkBeginCommandBuffer(ectx->buf, &cmd_start);
    if (ret != VK_SUCCESS)
        return AVERROR_EXTERNAL;
    if ((err = wait_start_exec_ctx(ctx, ectx)))
        return err;

    /* Change the image layout to something more optimal for writes.
     * This also signals the newly created semaphore, making it usable
@@ -1330,23 +1489,12 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
        frame->access[i] = img_bar[i].dstAccessMask;
    }

    vkCmdPipelineBarrier(ectx->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                         0, NULL, 0, NULL, planes, img_bar);

    ret = vkEndCommandBuffer(ectx->buf);
    if (ret != VK_SUCCESS)
        return AVERROR_EXTERNAL;

    ret = vkQueueSubmit(ectx->queue, 1, &s_info, ectx->fence);
    if (ret != VK_SUCCESS) {
        return AVERROR_EXTERNAL;
    } else {
        vkWaitForFences(hwctx->act_dev, 1, &ectx->fence, VK_TRUE, UINT64_MAX);
        vkResetFences(hwctx->act_dev, 1, &ectx->fence);
    }
    vkCmdPipelineBarrier(get_buf_exec_ctx(ctx, ectx),
                         VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT,
                         0, 0, NULL, 0, NULL, planes, img_bar);

    return 0;
    return submit_exec_ctx(ctx, ectx, &s_info, 0);
}
static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,

@@ -1559,7 +1707,8 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
        hwctx->usage |= DEFAULT_USAGE_FLAGS;

    err = create_exec_ctx(hwfc->device_ctx, &fp->cmd,
                          dev_hwctx->queue_family_tx_index);
                          dev_hwctx->queue_family_tx_index,
                          GET_QUEUE_COUNT(dev_hwctx, 0, 0, 1));
    if (err)
        return err;
@@ -2620,12 +2769,12 @@ static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
    return err;
}

static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
static int transfer_image_buf(AVHWDeviceContext *ctx, const AVFrame *f,
                              AVBufferRef **bufs, const int *buf_stride, int w,
                              int h, enum AVPixelFormat pix_fmt, int to_buf)
{
    VkResult ret;
    AVVulkanDeviceContext *hwctx = ctx->hwctx;
    int err;
    AVVkFrame *frame = (AVVkFrame *)f->data[0];
    VulkanDevicePriv *s = ctx->internal->priv;

    int bar_num = 0;
@@ -2634,17 +2783,11 @@ static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
    const int planes = av_pix_fmt_count_planes(pix_fmt);
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
    VkCommandBuffer cmd_buf = get_buf_exec_ctx(ctx, &s->cmd);

    VkSubmitInfo s_info = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .commandBufferCount   = 1,
        .pCommandBuffers      = &s->cmd.buf,
        .pSignalSemaphores    = frame->sem,
        .pWaitSemaphores      = frame->sem,
        .pWaitDstStageMask    = sem_wait_dst,
@@ -2652,12 +2795,8 @@ static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
        .waitSemaphoreCount   = planes,
    };
    ret = vkBeginCommandBuffer(s->cmd.buf, &cmd_start);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Unable to init command buffer: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }
    if ((err = wait_start_exec_ctx(ctx, &s->cmd)))
        return err;

    /* Change the image layout to something more optimal for transfers */
    for (int i = 0; i < planes; i++) {
@@ -2691,7 +2830,7 @@ static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
    }

    if (bar_num)
        vkCmdPipelineBarrier(s->cmd.buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
        vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                             VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                             0, NULL, 0, NULL, bar_num, img_bar);
@@ -2715,33 +2854,33 @@ static int transfer_image_buf(AVHWDeviceContext *ctx, AVVkFrame *frame,
        };

        if (to_buf)
            vkCmdCopyImageToBuffer(s->cmd.buf, frame->img[i], frame->layout[i],
            vkCmdCopyImageToBuffer(cmd_buf, frame->img[i], frame->layout[i],
                                   vkbuf->buf, 1, &buf_reg);
        else
            vkCmdCopyBufferToImage(s->cmd.buf, vkbuf->buf, frame->img[i],
            vkCmdCopyBufferToImage(cmd_buf, vkbuf->buf, frame->img[i],
                                   frame->layout[i], 1, &buf_reg);
    }

    ret = vkEndCommandBuffer(s->cmd.buf);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    /* Wait for the download/upload to finish if uploading, otherwise the
     * semaphore will take care of synchronization when uploading */
    ret = vkQueueSubmit(s->cmd.queue, 1, &s_info, s->cmd.fence);
    if (ret != VK_SUCCESS) {
        av_log(ctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
               vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    /* When uploading, do this asynchronously if the source is refcounted by
     * keeping the buffers as a submission dependency.
     * The hwcontext is guaranteed to not be freed until all frames are freed
     * in the frames_uninit function.
     * When downloading to a buffer, do this synchronously and wait for the
     * queue submission to finish executing */
    if (!to_buf) {
        int ref;
        for (ref = 0; ref < AV_NUM_DATA_POINTERS; ref++) {
            if (!f->buf[ref])
                break;
            if ((err = add_buf_dep_exec_ctx(ctx, &s->cmd, &f->buf[ref], 1)))
                return err;
        }
        if (ref && (err = add_buf_dep_exec_ctx(ctx, &s->cmd, bufs, planes)))
            return err;
        return submit_exec_ctx(ctx, &s->cmd, &s_info, !ref);
    } else {
        vkWaitForFences(hwctx->act_dev, 1, &s->cmd.fence, VK_TRUE, UINT64_MAX);
        vkResetFences(hwctx->act_dev, 1, &s->cmd.fence);
        return submit_exec_ctx(ctx, &s->cmd, &s_info, 1);
    }

    return 0;
}
/* Technically we can use VK_EXT_external_memory_host to upload and download,
@@ -2778,11 +2917,11 @@ static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
        err = vulkan_map_frame_to_mem(hwfc, map, dst, AV_HWFRAME_MAP_WRITE);
        if (err)
            goto end;
            return err;

        err = av_frame_copy(map, src);
        av_frame_free(&map);
        goto end;
        return err;
    }

    /* Create buffers */
@@ -2809,7 +2948,7 @@ static int vulkan_transfer_data_from_mem(AVHWFramesContext *hwfc, AVFrame *dst,
        goto end;

    /* Copy buffers to image */
    err = transfer_image_buf(dev_ctx, f, bufs, tmp.linesize,
    err = transfer_image_buf(dev_ctx, dst, bufs, tmp.linesize,
                             src->width, src->height, src->format, 0);

end:
@@ -2949,10 +3088,12 @@ static int vulkan_transfer_data_to_mem(AVHWFramesContext *hwfc, AVFrame *dst,
        err = create_buf(dev_ctx, &bufs[i], p_height,
                         &tmp.linesize[i], VK_BUFFER_USAGE_TRANSFER_DST_BIT,
                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, NULL, NULL);
        if (err)
            goto end;
    }

    /* Copy image to buffer */
    if ((err = transfer_image_buf(dev_ctx, src, bufs, tmp.linesize,
                                  dst->width, dst->height, dst->format, 1)))
        goto end;