diff --git a/libavfilter/vf_nlmeans_vulkan.c b/libavfilter/vf_nlmeans_vulkan.c index 9d96efa27b..f0a7353d5c 100644 --- a/libavfilter/vf_nlmeans_vulkan.c +++ b/libavfilter/vf_nlmeans_vulkan.c @@ -687,14 +687,16 @@ static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec, VkBufferMemoryBarrier2 buf_bar[8]; int nb_buf_bar = 0; + DenoisePushData pd = { + { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] }, + }; + /* Denoise pass pipeline */ ff_vk_exec_bind_pipeline(vkctx, exec, &s->pl_denoise); /* Push data */ ff_vk_update_push_exec(vkctx, exec, &s->pl_denoise, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(DenoisePushData), &(DenoisePushData) { - { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] }, - }); + 0, sizeof(pd), &pd); buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2, @@ -970,6 +972,10 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) offsets_dispatched, }; + /* Push data */ + ff_vk_update_push_exec(vkctx, exec, &s->pl_weights, VK_SHADER_STAGE_COMPUTE_BIT, + 0, sizeof(pd), &pd); + if (offsets_dispatched) { nb_buf_bar = 0; buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) { @@ -995,10 +1001,6 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) integral_vk->access = buf_bar[1].dstAccessMask; } - /* Push data */ - ff_vk_update_push_exec(vkctx, exec, &s->pl_weights, VK_SHADER_STAGE_COMPUTE_BIT, - 0, sizeof(pd), &pd); - wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t); wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]); diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 7e7d9cb70b..6317ab7d0e 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -422,7 +422,7 @@ static const VulkanOptExtension optional_instance_exts[] = { static const VulkanOptExtension optional_device_exts[] = { /* Misc or required by other extensions */ { 
VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, - { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_NO_FLAG }, + { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_PUSH_DESCRIPTOR }, { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, FF_VK_EXT_DESCRIPTOR_BUFFER, }, { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME, FF_VK_EXT_DEVICE_DRM }, { VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, FF_VK_EXT_ATOMIC_FLOAT }, diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 7ea4c33619..046ac5d67e 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -1520,12 +1520,8 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, FFVulkanDescriptorSetBinding *desc, int nb, int singular, int print_to_shader_only) { - VkResult ret; int has_sampler = 0; - FFVulkanFunctions *vk = &s->vkfn; FFVulkanDescriptorSet *set; - VkDescriptorSetLayout *layout; - VkDescriptorSetLayoutCreateInfo desc_create_layout; if (print_to_shader_only) goto print; @@ -1537,14 +1533,7 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, return AVERROR(ENOMEM); pl->desc_set = set; - layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout), - pl->nb_descriptor_sets + 1); - if (!layout) - return AVERROR(ENOMEM); - pl->desc_layout = layout; - set = &set[pl->nb_descriptor_sets]; - layout = &layout[pl->nb_descriptor_sets]; memset(set, 0, sizeof(*set)); set->binding = av_calloc(nb, sizeof(*set->binding)); @@ -1557,14 +1546,6 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, return AVERROR(ENOMEM); } - desc_create_layout = (VkDescriptorSetLayoutCreateInfo) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .bindingCount = nb, - .pBindings = set->binding, - .flags = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ? 
- VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT : 0x0, - }; - for (int i = 0; i < nb; i++) { set->binding[i].binding = i; set->binding[i].descriptorType = desc[i].type; @@ -1582,22 +1563,7 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, if (has_sampler) set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT; - ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout, - s->hwctx->alloc, layout); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init descriptor set layout: %s", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { - vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, *layout, &set->layout_size); - set->aligned_size = FFALIGN(set->layout_size, s->desc_buf_props.descriptorBufferOffsetAlignment); - - for (int i = 0; i < nb; i++) - vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, *layout, - i, &set->binding_offset[i]); - } else { + if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) { for (int i = 0; i < nb; i++) { int j; VkDescriptorPoolSize *desc_pool_size; @@ -1606,8 +1572,8 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, break; if (j >= pl->nb_desc_pool_size) { desc_pool_size = av_realloc_array(pl->desc_pool_size, - sizeof(*desc_pool_size), - pl->nb_desc_pool_size + 1); + sizeof(*desc_pool_size), + pl->nb_desc_pool_size + 1); if (!desc_pool_size) return AVERROR(ENOMEM); @@ -1703,7 +1669,7 @@ int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool, pl->bound_buffer_indices[i] = i; } - } else { + } else if (!pl->use_push) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; VkDescriptorSetLayout *tmp_layouts; @@ -1796,8 +1762,16 @@ static inline void update_set_pool_write(FFVulkanContext *s, vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL); } } else { - write_info->dstSet = pl->desc_sets[e->idx*pl->nb_descriptor_sets + set]; - 
vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL);
+        if (pl->use_push) {
+            vk->CmdPushDescriptorSetKHR(e->buf,
+                                        pl->bind_point,
+                                        pl->pipeline_layout,
+                                        set, 1,
+                                        write_info);
+        } else {
+            write_info->dstSet = pl->desc_sets[e->idx*pl->nb_descriptor_sets + set];
+            vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL);
+        }
     }
 }
 
@@ -1954,6 +1928,92 @@ void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
                          stage, offset, size, src);
 }
 
+/**
+ * Create the descriptor set layouts of a pipeline.
+ *
+ * One VkDescriptorSetLayout is created for every descriptor set of the
+ * pipeline. This also decides whether the pipeline uses push descriptors:
+ * only when descriptor buffers are not in use, the push descriptor
+ * extension flag is set, the pipeline has exactly one descriptor set, and
+ * no set is marked singular.
+ *
+ * When descriptor buffers are in use, the size, aligned size and binding
+ * offsets of each layout are queried and stored in its descriptor set.
+ *
+ * @param s  Vulkan context providing the function table, device and extensions
+ * @param pl pipeline whose desc_layout array and use_push flag are filled in
+ * @return 0 on success, AVERROR(ENOMEM) if the layout array cannot be
+ *         allocated, AVERROR_EXTERNAL if a layout cannot be created
+ */
+static int init_descriptors(FFVulkanContext *s, FFVulkanPipeline *pl)
+{
+    VkResult ret;
+    FFVulkanFunctions *vk = &s->vkfn;
+    const int use_desc_buf = !!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER);
+
+    /* Zero-filled, so entries that were never created are not left
+     * indeterminate if creation fails part-way through the loop below. */
+    pl->desc_layout = av_calloc(pl->nb_descriptor_sets,
+                                sizeof(*pl->desc_layout));
+    if (!pl->desc_layout)
+        return AVERROR(ENOMEM);
+
+    /* Always assigned, whichever descriptor mode is active. */
+    pl->use_push = 0;
+    if (!use_desc_buf) {
+        int has_singular = 0;
+        for (int i = 0; i < pl->nb_descriptor_sets; i++) {
+            if (pl->desc_set[i].singular) {
+                has_singular = 1;
+                break;
+            }
+        }
+        pl->use_push = (s->extensions & FF_VK_EXT_PUSH_DESCRIPTOR) &&
+                       (pl->nb_descriptor_sets == 1) &&
+                       !has_singular;
+    }
+
+    for (int i = 0; i < pl->nb_descriptor_sets; i++) {
+        FFVulkanDescriptorSet *set = &pl->desc_set[i];
+        VkDescriptorSetLayoutCreateInfo desc_layout_create = {
+            .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+            .bindingCount = set->nb_bindings,
+            .pBindings = set->binding,
+            .flags = use_desc_buf ?
+                     VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT :
+                     pl->use_push ?
+                     VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR :
+                     0x0,
+        };
+
+        ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev,
+                                            &desc_layout_create,
+                                            s->hwctx->alloc,
+                                            &pl->desc_layout[i]);
+        if (ret != VK_SUCCESS) {
+            av_log(s, AV_LOG_ERROR, "Unable to create descriptor set layout: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+
+        if (use_desc_buf) {
+            vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, pl->desc_layout[i],
+                                              &set->layout_size);
+
+            set->aligned_size = FFALIGN(set->layout_size,
+                                        s->desc_buf_props.descriptorBufferOffsetAlignment);
+
+            for (int j = 0; j < set->nb_bindings; j++)
+                vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev,
+                                                           pl->desc_layout[i],
+                                                           j,
+                                                           &set->binding_offset[j]);
+        }
+    }
+
+    return 0;
+}
+
 static int init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
 {
     VkResult ret;
@@ -1989,6 +2049,10 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl,
 
     VkComputePipelineCreateInfo pipeline_create_info;
 
+    err = init_descriptors(s, pl);
+    if (err < 0)
+        return err;
+
     err = init_pipeline_layout(s, pl);
     if (err < 0)
         return err;
@@ -2038,7 +2102,7 @@ void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e,
             vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, pl->bind_point, pl->pipeline_layout,
                                                  0, pl->nb_descriptor_sets,
                                                  pl->bound_buffer_indices, offsets);
-        } else {
+        } else if (!pl->use_push) {
             vk->CmdBindDescriptorSets(e->buf, pl->bind_point, pl->pipeline_layout,
                                       0, pl->nb_descriptor_sets,
                                       &pl->desc_sets[e->idx*pl->nb_descriptor_sets],
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 7009104a8f..e03fd702ca 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -226,6 +226,7 @@ typedef struct FFVulkanPipeline {
     int nb_descriptor_sets;
 
     /* Descriptor pool */
+    int use_push;
     VkDescriptorSet *desc_sets;
     VkDescriptorPool desc_pool;
     VkDescriptorPoolSize *desc_pool_size;
diff --git a/libavutil/vulkan_functions.h
b/libavutil/vulkan_functions.h index da555b37c7..91dd8b91e0 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -48,6 +48,7 @@ typedef enum FFVulkanExtensions { FF_VK_EXT_COOP_MATRIX = 1ULL << 16, /* VK_KHR_cooperative_matrix */ FF_VK_EXT_OPTICAL_FLOW = 1ULL << 17, /* VK_NV_optical_flow */ FF_VK_EXT_SHADER_OBJECT = 1ULL << 18, /* VK_EXT_shader_object */ + FF_VK_EXT_PUSH_DESCRIPTOR = 1ULL << 19, /* VK_KHR_push_descriptor */ FF_VK_EXT_VIDEO_MAINTENANCE_1 = 1ULL << 27, /* VK_KHR_video_maintenance1 */ FF_VK_EXT_VIDEO_ENCODE_QUEUE = 1ULL << 28, /* VK_KHR_video_encode_queue */ @@ -179,6 +180,7 @@ typedef enum FFVulkanExtensions { \ /* Descriptors */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSets) \ + MACRO(1, 1, FF_VK_EXT_PUSH_DESCRIPTOR, CmdPushDescriptorSetKHR) \ \ /* Queries */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateQueryPool) \ diff --git a/libavutil/vulkan_loader.h b/libavutil/vulkan_loader.h index 82ed16c1f0..2022eb320b 100644 --- a/libavutil/vulkan_loader.h +++ b/libavutil/vulkan_loader.h @@ -65,6 +65,7 @@ static inline uint64_t ff_vk_extensions_to_mask(const char * const *extensions, { VK_KHR_VIDEO_ENCODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_ENCODE_H265 }, { VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_H265 }, { VK_KHR_VIDEO_DECODE_AV1_EXTENSION_NAME, FF_VK_EXT_VIDEO_DECODE_AV1 }, + { VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, FF_VK_EXT_PUSH_DESCRIPTOR }, }; FFVulkanExtensions mask = 0x0;