/* * Copyright (c) Lynne * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "avassert.h" #include "mem.h" #include "vulkan.h" #include "libavutil/vulkan_loader.h" const VkComponentMapping ff_comp_identity_map = { .r = VK_COMPONENT_SWIZZLE_IDENTITY, .g = VK_COMPONENT_SWIZZLE_IDENTITY, .b = VK_COMPONENT_SWIZZLE_IDENTITY, .a = VK_COMPONENT_SWIZZLE_IDENTITY, }; /* Converts return values to strings */ const char *ff_vk_ret2str(VkResult res) { #define CASE(VAL) case VAL: return #VAL switch (res) { CASE(VK_SUCCESS); CASE(VK_NOT_READY); CASE(VK_TIMEOUT); CASE(VK_EVENT_SET); CASE(VK_EVENT_RESET); CASE(VK_INCOMPLETE); CASE(VK_ERROR_OUT_OF_HOST_MEMORY); CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY); CASE(VK_ERROR_INITIALIZATION_FAILED); CASE(VK_ERROR_DEVICE_LOST); CASE(VK_ERROR_MEMORY_MAP_FAILED); CASE(VK_ERROR_LAYER_NOT_PRESENT); CASE(VK_ERROR_EXTENSION_NOT_PRESENT); CASE(VK_ERROR_FEATURE_NOT_PRESENT); CASE(VK_ERROR_INCOMPATIBLE_DRIVER); CASE(VK_ERROR_TOO_MANY_OBJECTS); CASE(VK_ERROR_FORMAT_NOT_SUPPORTED); CASE(VK_ERROR_FRAGMENTED_POOL); CASE(VK_ERROR_UNKNOWN); CASE(VK_ERROR_OUT_OF_POOL_MEMORY); CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE); CASE(VK_ERROR_FRAGMENTATION); CASE(VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS); CASE(VK_PIPELINE_COMPILE_REQUIRED); CASE(VK_ERROR_SURFACE_LOST_KHR); CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); CASE(VK_SUBOPTIMAL_KHR); CASE(VK_ERROR_OUT_OF_DATE_KHR); CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); CASE(VK_ERROR_VALIDATION_FAILED_EXT); CASE(VK_ERROR_INVALID_SHADER_NV); CASE(VK_ERROR_VIDEO_PICTURE_LAYOUT_NOT_SUPPORTED_KHR); CASE(VK_ERROR_VIDEO_PROFILE_OPERATION_NOT_SUPPORTED_KHR); CASE(VK_ERROR_VIDEO_PROFILE_FORMAT_NOT_SUPPORTED_KHR); CASE(VK_ERROR_VIDEO_PROFILE_CODEC_NOT_SUPPORTED_KHR); CASE(VK_ERROR_VIDEO_STD_VERSION_NOT_SUPPORTED_KHR); CASE(VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT); CASE(VK_ERROR_NOT_PERMITTED_KHR); CASE(VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT); CASE(VK_THREAD_IDLE_KHR); CASE(VK_THREAD_DONE_KHR); CASE(VK_OPERATION_DEFERRED_KHR); CASE(VK_OPERATION_NOT_DEFERRED_KHR); default: return "Unknown error"; } #undef CASE } static void load_enabled_qfs(FFVulkanContext *s) { s->nb_qfs = 0; for (int i = 0; i < s->hwctx->nb_qf; i++) { /* Skip duplicates */ int skip = 0; for (int j = 0; j < s->nb_qfs; j++) { if (s->qfs[j] == s->hwctx->qf[i].idx) { skip = 1; break; } } if (skip) continue; s->qfs[s->nb_qfs++] = s->hwctx->qf[i].idx; } } int ff_vk_load_props(FFVulkanContext *s) { FFVulkanFunctions *vk = &s->vkfn; s->hprops = (VkPhysicalDeviceExternalMemoryHostPropertiesEXT) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT, }; s->optical_flow_props = (VkPhysicalDeviceOpticalFlowPropertiesNV) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_PROPERTIES_NV, .pNext = &s->hprops, }; s->coop_matrix_props = (VkPhysicalDeviceCooperativeMatrixPropertiesKHR) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_KHR, .pNext = &s->optical_flow_props, }; s->subgroup_props = (VkPhysicalDeviceSubgroupSizeControlProperties) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES, .pNext = &s->coop_matrix_props, }; s->desc_buf_props = (VkPhysicalDeviceDescriptorBufferPropertiesEXT) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT, .pNext = &s->subgroup_props, }; s->driver_props = (VkPhysicalDeviceDriverProperties) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES, .pNext = &s->desc_buf_props, }; s->props = (VkPhysicalDeviceProperties2) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, .pNext = &s->driver_props, }; s->atomic_float_feats = (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT, }; s->feats_12 = (VkPhysicalDeviceVulkan12Features) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, .pNext = &s->atomic_float_feats, }; s->feats = (VkPhysicalDeviceFeatures2) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, .pNext = &s->feats_12, }; vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props); vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); vk->GetPhysicalDeviceFeatures2(s->hwctx->phys_dev, &s->feats); load_enabled_qfs(s); if (s->qf_props) return 0; vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, NULL); s->qf_props = av_calloc(s->tot_nb_qfs, sizeof(*s->qf_props)); if (!s->qf_props) return AVERROR(ENOMEM); s->query_props = av_calloc(s->tot_nb_qfs, sizeof(*s->query_props)); if (!s->qf_props) { av_freep(&s->qf_props); return AVERROR(ENOMEM); } s->video_props = av_calloc(s->tot_nb_qfs, sizeof(*s->video_props)); if (!s->video_props) { av_freep(&s->qf_props); av_freep(&s->query_props); return AVERROR(ENOMEM); } for (uint32_t i = 0; i < s->tot_nb_qfs; i++) { s->query_props[i] = (VkQueueFamilyQueryResultStatusPropertiesKHR) { .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR, }; s->video_props[i] = (VkQueueFamilyVideoPropertiesKHR) { .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR, .pNext = &s->query_props[i], }; s->qf_props[i] = (VkQueueFamilyProperties2) { .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2, .pNext = &s->video_props[i], }; } vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, s->qf_props); if (s->extensions & FF_VK_EXT_COOP_MATRIX) { vk->GetPhysicalDeviceCooperativeMatrixPropertiesKHR(s->hwctx->phys_dev, &s->coop_mat_props_nb, NULL); if (s->coop_mat_props_nb) { s->coop_mat_props = av_malloc_array(s->coop_mat_props_nb, sizeof(VkCooperativeMatrixPropertiesKHR)); for (int i = 0; i < s->coop_mat_props_nb; i++) { s->coop_mat_props[i] = (VkCooperativeMatrixPropertiesKHR) { .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR, }; } vk->GetPhysicalDeviceCooperativeMatrixPropertiesKHR(s->hwctx->phys_dev, &s->coop_mat_props_nb, s->coop_mat_props); } } return 0; } static int vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb) { for (int i = 0; i < s->hwctx->nb_qf; i++) { if (s->hwctx->qf[i].flags & dev_family) { *nb = s->hwctx->qf[i].num; return s->hwctx->qf[i].idx; } } av_assert0(0); /* Should never happen */ } int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, VkQueueFlagBits dev_family) { /* Fill in queue families from context if not done yet */ if (!s->nb_qfs) load_enabled_qfs(s); return (qf->queue_family = vk_qf_get_index(s, dev_family, &qf->nb_queues)); } void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool) { FFVulkanFunctions *vk = &s->vkfn; for (int i = 0; i < pool->pool_size; i++) { FFVkExecContext *e = &pool->contexts[i]; if (e->fence) { vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); vk->DestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc); } pthread_mutex_destroy(&e->lock); ff_vk_exec_discard_deps(s, e); av_free(e->frame_deps); av_free(e->buf_deps); av_free(e->queue_family_dst); av_free(e->layout_dst); av_free(e->access_dst); av_free(e->frame_update); av_free(e->frame_locked); av_free(e->sem_sig); av_free(e->sem_sig_val_dst); av_free(e->sem_wait); } /* Free shader-specific data */ for (int i = 0; i < pool->nb_reg_shd; i++) { FFVulkanShaderData *sd = &pool->reg_shd[i]; if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { for (int j = 0; j < sd->nb_descriptor_sets; j++) { FFVulkanDescriptorSetData *set_data = &sd->desc_set_buf[j]; if (set_data->buf.mem) ff_vk_unmap_buffer(s, &set_data->buf, 0); ff_vk_free_buf(s, &set_data->buf); } } if (sd->desc_pool) vk->DestroyDescriptorPool(s->hwctx->act_dev, sd->desc_pool, s->hwctx->alloc); av_freep(&sd->desc_set_buf); av_freep(&sd->desc_bind); av_freep(&sd->desc_sets); } if (pool->cmd_bufs) vk->FreeCommandBuffers(s->hwctx->act_dev, pool->cmd_buf_pool, pool->pool_size, pool->cmd_bufs); if (pool->cmd_buf_pool) vk->DestroyCommandPool(s->hwctx->act_dev, pool->cmd_buf_pool, s->hwctx->alloc); if (pool->query_pool) vk->DestroyQueryPool(s->hwctx->act_dev, pool->query_pool, s->hwctx->alloc); av_free(pool->query_data); av_free(pool->cmd_bufs); av_free(pool->contexts); } int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, FFVkExecPool *pool, int nb_contexts, int nb_queries, VkQueryType query_type, int query_64bit, const void *query_create_pnext) { int err; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; VkCommandPoolCreateInfo cqueue_create; VkCommandBufferAllocateInfo cbuf_create; const VkQueryPoolVideoEncodeFeedbackCreateInfoKHR *ef = NULL; if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) { ef = ff_vk_find_struct(query_create_pnext, VK_STRUCTURE_TYPE_QUERY_POOL_VIDEO_ENCODE_FEEDBACK_CREATE_INFO_KHR); if (!ef) return AVERROR(EINVAL); } /* Create command pool */ cqueue_create = (VkCommandPoolCreateInfo) { .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, .queueFamilyIndex = qf->queue_family, }; ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create, s->hwctx->alloc, &pool->cmd_buf_pool); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n", ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } /* Allocate space for command buffers */ pool->cmd_bufs = av_malloc(nb_contexts*sizeof(*pool->cmd_bufs)); if (!pool->cmd_bufs) { err = AVERROR(ENOMEM); goto fail; } /* Allocate command buffer */ cbuf_create = (VkCommandBufferAllocateInfo) { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .commandPool = pool->cmd_buf_pool, .commandBufferCount = nb_contexts, }; ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, pool->cmd_bufs); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } /* Query pool */ if (nb_queries) { VkQueryPoolCreateInfo query_pool_info = { .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, .pNext = query_create_pnext, .queryType = query_type, .queryCount = nb_queries*nb_contexts, }; ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info, s->hwctx->alloc, &pool->query_pool); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Query pool alloc failure: %s\n", ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } pool->nb_queries = nb_queries; pool->query_status_stride = 1 + 1; /* One result, one status by default */ pool->query_results = nb_queries; pool->query_statuses = nb_queries; /* Video encode quieries produce two results per query */ if (query_type == VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) { int nb_results = av_popcount(ef->encodeFeedbackFlags); pool->query_status_stride = nb_results + 1; pool->query_results *= nb_results; } else if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) { pool->query_status_stride = 1; pool->query_results = 0; } pool->qd_size = (pool->query_results + pool->query_statuses)*(query_64bit ? 8 : 4); /* Allocate space for the query data */ pool->query_data = av_calloc(nb_contexts, pool->qd_size); if (!pool->query_data) { err = AVERROR(ENOMEM); goto fail; } } /* Allocate space for the contexts */ pool->contexts = av_calloc(nb_contexts, sizeof(*pool->contexts)); if (!pool->contexts) { err = AVERROR(ENOMEM); goto fail; } pool->pool_size = nb_contexts; /* Init contexts */ for (int i = 0; i < pool->pool_size; i++) { FFVkExecContext *e = &pool->contexts[i]; VkFenceCreateInfo fence_create = { .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, .flags = VK_FENCE_CREATE_SIGNALED_BIT, }; /* Mutex */ err = pthread_mutex_init(&e->lock, NULL); if (err != 0) return AVERROR(err); /* Fence */ ret = vk->CreateFence(s->hwctx->act_dev, &fence_create, s->hwctx->alloc, &e->fence); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Failed to create submission fence: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } e->idx = i; e->parent = pool; /* Query data */ e->query_data = ((uint8_t *)pool->query_data) + pool->qd_size*i; e->query_idx = nb_queries*i; /* Command buffer */ e->buf = pool->cmd_bufs[i]; /* Queue index distribution */ e->qi = i % qf->nb_queues; e->qf = qf->queue_family; vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family, e->qi, &e->queue); } return 0; fail: ff_vk_exec_pool_free(s, pool); return err; } VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e, void **data, VkQueryResultFlagBits flags) { FFVulkanFunctions *vk = &s->vkfn; const FFVkExecPool *pool = e->parent; VkQueryResultFlags qf = flags & ~(VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WITH_STATUS_BIT_KHR); if (!e->query_data) { av_log(s, AV_LOG_ERROR, "Requested a query with a NULL query_data pointer!\n"); return VK_INCOMPLETE; } qf |= pool->query_64bit ? VK_QUERY_RESULT_64_BIT : 0x0; qf |= pool->query_statuses ? VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0; if (data) *data = e->query_data; return vk->GetQueryPoolResults(s->hwctx->act_dev, pool->query_pool, e->query_idx, pool->nb_queries, pool->qd_size, e->query_data, pool->qd_size, qf); } FFVkExecContext *ff_vk_exec_get(FFVulkanContext *s, FFVkExecPool *pool) { FFVulkanFunctions *vk = &s->vkfn; FFVkExecContext *e = &pool->contexts[pool->idx]; /* Check if last submission has already finished. * If so, don't waste resources and reuse the same buffer. */ if (vk->GetFenceStatus(s->hwctx->act_dev, e->fence) == VK_SUCCESS) return e; pool->idx = (pool->idx + 1) % pool->pool_size; return &pool->contexts[pool->idx]; } void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e) { FFVulkanFunctions *vk = &s->vkfn; pthread_mutex_lock(&e->lock); vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); ff_vk_exec_discard_deps(s, e); pthread_mutex_unlock(&e->lock); } int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; const FFVkExecPool *pool = e->parent; VkCommandBufferBeginInfo cmd_start = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, }; /* Wait for the fence to be signalled */ vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); /* vkResetFences is defined as being host-synchronized */ pthread_mutex_lock(&e->lock); vk->ResetFences(s->hwctx->act_dev, 1, &e->fence); pthread_mutex_unlock(&e->lock); /* Discard queue dependencies */ ff_vk_exec_discard_deps(s, e); ret = vk->BeginCommandBuffer(e->buf, &cmd_start); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } if (pool->nb_queries) vk->CmdResetQueryPool(e->buf, pool->query_pool, e->query_idx, pool->nb_queries); return 0; } void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e) { for (int j = 0; j < e->nb_buf_deps; j++) av_buffer_unref(&e->buf_deps[j]); e->nb_buf_deps = 0; for (int j = 0; j < e->nb_frame_deps; j++) { AVFrame *f = e->frame_deps[j]; if (e->frame_locked[j]) { AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; AVVulkanFramesContext *vkfc = hwfc->hwctx; AVVkFrame *vkf = (AVVkFrame *)f->data[0]; vkfc->unlock_frame(hwfc, vkf); e->frame_locked[j] = 0; } e->frame_update[j] = 0; if (f->buf[0]) av_frame_free(&e->frame_deps[j]); } e->nb_frame_deps = 0; e->sem_wait_cnt = 0; e->sem_sig_cnt = 0; e->sem_sig_val_dst_cnt = 0; } int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, AVBufferRef **deps, int nb_deps, int ref) { AVBufferRef **dst = av_fast_realloc(e->buf_deps, &e->buf_deps_alloc_size, (e->nb_buf_deps + nb_deps) * sizeof(*dst)); if (!dst) { ff_vk_exec_discard_deps(s, e); return AVERROR(ENOMEM); } e->buf_deps = dst; for (int i = 0; i < nb_deps; i++) { e->buf_deps[e->nb_buf_deps] = ref ? av_buffer_ref(deps[i]) : deps[i]; if (!e->buf_deps[e->nb_buf_deps]) { ff_vk_exec_discard_deps(s, e); return AVERROR(ENOMEM); } e->nb_buf_deps++; } return 0; } #define ARR_REALLOC(str, arr, alloc_s, cnt) \ do { \ arr = av_fast_realloc(str->arr, alloc_s, (cnt + 1)*sizeof(*arr)); \ if (!arr) { \ ff_vk_exec_discard_deps(s, e); \ return AVERROR(ENOMEM); \ } \ str->arr = arr; \ } while (0) typedef struct TempSyncCtx { int nb_sem; VkSemaphore sem[]; } TempSyncCtx; static void destroy_tmp_semaphores(void *opaque, uint8_t *data) { FFVulkanContext *s = opaque; FFVulkanFunctions *vk = &s->vkfn; TempSyncCtx *ts = (TempSyncCtx *)data; for (int i = 0; i < ts->nb_sem; i++) vk->DestroySemaphore(s->hwctx->act_dev, ts->sem[i], s->hwctx->alloc); av_free(ts); } int ff_vk_exec_add_dep_bool_sem(FFVulkanContext *s, FFVkExecContext *e, VkSemaphore *sem, int nb, VkPipelineStageFlagBits2 stage, int wait) { int err; size_t buf_size; AVBufferRef *buf; TempSyncCtx *ts; FFVulkanFunctions *vk = &s->vkfn; /* Do not transfer ownership if we're signalling a binary semaphore, * since we're probably exporting it. */ if (!wait) { for (int i = 0; i < nb; i++) { VkSemaphoreSubmitInfo *sem_sig; ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt); e->sem_sig[e->sem_sig_cnt++] = (VkSemaphoreSubmitInfo) { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, .semaphore = sem[i], .stageMask = stage, }; } return 0; } buf_size = sizeof(*ts) + sizeof(VkSemaphore)*nb; ts = av_mallocz(buf_size); if (!ts) { err = AVERROR(ENOMEM); goto fail; } memcpy(ts->sem, sem, nb*sizeof(*sem)); ts->nb_sem = nb; buf = av_buffer_create((uint8_t *)ts, buf_size, destroy_tmp_semaphores, s, 0); if (!buf) { av_free(ts); err = AVERROR(ENOMEM); goto fail; } err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0); if (err < 0) { av_buffer_unref(&buf); return err; } for (int i = 0; i < nb; i++) { VkSemaphoreSubmitInfo *sem_wait; ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt); e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, .semaphore = sem[i], .stageMask = stage, }; } return 0; fail: for (int i = 0; i < nb; i++) vk->DestroySemaphore(s->hwctx->act_dev, sem[i], s->hwctx->alloc); return err; } int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, VkPipelineStageFlagBits2 wait_stage, VkPipelineStageFlagBits2 signal_stage) { uint8_t *frame_locked; uint8_t *frame_update; AVFrame **frame_deps; VkImageLayout *layout_dst; uint32_t *queue_family_dst; VkAccessFlagBits *access_dst; AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; AVVulkanFramesContext *vkfc = hwfc->hwctx; AVVkFrame *vkf = (AVVkFrame *)f->data[0]; int nb_images = ff_vk_count_images(vkf); /* Don't add duplicates */ for (int i = 0; i < e->nb_frame_deps; i++) if (e->frame_deps[i]->data[0] == f->data[0]) return 1; ARR_REALLOC(e, layout_dst, &e->layout_dst_alloc, e->nb_frame_deps); ARR_REALLOC(e, queue_family_dst, &e->queue_family_dst_alloc, e->nb_frame_deps); ARR_REALLOC(e, access_dst, &e->access_dst_alloc, e->nb_frame_deps); ARR_REALLOC(e, frame_locked, &e->frame_locked_alloc_size, e->nb_frame_deps); ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps); ARR_REALLOC(e, frame_deps, &e->frame_deps_alloc_size, e->nb_frame_deps); e->frame_deps[e->nb_frame_deps] = f->buf[0] ? av_frame_clone(f) : f; if (!e->frame_deps[e->nb_frame_deps]) { ff_vk_exec_discard_deps(s, e); return AVERROR(ENOMEM); } vkfc->lock_frame(hwfc, vkf); e->frame_locked[e->nb_frame_deps] = 1; e->frame_update[e->nb_frame_deps] = 0; e->nb_frame_deps++; for (int i = 0; i < nb_images; i++) { VkSemaphoreSubmitInfo *sem_wait; VkSemaphoreSubmitInfo *sem_sig; uint64_t **sem_sig_val_dst; ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt); ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt); ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt); e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, .semaphore = vkf->sem[i], .value = vkf->sem_value[i], .stageMask = wait_stage, }; e->sem_sig[e->sem_sig_cnt++] = (VkSemaphoreSubmitInfo) { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, .semaphore = vkf->sem[i], .value = vkf->sem_value[i] + 1, .stageMask = signal_stage, }; e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = &vkf->sem_value[i]; e->sem_sig_val_dst_cnt++; } return 0; } void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar) { int i; for (i = 0; i < e->nb_frame_deps; i++) if (e->frame_deps[i]->data[0] == f->data[0]) break; av_assert0(i < e->nb_frame_deps); /* Don't update duplicates */ if (nb_img_bar && !e->frame_update[i]) (*nb_img_bar)++; e->queue_family_dst[i] = bar->dstQueueFamilyIndex; e->access_dst[i] = bar->dstAccessMask; e->layout_dst[i] = bar->newLayout; e->frame_update[i] = 1; } int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e, VkSemaphore *dst, uint64_t *dst_val, AVFrame *f) { uint64_t **sem_sig_val_dst; AVVkFrame *vkf = (AVVkFrame *)f->data[0]; /* Reject unknown frames */ int i; for (i = 0; i < e->nb_frame_deps; i++) if (e->frame_deps[i]->data[0] == f->data[0]) break; if (i == e->nb_frame_deps) return AVERROR(EINVAL); ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt); *dst = vkf->sem[0]; *dst_val = vkf->sem_value[0]; e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = dst_val; e->sem_sig_val_dst_cnt++; return 0; } int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; VkCommandBufferSubmitInfo cmd_buf_info = (VkCommandBufferSubmitInfo) { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, .commandBuffer = e->buf, }; VkSubmitInfo2 submit_info = (VkSubmitInfo2) { .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, .pCommandBufferInfos = &cmd_buf_info, .commandBufferInfoCount = 1, .pWaitSemaphoreInfos = e->sem_wait, .waitSemaphoreInfoCount = e->sem_wait_cnt, .pSignalSemaphoreInfos = e->sem_sig, .signalSemaphoreInfoCount = e->sem_sig_cnt, }; ret = vk->EndCommandBuffer(e->buf); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", ff_vk_ret2str(ret)); ff_vk_exec_discard_deps(s, e); return AVERROR_EXTERNAL; } s->hwctx->lock_queue(s->device, e->qf, e->qi); ret = vk->QueueSubmit2(e->queue, 1, &submit_info, e->fence); s->hwctx->unlock_queue(s->device, e->qf, e->qi); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", ff_vk_ret2str(ret)); ff_vk_exec_discard_deps(s, e); return AVERROR_EXTERNAL; } for (int i = 0; i < e->sem_sig_val_dst_cnt; i++) *e->sem_sig_val_dst[i] += 1; /* Unlock all frames */ for (int j = 0; j < e->nb_frame_deps; j++) { if (e->frame_locked[j]) { AVFrame *f = e->frame_deps[j]; AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; AVVulkanFramesContext *vkfc = hwfc->hwctx; AVVkFrame *vkf = (AVVkFrame *)f->data[0]; if (e->frame_update[j]) { int nb_images = ff_vk_count_images(vkf); for (int i = 0; i < nb_images; i++) { vkf->layout[i] = e->layout_dst[j]; vkf->access[i] = e->access_dst[j]; vkf->queue_family[i] = e->queue_family_dst[j]; } } vkfc->unlock_frame(hwfc, vkf); e->frame_locked[j] = 0; } } e->had_submission = 1; return 0; } int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, VkMemoryPropertyFlagBits req_flags, void *alloc_extension, VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) { VkResult ret; int index = -1; FFVulkanFunctions *vk = &s->vkfn; VkMemoryAllocateInfo alloc_info = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .pNext = alloc_extension, }; /* Align if we need to */ if ((req_flags != UINT32_MAX) && req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) req->size = FFALIGN(req->size, s->props.properties.limits.minMemoryMapAlignment); alloc_info.allocationSize = req->size; /* The vulkan spec requires memory types to be sorted in the "optimal" * order, so the first matching type we find will be the best/fastest one */ for (int i = 0; i < s->mprops.memoryTypeCount; i++) { /* The memory type must be supported by the requirements (bitfield) */ if (!(req->memoryTypeBits & (1 << i))) continue; /* The memory type flags must include our properties */ if ((req_flags != UINT32_MAX) && ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)) continue; /* Found a suitable memory type */ index = i; break; } if (index < 0) { av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n", req_flags); return AVERROR(EINVAL); } alloc_info.memoryTypeIndex = index; ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info, s->hwctx->alloc, mem); if (ret != VK_SUCCESS) return AVERROR(ENOMEM); if (mem_flags) *mem_flags |= s->mprops.memoryTypes[index].propertyFlags; return 0; } int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, void *pNext, void *alloc_pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags) { int err; VkResult ret; int use_ded_mem; FFVulkanFunctions *vk = &s->vkfn; VkBufferCreateInfo buf_spawn = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = pNext, .usage = usage, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .size = size, /* Gets FFALIGNED during alloc if host visible but should be ok */ }; VkMemoryAllocateFlagsInfo alloc_flags = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, }; VkBufferMemoryRequirementsInfo2 req_desc = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, }; VkMemoryDedicatedAllocateInfo ded_alloc = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, .pNext = alloc_pNext, }; VkMemoryDedicatedRequirements ded_req = { .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, }; VkMemoryRequirements2 req = { .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, .pNext = &ded_req, }; ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc, &buf->buf); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } req_desc.buffer = buf->buf; vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req); /* In case the implementation prefers/requires dedicated allocation */ use_ded_mem = ded_req.prefersDedicatedAllocation | ded_req.requiresDedicatedAllocation; if (use_ded_mem) { ded_alloc.buffer = buf->buf; ded_alloc.pNext = alloc_pNext; alloc_pNext = &ded_alloc; } if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { alloc_flags.pNext = alloc_pNext; alloc_pNext = &alloc_flags; } err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, alloc_pNext, &buf->flags, &buf->mem); if (err) return err; ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { VkBufferDeviceAddressInfo address_info = { .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, .buffer = buf->buf, }; buf->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info); } buf->size = size; return 0; } static void destroy_avvkbuf(void *opaque, uint8_t *data) { FFVulkanContext *s = opaque; FFVkBuffer *buf = (FFVkBuffer *)data; ff_vk_free_buf(s, buf); av_free(buf); } int ff_vk_create_avbuf(FFVulkanContext *s, AVBufferRef **ref, size_t size, void *pNext, void *alloc_pNext, VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags) { int err; AVBufferRef *buf; FFVkBuffer *vkb = av_mallocz(sizeof(*vkb)); if (!vkb) return AVERROR(ENOMEM); err = ff_vk_create_buf(s, vkb, size, pNext, alloc_pNext, usage, flags); if (err < 0) { av_free(vkb); return err; } buf = av_buffer_create((uint8_t *)vkb, sizeof(*vkb), destroy_avvkbuf, s, 0); if (!buf) { destroy_avvkbuf(s, (uint8_t *)vkb); return AVERROR(ENOMEM); } *ref = buf; return 0; } int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[], int nb_buffers, int invalidate) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; VkMappedMemoryRange inval_list[64]; int inval_count = 0; for (int i = 0; i < nb_buffers; i++) { void *dst; ret = vk->MapMemory(s->hwctx->act_dev, buf[i]->mem, 0, VK_WHOLE_SIZE, 0, &dst); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } mem[i] = dst; } if (!invalidate) return 0; for (int i = 0; i < nb_buffers; i++) { const VkMappedMemoryRange ival_buf = { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, .memory = buf[i]->mem, .size = VK_WHOLE_SIZE, }; if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) continue; inval_list[inval_count++] = ival_buf; } if (inval_count) { ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count, inval_list); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } } return 0; } int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers, int flush) { int err = 0; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; VkMappedMemoryRange flush_list[64]; int flush_count = 0; if (flush) { for (int i = 0; i < nb_buffers; i++) { const VkMappedMemoryRange flush_buf = { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, .memory = buf[i]->mem, .size = VK_WHOLE_SIZE, }; if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) continue; flush_list[flush_count++] = flush_buf; } } if (flush_count) { ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count, flush_list); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n", ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; /* We still want to try to unmap them */ } } for (int i = 0; i < nb_buffers; i++) vk->UnmapMemory(s->hwctx->act_dev, buf[i]->mem); return err; } void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf) { FFVulkanFunctions *vk = &s->vkfn; if (!buf || !s->hwctx) return; if (buf->mapped_mem) ff_vk_unmap_buffer(s, buf, 0); if (buf->buf != VK_NULL_HANDLE) vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc); if (buf->mem != VK_NULL_HANDLE) vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc); } static void free_data_buf(void *opaque, uint8_t *data) { FFVulkanContext *ctx = opaque; FFVkBuffer *buf = (FFVkBuffer *)data; ff_vk_free_buf(ctx, buf); av_free(data); } static AVBufferRef *alloc_data_buf(void *opaque, size_t size) { AVBufferRef *ref; uint8_t *buf = av_mallocz(size); if (!buf) return NULL; ref = av_buffer_create(buf, size, free_data_buf, opaque, 0); if (!ref) av_free(buf); return ref; } int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, AVBufferRef **buf, VkBufferUsageFlags usage, void *create_pNext, size_t size, VkMemoryPropertyFlagBits mem_props) { int err; AVBufferRef *ref; FFVkBuffer *data; if (!(*buf_pool)) { *buf_pool = av_buffer_pool_init2(sizeof(FFVkBuffer), ctx, alloc_data_buf, NULL); if (!(*buf_pool)) return AVERROR(ENOMEM); } *buf = ref = av_buffer_pool_get(*buf_pool); if (!ref) return AVERROR(ENOMEM); data = (FFVkBuffer *)ref->data; data->stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; data->access = VK_ACCESS_2_NONE; if (data->size >= size) return 0; ff_vk_free_buf(ctx, data); memset(data, 0, sizeof(*data)); av_log(ctx, AV_LOG_DEBUG, "Allocating buffer of %"SIZE_SPECIFIER" bytes for pool %p\n", size, *buf_pool); err = ff_vk_create_buf(ctx, data, size, create_pNext, NULL, usage, mem_props); if (err < 0) { av_buffer_unref(&ref); return err; } if (mem_props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { err = ff_vk_map_buffer(ctx, data, &data->mapped_mem, 0); if (err < 0) { av_buffer_unref(&ref); return err; } } return 0; } int ff_vk_shader_add_push_const(FFVulkanShader *shd, int offset, int size, VkShaderStageFlagBits stage) { VkPushConstantRange *pc; shd->push_consts = av_realloc_array(shd->push_consts, sizeof(*shd->push_consts), shd->push_consts_num + 1); if (!shd->push_consts) return AVERROR(ENOMEM); pc = &shd->push_consts[shd->push_consts_num++]; memset(pc, 0, sizeof(*pc)); pc->stageFlags = stage; pc->offset = offset; pc->size = size; return 0; } int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, int unnorm_coords, VkFilter filt) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; VkSamplerCreateInfo sampler_info = { .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .magFilter = filt, .minFilter = sampler_info.magFilter, .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST : VK_SAMPLER_MIPMAP_MODE_LINEAR, .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, .addressModeV = sampler_info.addressModeU, .addressModeW = sampler_info.addressModeU, .anisotropyEnable = VK_FALSE, .compareOp = VK_COMPARE_OP_NEVER, .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, .unnormalizedCoordinates = unnorm_coords, }; ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info, s->hwctx->alloc, sampler); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } return 0; } int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt) { if (pix_fmt == AV_PIX_FMT_ABGR || pix_fmt == AV_PIX_FMT_BGRA || pix_fmt == AV_PIX_FMT_RGBA || pix_fmt == AV_PIX_FMT_RGB24 || pix_fmt == AV_PIX_FMT_BGR24 || pix_fmt == AV_PIX_FMT_RGB48 || pix_fmt == AV_PIX_FMT_RGBA64 || pix_fmt == AV_PIX_FMT_RGB565 || pix_fmt == AV_PIX_FMT_BGR565 || pix_fmt == AV_PIX_FMT_BGR0 || pix_fmt == AV_PIX_FMT_0BGR || pix_fmt == AV_PIX_FMT_RGB0 || pix_fmt == AV_PIX_FMT_X2RGB10 || pix_fmt == AV_PIX_FMT_X2BGR10 || pix_fmt == AV_PIX_FMT_RGBAF32) return 1; return 0; } const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pix_fmt, enum FFVkShaderRepFormat rep_fmt) { switch (pix_fmt) { case AV_PIX_FMT_RGBA: case AV_PIX_FMT_BGRA: case AV_PIX_FMT_RGB24: case AV_PIX_FMT_BGR24: case AV_PIX_FMT_BGR0: case AV_PIX_FMT_RGB0: case AV_PIX_FMT_RGB565: case AV_PIX_FMT_BGR565: case AV_PIX_FMT_YUYV422: case AV_PIX_FMT_UYVY422: { const char *rep_tab[] = { [FF_VK_REP_NATIVE] = "rgba8ui", [FF_VK_REP_FLOAT] = "rgba8", [FF_VK_REP_INT] = "rgba8i", [FF_VK_REP_UINT] = "rgba8ui", }; return rep_tab[rep_fmt]; } case AV_PIX_FMT_X2RGB10: case AV_PIX_FMT_X2BGR10: case AV_PIX_FMT_Y210: { const char *rep_tab[] = { [FF_VK_REP_NATIVE] = "rgb10_a2ui", [FF_VK_REP_FLOAT] = "rgb10_a2", [FF_VK_REP_INT] = NULL, [FF_VK_REP_UINT] = "rgb10_a2ui", }; return rep_tab[rep_fmt]; } case AV_PIX_FMT_RGB48: case AV_PIX_FMT_RGBA64: case AV_PIX_FMT_Y212: case AV_PIX_FMT_XV36: { const char *rep_tab[] = { [FF_VK_REP_NATIVE] = "rgba16ui", [FF_VK_REP_FLOAT] = "rgba16", [FF_VK_REP_INT] = "rgba16i", [FF_VK_REP_UINT] = "rgba16ui", }; return rep_tab[rep_fmt]; } case AV_PIX_FMT_RGBAF32: { const char *rep_tab[] = { [FF_VK_REP_NATIVE] = "rgba32", [FF_VK_REP_FLOAT] = "rgba32", [FF_VK_REP_INT] = "rgba32i", [FF_VK_REP_UINT] = "rgba32ui", }; return rep_tab[rep_fmt]; } case AV_PIX_FMT_GRAY8: case AV_PIX_FMT_GBRAP: case AV_PIX_FMT_YUV420P: case AV_PIX_FMT_YUV422P: case AV_PIX_FMT_YUV444P: { const char *rep_tab[] = { [FF_VK_REP_NATIVE] = "r8ui", [FF_VK_REP_FLOAT] = "r8", [FF_VK_REP_INT] = "r8i", [FF_VK_REP_UINT] = "r8ui", }; return rep_tab[rep_fmt]; }; case AV_PIX_FMT_GRAY16: case AV_PIX_FMT_GBRAP16: case AV_PIX_FMT_YUV420P10: case AV_PIX_FMT_YUV420P12: case AV_PIX_FMT_YUV420P16: case AV_PIX_FMT_YUV422P10: case AV_PIX_FMT_YUV422P12: case AV_PIX_FMT_YUV422P16: case AV_PIX_FMT_YUV444P10: case AV_PIX_FMT_YUV444P12: case AV_PIX_FMT_YUV444P16: { const char *rep_tab[] = { [FF_VK_REP_NATIVE] = "r16ui", [FF_VK_REP_FLOAT] = "r16f", [FF_VK_REP_INT] = "r16i", [FF_VK_REP_UINT] = "r16ui", }; return rep_tab[rep_fmt]; }; case AV_PIX_FMT_GRAYF32: case AV_PIX_FMT_GBRPF32: case AV_PIX_FMT_GBRAPF32: { const char *rep_tab[] = { [FF_VK_REP_NATIVE] = "r32f", [FF_VK_REP_FLOAT] = "r32f", [FF_VK_REP_INT] = "r32i", [FF_VK_REP_UINT] = "r32ui", }; return rep_tab[rep_fmt]; }; case AV_PIX_FMT_NV12: case AV_PIX_FMT_NV16: case AV_PIX_FMT_NV24: { const char *rep_tab[] = { [FF_VK_REP_NATIVE] = "rg8ui", [FF_VK_REP_FLOAT] = "rg8", [FF_VK_REP_INT] = "rg8i", [FF_VK_REP_UINT] = "rg8ui", }; return rep_tab[rep_fmt]; }; case AV_PIX_FMT_P010: case AV_PIX_FMT_P210: case AV_PIX_FMT_P410: { const char *rep_tab[] = { [FF_VK_REP_NATIVE] = "rgb10_a2ui", [FF_VK_REP_FLOAT] = "rgb10_a2", [FF_VK_REP_INT] = NULL, [FF_VK_REP_UINT] = "rgb10_a2ui", }; return rep_tab[rep_fmt]; }; case AV_PIX_FMT_P012: case AV_PIX_FMT_P016: case AV_PIX_FMT_P212: case AV_PIX_FMT_P216: case AV_PIX_FMT_P412: case AV_PIX_FMT_P416: { const char *rep_tab[] = { [FF_VK_REP_NATIVE] = "rg16ui", [FF_VK_REP_FLOAT] = "rg16", [FF_VK_REP_INT] = "rg16i", [FF_VK_REP_UINT] = "rg16ui", }; return rep_tab[rep_fmt]; }; default: return "rgba32f"; } } typedef struct ImageViewCtx { int nb_views; VkImageView views[]; } ImageViewCtx; static void destroy_imageviews(void *opaque, uint8_t *data) { FFVulkanContext *s = opaque; FFVulkanFunctions *vk = &s->vkfn; ImageViewCtx *iv = (ImageViewCtx *)data; for (int i = 0; i < iv->nb_views; i++) vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc); av_free(iv); } int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, VkImageView views[AV_NUM_DATA_POINTERS], AVFrame *f) { int err; VkResult ret; AVBufferRef *buf; FFVulkanFunctions *vk = &s->vkfn; AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format); AVVkFrame *vkf = (AVVkFrame *)f->data[0]; const int nb_images = ff_vk_count_images(vkf); const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); ImageViewCtx *iv; const size_t buf_size = sizeof(*iv) + nb_planes*sizeof(VkImageView); iv = av_mallocz(buf_size); if (!iv) return AVERROR(ENOMEM); for (int i = 0; i < nb_planes; i++) { VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_PLANE_0_BIT, VK_IMAGE_ASPECT_PLANE_1_BIT, VK_IMAGE_ASPECT_PLANE_2_BIT, }; VkImageViewCreateInfo view_create_info = { .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .pNext = NULL, .image = vkf->img[FFMIN(i, nb_images - 1)], .viewType = VK_IMAGE_VIEW_TYPE_2D, .format = rep_fmts[i], .components = ff_comp_identity_map, .subresourceRange = { .aspectMask = plane_aspect[(nb_planes != nb_images) + i*(nb_planes != nb_images)], .levelCount = 1, .layerCount = 1, }, }; ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info, s->hwctx->alloc, &iv->views[i]); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", ff_vk_ret2str(ret)); err = AVERROR_EXTERNAL; goto fail; } iv->nb_views++; } buf = av_buffer_create((uint8_t *)iv, buf_size, destroy_imageviews, s, 0); if (!buf) { err = AVERROR(ENOMEM); goto fail; } /* Add to queue dependencies */ err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0); if (err < 0) av_buffer_unref(&buf); memcpy(views, iv->views, nb_planes*sizeof(*views)); return err; fail: for (int i = 0; i < iv->nb_views; i++) vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc); av_free(iv); return err; } void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, VkPipelineStageFlags src_stage, VkPipelineStageFlags dst_stage, VkAccessFlagBits new_access, VkImageLayout new_layout, uint32_t new_qf) { int found = -1; AVVkFrame *vkf = (AVVkFrame *)pic->data[0]; const int nb_images = ff_vk_count_images(vkf); for (int i = 0; i < e->nb_frame_deps; i++) if (e->frame_deps[i]->data[0] == pic->data[0]) { if (e->frame_update[i]) found = i; break; } for (int i = 0; i < nb_images; i++) { bar[*nb_bar] = (VkImageMemoryBarrier2) { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, .pNext = NULL, .srcStageMask = src_stage, .dstStageMask = dst_stage, .srcAccessMask = found >= 0 ? e->access_dst[found] : vkf->access[i], .dstAccessMask = new_access, .oldLayout = found >= 0 ? e->layout_dst[found] : vkf->layout[0], .newLayout = new_layout, .srcQueueFamilyIndex = found >= 0 ? e->queue_family_dst[found] : vkf->queue_family[0], .dstQueueFamilyIndex = new_qf, .image = vkf->img[i], .subresourceRange = (VkImageSubresourceRange) { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .layerCount = 1, .levelCount = 1, }, }; *nb_bar += 1; } ff_vk_exec_update_frame(s, e, pic, &bar[*nb_bar - nb_images], NULL); } int ff_vk_shader_init(FFVulkanContext *s, FFVulkanShader *shd, const char *name, VkPipelineStageFlags stage, const char *extensions[], int nb_extensions, int lg_x, int lg_y, int lg_z, uint32_t required_subgroup_size) { av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED); shd->name = name; shd->stage = stage; shd->lg_size[0] = lg_x; shd->lg_size[1] = lg_y; shd->lg_size[2] = lg_z; switch (shd->stage) { case VK_SHADER_STAGE_ANY_HIT_BIT_KHR: case VK_SHADER_STAGE_CALLABLE_BIT_KHR: case VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR: case VK_SHADER_STAGE_INTERSECTION_BIT_KHR: case VK_SHADER_STAGE_MISS_BIT_KHR: case VK_SHADER_STAGE_RAYGEN_BIT_KHR: shd->bind_point = VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR; break; case VK_SHADER_STAGE_COMPUTE_BIT: shd->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; break; default: shd->bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS; break; }; if (required_subgroup_size) { shd->subgroup_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO; shd->subgroup_info.requiredSubgroupSize = required_subgroup_size; } av_bprintf(&shd->src, "/* %s shader: %s */\n", (stage == VK_SHADER_STAGE_TASK_BIT_EXT || stage == VK_SHADER_STAGE_MESH_BIT_EXT) ? "Mesh" : (shd->bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR) ? "Raytrace" : (shd->bind_point == VK_PIPELINE_BIND_POINT_COMPUTE) ? "Compute" : "Graphics", name); GLSLF(0, #version %i ,460); GLSLC(0, ); /* Common utilities */ GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) ); GLSLC(0, ); GLSLC(0, #extension GL_EXT_scalar_block_layout : require ); GLSLC(0, #extension GL_EXT_shader_explicit_arithmetic_types : require ); GLSLC(0, #extension GL_EXT_control_flow_attributes : require ); if ((s->extensions & FF_VK_EXT_DEBUG_UTILS) && (s->extensions & FF_VK_EXT_RELAXED_EXTENDED_INSTR)) { GLSLC(0, #extension GL_EXT_debug_printf : require ); GLSLC(0, #define DEBUG ); } if (stage == VK_SHADER_STAGE_TASK_BIT_EXT || stage == VK_SHADER_STAGE_MESH_BIT_EXT) GLSLC(0, #extension GL_EXT_mesh_shader : require ); for (int i = 0; i < nb_extensions; i++) GLSLF(0, #extension %s : %s ,extensions[i], "require"); GLSLC(0, ); GLSLF(0, layout (local_size_x = %i, local_size_y = %i, local_size_z = %i) in; , shd->lg_size[0], shd->lg_size[1], shd->lg_size[2]); GLSLC(0, ); return 0; } void ff_vk_shader_print(void *ctx, FFVulkanShader *shd, int prio) { int line = 0; const char *p = shd->src.str; const char *start = p; const size_t len = strlen(p); AVBPrint buf; av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED); for (int i = 0; i < len; i++) { if (p[i] == '\n') { av_bprintf(&buf, "%i\t", ++line); av_bprint_append_data(&buf, start, &p[i] - start + 1); start = &p[i + 1]; } } av_log(ctx, prio, "Shader %s: \n%s", shd->name, buf.str); av_bprint_finalize(&buf, NULL); } static int init_pipeline_layout(FFVulkanContext *s, FFVulkanShader *shd) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; VkPipelineLayoutCreateInfo pipeline_layout_info; /* Finally create the pipeline layout */ pipeline_layout_info = (VkPipelineLayoutCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .pSetLayouts = shd->desc_layout, .setLayoutCount = shd->nb_descriptor_sets, .pushConstantRangeCount = shd->push_consts_num, .pPushConstantRanges = shd->push_consts, }; ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &pipeline_layout_info, s->hwctx->alloc, &shd->pipeline_layout); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } return 0; } static int create_shader_module(FFVulkanContext *s, FFVulkanShader *shd, VkShaderModule *mod, uint8_t *spirv, size_t spirv_len) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; VkShaderModuleCreateInfo shader_module_info = { .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, .pNext = NULL, .flags = 0x0, .pCode = (void *)spirv, .codeSize = spirv_len, }; ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_module_info, s->hwctx->alloc, mod); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_VERBOSE, "Error creating shader module: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } return 0; } static int init_compute_pipeline(FFVulkanContext *s, FFVulkanShader *shd, VkShaderModule mod, const char *entrypoint) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; VkComputePipelineCreateInfo pipeline_create_info = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .flags = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ? VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT : 0x0, .layout = shd->pipeline_layout, .stage = (VkPipelineShaderStageCreateInfo) { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, .pNext = shd->subgroup_info.requiredSubgroupSize ? &shd->subgroup_info : NULL, .pName = entrypoint, .flags = shd->subgroup_info.requiredSubgroupSize ? VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT : 0x0, .stage = shd->stage, .module = mod, }, }; ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipeline_create_info, s->hwctx->alloc, &shd->pipeline); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } return 0; } static int create_shader_object(FFVulkanContext *s, FFVulkanShader *shd, uint8_t *spirv, size_t spirv_len, const char *entrypoint) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; size_t shader_size = 0; VkShaderCreateInfoEXT shader_obj_create = { .sType = VK_STRUCTURE_TYPE_SHADER_CREATE_INFO_EXT, .flags = shd->subgroup_info.requiredSubgroupSize ? VK_SHADER_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT : 0x0, .stage = shd->stage, .nextStage = 0, .codeType = VK_SHADER_CODE_TYPE_SPIRV_EXT, .pCode = spirv, .codeSize = spirv_len, .pName = entrypoint, .pSetLayouts = shd->desc_layout, .setLayoutCount = shd->nb_descriptor_sets, .pushConstantRangeCount = shd->push_consts_num, .pPushConstantRanges = shd->push_consts, .pSpecializationInfo = NULL, }; ret = vk->CreateShadersEXT(s->hwctx->act_dev, 1, &shader_obj_create, s->hwctx->alloc, &shd->object); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to create shader object: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } if (vk->GetShaderBinaryDataEXT(s->hwctx->act_dev, shd->object, &shader_size, NULL) == VK_SUCCESS) av_log(s, AV_LOG_VERBOSE, "Shader %s size: %zu binary (%zu SPIR-V)\n", shd->name, shader_size, spirv_len); return 0; } static int init_descriptors(FFVulkanContext *s, FFVulkanShader *shd) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; shd->desc_layout = av_malloc_array(shd->nb_descriptor_sets, sizeof(*shd->desc_layout)); if (!shd->desc_layout) return AVERROR(ENOMEM); if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) { int has_singular = 0; for (int i = 0; i < shd->nb_descriptor_sets; i++) { if (shd->desc_set[i].singular) { has_singular = 1; break; } } shd->use_push = (s->extensions & FF_VK_EXT_PUSH_DESCRIPTOR) && (shd->nb_descriptor_sets == 1) && !has_singular; } for (int i = 0; i < shd->nb_descriptor_sets; i++) { FFVulkanDescriptorSet *set = &shd->desc_set[i]; VkDescriptorSetLayoutCreateInfo desc_layout_create = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .bindingCount = set->nb_bindings, .pBindings = set->binding, .flags = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT : (shd->use_push) ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0x0, }; ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_layout_create, s->hwctx->alloc, &shd->desc_layout[i]); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to create descriptor set layout: %s", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, shd->desc_layout[i], &set->layout_size); set->aligned_size = FFALIGN(set->layout_size, s->desc_buf_props.descriptorBufferOffsetAlignment); for (int j = 0; j < set->nb_bindings; j++) vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, shd->desc_layout[i], j, &set->binding_offset[j]); } } return 0; } int ff_vk_shader_link(FFVulkanContext *s, FFVulkanShader *shd, uint8_t *spirv, size_t spirv_len, const char *entrypoint) { int err; FFVulkanFunctions *vk = &s->vkfn; err = init_descriptors(s, shd); if (err < 0) return err; err = init_pipeline_layout(s, shd); if (err < 0) return err; if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { shd->bound_buffer_indices = av_calloc(shd->nb_descriptor_sets, sizeof(*shd->bound_buffer_indices)); if (!shd->bound_buffer_indices) return AVERROR(ENOMEM); for (int i = 0; i < shd->nb_descriptor_sets; i++) shd->bound_buffer_indices[i] = i; } if (s->extensions & FF_VK_EXT_SHADER_OBJECT) { err = create_shader_object(s, shd, spirv, spirv_len, entrypoint); if (err < 0) return err; } else { VkShaderModule mod; err = create_shader_module(s, shd, &mod, spirv, spirv_len); if (err < 0) return err; switch (shd->bind_point) { case VK_PIPELINE_BIND_POINT_COMPUTE: err = init_compute_pipeline(s, shd, mod, entrypoint); break; default: av_log(s, AV_LOG_ERROR, "Unsupported shader type: %i\n", shd->bind_point); err = AVERROR(EINVAL); break; }; vk->DestroyShaderModule(s->hwctx->act_dev, mod, s->hwctx->alloc); if (err < 0) return err; } return 0; } static const struct descriptor_props { size_t struct_size; /* Size of the opaque which updates the descriptor */ const char *type; int is_uniform; int mem_quali; /* Can use a memory qualifier */ int dim_needed; /* Must indicate dimension */ int buf_content; /* Must indicate buffer contents */ } descriptor_props[] = { [VK_DESCRIPTOR_TYPE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 0, 0, }, [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE] = { sizeof(VkDescriptorImageInfo), "texture", 1, 0, 1, 0, }, [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE] = { sizeof(VkDescriptorImageInfo), "image", 1, 1, 1, 0, }, [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT] = { sizeof(VkDescriptorImageInfo), "subpassInput", 1, 0, 0, 0, }, [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 1, 0, }, [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, }, [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, }, [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, }, [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, }, [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER] = { sizeof(VkBufferView), "samplerBuffer", 1, 0, 0, 0, }, [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, }, }; int ff_vk_shader_add_descriptor_set(FFVulkanContext *s, FFVulkanShader *shd, FFVulkanDescriptorSetBinding *desc, int nb, int singular, int print_to_shader_only) { int has_sampler = 0; FFVulkanDescriptorSet *set; if (print_to_shader_only) goto print; /* Actual layout allocated for the pipeline */ set = av_realloc_array(shd->desc_set, sizeof(*shd->desc_set), shd->nb_descriptor_sets + 1); if (!set) return AVERROR(ENOMEM); shd->desc_set = set; set = &set[shd->nb_descriptor_sets]; memset(set, 0, sizeof(*set)); set->binding = av_calloc(nb, sizeof(*set->binding)); if (!set->binding) return AVERROR(ENOMEM); set->binding_offset = av_calloc(nb, sizeof(*set->binding_offset)); if (!set->binding_offset) { av_freep(&set->binding); return AVERROR(ENOMEM); } for (int i = 0; i < nb; i++) { set->binding[i].binding = i; set->binding[i].descriptorType = desc[i].type; set->binding[i].descriptorCount = FFMAX(desc[i].elems, 1); set->binding[i].stageFlags = desc[i].stages; set->binding[i].pImmutableSamplers = desc[i].samplers; if (desc[i].type == VK_DESCRIPTOR_TYPE_SAMPLER || desc[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) has_sampler |= 1; } set->usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; if (has_sampler) set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT; if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER)) { for (int i = 0; i < nb; i++) { int j; VkDescriptorPoolSize *desc_pool_size; for (j = 0; j < shd->nb_desc_pool_size; j++) if (shd->desc_pool_size[j].type == desc[i].type) break; if (j >= shd->nb_desc_pool_size) { desc_pool_size = av_realloc_array(shd->desc_pool_size, sizeof(*desc_pool_size), shd->nb_desc_pool_size + 1); if (!desc_pool_size) return AVERROR(ENOMEM); shd->desc_pool_size = desc_pool_size; shd->nb_desc_pool_size++; memset(&desc_pool_size[j], 0, sizeof(VkDescriptorPoolSize)); } shd->desc_pool_size[j].type = desc[i].type; shd->desc_pool_size[j].descriptorCount += FFMAX(desc[i].elems, 1); } } set->singular = singular; set->nb_bindings = nb; shd->nb_descriptor_sets++; print: /* Write shader info */ for (int i = 0; i < nb; i++) { const struct descriptor_props *prop = &descriptor_props[desc[i].type]; GLSLA("layout (set = %i, binding = %i", shd->nb_descriptor_sets - 1, i); if (desc[i].mem_layout) GLSLA(", %s", desc[i].mem_layout); GLSLA(")"); if (prop->is_uniform) GLSLA(" uniform"); if (prop->mem_quali && desc[i].mem_quali) GLSLA(" %s", desc[i].mem_quali); if (prop->type) { GLSLA(" "); if (desc[i].type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) { if (desc[i].mem_layout) { int len = strlen(desc[i].mem_layout); if (desc[i].mem_layout[len - 1] == 'i' && desc[i].mem_layout[len - 2] == 'u') { GLSLA("u"); } else if (desc[i].mem_layout[len - 1] == 'i') { GLSLA("i"); } } } GLSLA("%s", prop->type); } if (prop->dim_needed) GLSLA("%iD", desc[i].dimensions); GLSLA(" %s", desc[i].name); if (prop->buf_content) GLSLA(" {\n %s\n}", desc[i].buf_content); else if (desc[i].elems > 0) GLSLA("[%i]", desc[i].elems); GLSLA(";"); GLSLA("\n"); } GLSLA("\n"); return 0; } int ff_vk_shader_register_exec(FFVulkanContext *s, FFVkExecPool *pool, FFVulkanShader *shd) { int err; FFVulkanShaderData *sd; if (!shd->nb_descriptor_sets) return 0; sd = av_realloc_array(pool->reg_shd, sizeof(*pool->reg_shd), pool->nb_reg_shd + 1); if (!sd) return AVERROR(ENOMEM); pool->reg_shd = sd; sd = &sd[pool->nb_reg_shd++]; memset(sd, 0, sizeof(*sd)); sd->shd = shd; sd->nb_descriptor_sets = shd->nb_descriptor_sets; if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { sd->desc_bind = av_malloc_array(sd->nb_descriptor_sets, sizeof(*sd->desc_bind)); if (!sd->desc_bind) return AVERROR(ENOMEM); sd->desc_set_buf = av_calloc(sd->nb_descriptor_sets, sizeof(*sd->desc_set_buf)); if (!sd->desc_set_buf) return AVERROR(ENOMEM); for (int i = 0; i < sd->nb_descriptor_sets; i++) { FFVulkanDescriptorSet *set = &shd->desc_set[i]; FFVulkanDescriptorSetData *sdb = &sd->desc_set_buf[i]; int nb = set->singular ? 1 : pool->pool_size; err = ff_vk_create_buf(s, &sdb->buf, set->aligned_size*nb, NULL, NULL, set->usage, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); if (err < 0) return err; err = ff_vk_map_buffer(s, &sdb->buf, &sdb->desc_mem, 0); if (err < 0) return err; sd->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT, .usage = set->usage, .address = sdb->buf.address, }; } } else if (!shd->use_push) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; VkDescriptorSetLayout *tmp_layouts; VkDescriptorSetAllocateInfo set_alloc_info; VkDescriptorPoolCreateInfo pool_create_info; for (int i = 0; i < shd->nb_desc_pool_size; i++) shd->desc_pool_size[i].descriptorCount *= pool->pool_size; pool_create_info = (VkDescriptorPoolCreateInfo) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .flags = 0, .pPoolSizes = shd->desc_pool_size, .poolSizeCount = shd->nb_desc_pool_size, .maxSets = sd->nb_descriptor_sets*pool->pool_size, }; ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info, s->hwctx->alloc, &sd->desc_pool); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to create descriptor pool: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } tmp_layouts = av_malloc_array(pool_create_info.maxSets, sizeof(*tmp_layouts)); if (!tmp_layouts) return AVERROR(ENOMEM); /* Colate each execution context's descriptor set layouts */ for (int i = 0; i < pool->pool_size; i++) for (int j = 0; j < sd->nb_descriptor_sets; j++) tmp_layouts[i*sd->nb_descriptor_sets + j] = shd->desc_layout[j]; set_alloc_info = (VkDescriptorSetAllocateInfo) { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .descriptorPool = sd->desc_pool, .pSetLayouts = tmp_layouts, .descriptorSetCount = pool_create_info.maxSets, }; sd->desc_sets = av_malloc_array(pool_create_info.maxSets, sizeof(*tmp_layouts)); if (!sd->desc_sets) { av_free(tmp_layouts); return AVERROR(ENOMEM); } ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &set_alloc_info, sd->desc_sets); av_free(tmp_layouts); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n", ff_vk_ret2str(ret)); av_freep(&sd->desc_sets); return AVERROR_EXTERNAL; } } return 0; } static inline FFVulkanShaderData *get_shd_data(FFVkExecContext *e, FFVulkanShader *shd) { for (int i = 0; i < e->parent->nb_reg_shd; i++) if (e->parent->reg_shd[i].shd == shd) return &e->parent->reg_shd[i]; return NULL; } static inline void update_set_descriptor(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, int set, int bind_idx, int array_idx, VkDescriptorGetInfoEXT *desc_get_info, size_t desc_size) { FFVulkanFunctions *vk = &s->vkfn; FFVulkanDescriptorSet *desc_set = &shd->desc_set[set]; FFVulkanShaderData *sd = get_shd_data(e, shd); const size_t exec_offset = desc_set->singular ? 0 : desc_set->aligned_size*e->idx; void *desc = sd->desc_set_buf[set].desc_mem + /* Base */ exec_offset + /* Execution context */ desc_set->binding_offset[bind_idx] + /* Descriptor binding */ array_idx*desc_size; /* Array position */ vk->GetDescriptorEXT(s->hwctx->act_dev, desc_get_info, desc_size, desc); } static inline void update_set_pool_write(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, int set, VkWriteDescriptorSet *write_info) { FFVulkanFunctions *vk = &s->vkfn; FFVulkanDescriptorSet *desc_set = &shd->desc_set[set]; FFVulkanShaderData *sd = get_shd_data(e, shd); if (desc_set->singular) { for (int i = 0; i < e->parent->pool_size; i++) { write_info->dstSet = sd->desc_sets[i*sd->nb_descriptor_sets + set]; vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL); } } else { if (shd->use_push) { vk->CmdPushDescriptorSetKHR(e->buf, shd->bind_point, shd->pipeline_layout, set, 1, write_info); } else { write_info->dstSet = sd->desc_sets[e->idx*sd->nb_descriptor_sets + set]; vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL); } } } static int vk_set_descriptor_image(FFVulkanContext *s, FFVulkanShader *shd, FFVkExecContext *e, int set, int bind, int offs, VkImageView view, VkImageLayout layout, VkSampler sampler) { FFVulkanDescriptorSet *desc_set = &shd->desc_set[set]; if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { VkDescriptorGetInfoEXT desc_get_info = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, .type = desc_set->binding[bind].descriptorType, }; VkDescriptorImageInfo desc_img_info = { .imageView = view, .sampler = sampler, .imageLayout = layout, }; size_t desc_size; switch (desc_get_info.type) { case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: desc_get_info.data.pSampledImage = &desc_img_info; desc_size = s->desc_buf_props.sampledImageDescriptorSize; break; case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: desc_get_info.data.pStorageImage = &desc_img_info; desc_size = s->desc_buf_props.storageImageDescriptorSize; break; case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: desc_get_info.data.pInputAttachmentImage = &desc_img_info; desc_size = s->desc_buf_props.inputAttachmentDescriptorSize; break; case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: desc_get_info.data.pCombinedImageSampler = &desc_img_info; desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize; break; default: av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", set, bind, desc_get_info.type); return AVERROR(EINVAL); break; }; update_set_descriptor(s, e, shd, set, bind, offs, &desc_get_info, desc_size); } else { VkDescriptorImageInfo desc_pool_write_info_img = { .sampler = sampler, .imageView = view, .imageLayout = layout, }; VkWriteDescriptorSet desc_pool_write_info = { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .dstBinding = bind, .descriptorCount = 1, .dstArrayElement = offs, .descriptorType = desc_set->binding[bind].descriptorType, .pImageInfo = &desc_pool_write_info_img, }; update_set_pool_write(s, e, shd, set, &desc_pool_write_info); } return 0; } int ff_vk_shader_update_desc_buffer(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, int set, int bind, int elem, FFVkBuffer *buf, VkDeviceSize offset, VkDeviceSize len, VkFormat fmt) { FFVulkanDescriptorSet *desc_set = &shd->desc_set[set]; if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { VkDescriptorGetInfoEXT desc_get_info = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, .type = desc_set->binding[bind].descriptorType, }; VkDescriptorAddressInfoEXT desc_buf_info = { .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT, .address = buf->address + offset, .range = len, .format = fmt, }; size_t desc_size; switch (desc_get_info.type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: desc_get_info.data.pUniformBuffer = &desc_buf_info; desc_size = s->desc_buf_props.uniformBufferDescriptorSize; break; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: desc_get_info.data.pStorageBuffer = &desc_buf_info; desc_size = s->desc_buf_props.storageBufferDescriptorSize; break; case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: desc_get_info.data.pUniformTexelBuffer = &desc_buf_info; desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize; break; case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: desc_get_info.data.pStorageTexelBuffer = &desc_buf_info; desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize; break; default: av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", set, bind, desc_get_info.type); return AVERROR(EINVAL); break; }; update_set_descriptor(s, e, shd, set, bind, elem, &desc_get_info, desc_size); } else { VkDescriptorBufferInfo desc_pool_write_info_buf = { .buffer = buf->buf, .offset = offset, .range = len, }; VkWriteDescriptorSet desc_pool_write_info = { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .dstBinding = bind, .descriptorCount = 1, .dstArrayElement = elem, .descriptorType = desc_set->binding[bind].descriptorType, .pBufferInfo = &desc_pool_write_info_buf, }; update_set_pool_write(s, e, shd, set, &desc_pool_write_info); } return 0; } void ff_vk_shader_update_img_array(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, AVFrame *f, VkImageView *views, int set, int binding, VkImageLayout layout, VkSampler sampler) { AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); for (int i = 0; i < nb_planes; i++) vk_set_descriptor_image(s, shd, e, set, binding, i, views[i], layout, sampler); } void ff_vk_shader_update_push_const(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd, VkShaderStageFlagBits stage, int offset, size_t size, void *src) { FFVulkanFunctions *vk = &s->vkfn; vk->CmdPushConstants(e->buf, shd->pipeline_layout, stage, offset, size, src); } void ff_vk_exec_bind_shader(FFVulkanContext *s, FFVkExecContext *e, FFVulkanShader *shd) { FFVulkanFunctions *vk = &s->vkfn; VkDeviceSize offsets[1024]; FFVulkanShaderData *sd = get_shd_data(e, shd); if (s->extensions & FF_VK_EXT_SHADER_OBJECT) { VkShaderStageFlagBits stages = shd->stage; vk->CmdBindShadersEXT(e->buf, 1, &stages, &shd->object); } else { vk->CmdBindPipeline(e->buf, shd->bind_point, shd->pipeline); } if (sd && sd->nb_descriptor_sets) { if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { for (int i = 0; i < sd->nb_descriptor_sets; i++) offsets[i] = shd->desc_set[i].singular ? 0 : shd->desc_set[i].aligned_size*e->idx; /* Bind descriptor buffers */ vk->CmdBindDescriptorBuffersEXT(e->buf, sd->nb_descriptor_sets, sd->desc_bind); /* Binding offsets */ vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, shd->bind_point, shd->pipeline_layout, 0, sd->nb_descriptor_sets, shd->bound_buffer_indices, offsets); } else if (!shd->use_push) { vk->CmdBindDescriptorSets(e->buf, shd->bind_point, shd->pipeline_layout, 0, sd->nb_descriptor_sets, &sd->desc_sets[e->idx*sd->nb_descriptor_sets], 0, NULL); } } } void ff_vk_shader_free(FFVulkanContext *s, FFVulkanShader *shd) { FFVulkanFunctions *vk = &s->vkfn; av_bprint_finalize(&shd->src, NULL); #if 0 if (shd->shader.module) vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc); #endif if (shd->object) vk->DestroyShaderEXT(s->hwctx->act_dev, shd->object, s->hwctx->alloc); if (shd->pipeline) vk->DestroyPipeline(s->hwctx->act_dev, shd->pipeline, s->hwctx->alloc); if (shd->pipeline_layout) vk->DestroyPipelineLayout(s->hwctx->act_dev, shd->pipeline_layout, s->hwctx->alloc); for (int i = 0; i < shd->nb_descriptor_sets; i++) { FFVulkanDescriptorSet *set = &shd->desc_set[i]; av_free(set->binding); av_free(set->binding_offset); } for (int i = 0; i < shd->nb_descriptor_sets; i++) if (shd->desc_layout[i]) vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, shd->desc_layout[i], s->hwctx->alloc); av_freep(&shd->desc_pool_size); av_freep(&shd->desc_layout); av_freep(&shd->desc_set); av_freep(&shd->bound_buffer_indices); av_freep(&shd->push_consts); shd->push_consts_num = 0; } void ff_vk_uninit(FFVulkanContext *s) { av_freep(&s->query_props); av_freep(&s->qf_props); av_freep(&s->video_props); av_freep(&s->coop_mat_props); av_buffer_unref(&s->device_ref); av_buffer_unref(&s->frames_ref); } int ff_vk_init(FFVulkanContext *s, void *log_parent, AVBufferRef *device_ref, AVBufferRef *frames_ref) { int err; static const AVClass vulkan_context_class = { .class_name = "vk", .version = LIBAVUTIL_VERSION_INT, .parent_log_context_offset = offsetof(FFVulkanContext, log_parent), }; memset(s, 0, sizeof(*s)); s->log_parent = log_parent; s->class = &vulkan_context_class; if (frames_ref) { s->frames_ref = av_buffer_ref(frames_ref); if (!s->frames_ref) return AVERROR(ENOMEM); s->frames = (AVHWFramesContext *)s->frames_ref->data; s->hwfc = s->frames->hwctx; device_ref = s->frames->device_ref; } s->device_ref = av_buffer_ref(device_ref); if (!s->device_ref) { ff_vk_uninit(s); return AVERROR(ENOMEM); } s->device = (AVHWDeviceContext *)s->device_ref->data; s->hwctx = s->device->hwctx; s->extensions = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions, s->hwctx->nb_enabled_dev_extensions); s->extensions |= ff_vk_extensions_to_mask(s->hwctx->enabled_inst_extensions, s->hwctx->nb_enabled_inst_extensions); err = ff_vk_load_functions(s->device, &s->vkfn, s->extensions, 1, 1); if (err < 0) { ff_vk_uninit(s); return err; } err = ff_vk_load_props(s); if (err < 0) { ff_vk_uninit(s); return err; } return 0; }