mirror of https://github.com/FFmpeg/FFmpeg.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1584 lines
53 KiB
1584 lines
53 KiB
/* |
|
* This file is part of FFmpeg. |
|
* |
|
* FFmpeg is free software; you can redistribute it and/or |
|
* modify it under the terms of the GNU Lesser General Public |
|
* License as published by the Free Software Foundation; either |
|
* version 2.1 of the License, or (at your option) any later version. |
|
* |
|
* FFmpeg is distributed in the hope that it will be useful, |
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
* Lesser General Public License for more details. |
|
* |
|
* You should have received a copy of the GNU Lesser General Public |
|
* License along with FFmpeg; if not, write to the Free Software |
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
*/ |
|
|
|
#include "formats.h" |
|
#include "vulkan.h" |
|
#include "glslang.h" |
|
|
|
#include "libavutil/avassert.h" |
|
#include "libavutil/vulkan_loader.h" |
|
|
|
/* Generic macro for creating contexts which need to keep their addresses |
|
* if another context is created. */ |
|
#define FN_CREATING(ctx, type, shortname, array, num) \ |
|
static av_always_inline type *create_ ##shortname(ctx *dctx) \ |
|
{ \ |
|
type **array, *sctx = av_mallocz(sizeof(*sctx)); \ |
|
if (!sctx) \ |
|
return NULL; \ |
|
\ |
|
array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\ |
|
if (!array) { \ |
|
av_free(sctx); \ |
|
return NULL; \ |
|
} \ |
|
\ |
|
dctx->array = array; \ |
|
dctx->array[dctx->num++] = sctx; \ |
|
\ |
|
return sctx; \ |
|
} |
|
|
|
const VkComponentMapping ff_comp_identity_map = { |
|
.r = VK_COMPONENT_SWIZZLE_IDENTITY, |
|
.g = VK_COMPONENT_SWIZZLE_IDENTITY, |
|
.b = VK_COMPONENT_SWIZZLE_IDENTITY, |
|
.a = VK_COMPONENT_SWIZZLE_IDENTITY, |
|
}; |
|
|
|
/* Converts return values to strings */ |
|
const char *ff_vk_ret2str(VkResult res) |
|
{ |
|
#define CASE(VAL) case VAL: return #VAL |
|
switch (res) { |
|
CASE(VK_SUCCESS); |
|
CASE(VK_NOT_READY); |
|
CASE(VK_TIMEOUT); |
|
CASE(VK_EVENT_SET); |
|
CASE(VK_EVENT_RESET); |
|
CASE(VK_INCOMPLETE); |
|
CASE(VK_ERROR_OUT_OF_HOST_MEMORY); |
|
CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY); |
|
CASE(VK_ERROR_INITIALIZATION_FAILED); |
|
CASE(VK_ERROR_DEVICE_LOST); |
|
CASE(VK_ERROR_MEMORY_MAP_FAILED); |
|
CASE(VK_ERROR_LAYER_NOT_PRESENT); |
|
CASE(VK_ERROR_EXTENSION_NOT_PRESENT); |
|
CASE(VK_ERROR_FEATURE_NOT_PRESENT); |
|
CASE(VK_ERROR_INCOMPATIBLE_DRIVER); |
|
CASE(VK_ERROR_TOO_MANY_OBJECTS); |
|
CASE(VK_ERROR_FORMAT_NOT_SUPPORTED); |
|
CASE(VK_ERROR_FRAGMENTED_POOL); |
|
CASE(VK_ERROR_SURFACE_LOST_KHR); |
|
CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR); |
|
CASE(VK_SUBOPTIMAL_KHR); |
|
CASE(VK_ERROR_OUT_OF_DATE_KHR); |
|
CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR); |
|
CASE(VK_ERROR_VALIDATION_FAILED_EXT); |
|
CASE(VK_ERROR_INVALID_SHADER_NV); |
|
CASE(VK_ERROR_OUT_OF_POOL_MEMORY); |
|
CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE); |
|
CASE(VK_ERROR_NOT_PERMITTED_EXT); |
|
default: return "Unknown error"; |
|
} |
|
#undef CASE |
|
} |
|
|
|
void ff_vk_qf_init(AVFilterContext *avctx, FFVkQueueFamilyCtx *qf, |
|
VkQueueFlagBits dev_family, int nb_queues) |
|
{ |
|
FFVulkanContext *s = avctx->priv; |
|
|
|
switch (dev_family) { |
|
case VK_QUEUE_GRAPHICS_BIT: |
|
qf->queue_family = s->hwctx->queue_family_index; |
|
qf->actual_queues = s->hwctx->nb_graphics_queues; |
|
break; |
|
case VK_QUEUE_COMPUTE_BIT: |
|
qf->queue_family = s->hwctx->queue_family_comp_index; |
|
qf->actual_queues = s->hwctx->nb_comp_queues; |
|
break; |
|
case VK_QUEUE_TRANSFER_BIT: |
|
qf->queue_family = s->hwctx->queue_family_tx_index; |
|
qf->actual_queues = s->hwctx->nb_tx_queues; |
|
break; |
|
case VK_QUEUE_VIDEO_ENCODE_BIT_KHR: |
|
qf->queue_family = s->hwctx->queue_family_encode_index; |
|
qf->actual_queues = s->hwctx->nb_encode_queues; |
|
break; |
|
case VK_QUEUE_VIDEO_DECODE_BIT_KHR: |
|
qf->queue_family = s->hwctx->queue_family_decode_index; |
|
qf->actual_queues = s->hwctx->nb_decode_queues; |
|
break; |
|
default: |
|
av_assert0(0); /* Should never happen */ |
|
} |
|
|
|
if (!nb_queues) |
|
qf->nb_queues = qf->actual_queues; |
|
else |
|
qf->nb_queues = nb_queues; |
|
|
|
return; |
|
} |
|
|
|
void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf) |
|
{ |
|
qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues; |
|
} |
|
|
|
static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req, |
|
VkMemoryPropertyFlagBits req_flags, void *alloc_extension, |
|
VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) |
|
{ |
|
VkResult ret; |
|
int index = -1; |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
|
|
VkMemoryAllocateInfo alloc_info = { |
|
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, |
|
.pNext = alloc_extension, |
|
}; |
|
|
|
/* Align if we need to */ |
|
if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) |
|
req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment); |
|
|
|
alloc_info.allocationSize = req->size; |
|
|
|
/* The vulkan spec requires memory types to be sorted in the "optimal" |
|
* order, so the first matching type we find will be the best/fastest one */ |
|
for (int i = 0; i < s->mprops.memoryTypeCount; i++) { |
|
/* The memory type must be supported by the requirements (bitfield) */ |
|
if (!(req->memoryTypeBits & (1 << i))) |
|
continue; |
|
|
|
/* The memory type flags must include our properties */ |
|
if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags) |
|
continue; |
|
|
|
/* Found a suitable memory type */ |
|
index = i; |
|
break; |
|
} |
|
|
|
if (index < 0) { |
|
av_log(avctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n", |
|
req_flags); |
|
return AVERROR(EINVAL); |
|
} |
|
|
|
alloc_info.memoryTypeIndex = index; |
|
|
|
ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info, |
|
s->hwctx->alloc, mem); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n", |
|
ff_vk_ret2str(ret)); |
|
return AVERROR(ENOMEM); |
|
} |
|
|
|
*mem_flags |= s->mprops.memoryTypes[index].propertyFlags; |
|
|
|
return 0; |
|
} |
|
|
|
int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size, |
|
VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags) |
|
{ |
|
int err; |
|
VkResult ret; |
|
int use_ded_mem; |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
|
|
VkBufferCreateInfo buf_spawn = { |
|
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |
|
.pNext = NULL, |
|
.usage = usage, |
|
.sharingMode = VK_SHARING_MODE_EXCLUSIVE, |
|
.size = size, /* Gets FFALIGNED during alloc if host visible |
|
but should be ok */ |
|
}; |
|
|
|
VkBufferMemoryRequirementsInfo2 req_desc = { |
|
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, |
|
}; |
|
VkMemoryDedicatedAllocateInfo ded_alloc = { |
|
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, |
|
.pNext = NULL, |
|
}; |
|
VkMemoryDedicatedRequirements ded_req = { |
|
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, |
|
}; |
|
VkMemoryRequirements2 req = { |
|
.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2, |
|
.pNext = &ded_req, |
|
}; |
|
|
|
ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Failed to create buffer: %s\n", |
|
ff_vk_ret2str(ret)); |
|
return AVERROR_EXTERNAL; |
|
} |
|
|
|
req_desc.buffer = buf->buf; |
|
|
|
vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req); |
|
|
|
/* In case the implementation prefers/requires dedicated allocation */ |
|
use_ded_mem = ded_req.prefersDedicatedAllocation | |
|
ded_req.requiresDedicatedAllocation; |
|
if (use_ded_mem) |
|
ded_alloc.buffer = buf->buf; |
|
|
|
err = vk_alloc_mem(avctx, &req.memoryRequirements, flags, |
|
use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext, |
|
&buf->flags, &buf->mem); |
|
if (err) |
|
return err; |
|
|
|
ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n", |
|
ff_vk_ret2str(ret)); |
|
return AVERROR_EXTERNAL; |
|
} |
|
|
|
return 0; |
|
} |
|
|
|
int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[], |
|
int nb_buffers, int invalidate) |
|
{ |
|
VkResult ret; |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
VkMappedMemoryRange *inval_list = NULL; |
|
int inval_count = 0; |
|
|
|
for (int i = 0; i < nb_buffers; i++) { |
|
ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0, |
|
VK_WHOLE_SIZE, 0, (void **)&mem[i]); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n", |
|
ff_vk_ret2str(ret)); |
|
return AVERROR_EXTERNAL; |
|
} |
|
} |
|
|
|
if (!invalidate) |
|
return 0; |
|
|
|
for (int i = 0; i < nb_buffers; i++) { |
|
const VkMappedMemoryRange ival_buf = { |
|
.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, |
|
.memory = buf[i].mem, |
|
.size = VK_WHOLE_SIZE, |
|
}; |
|
if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) |
|
continue; |
|
inval_list = av_fast_realloc(s->scratch, &s->scratch_size, |
|
(++inval_count)*sizeof(*inval_list)); |
|
if (!inval_list) |
|
return AVERROR(ENOMEM); |
|
inval_list[inval_count - 1] = ival_buf; |
|
} |
|
|
|
if (inval_count) { |
|
ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count, |
|
inval_list); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Failed to invalidate memory: %s\n", |
|
ff_vk_ret2str(ret)); |
|
return AVERROR_EXTERNAL; |
|
} |
|
} |
|
|
|
return 0; |
|
} |
|
|
|
int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers, |
|
int flush) |
|
{ |
|
int err = 0; |
|
VkResult ret; |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
VkMappedMemoryRange *flush_list = NULL; |
|
int flush_count = 0; |
|
|
|
if (flush) { |
|
for (int i = 0; i < nb_buffers; i++) { |
|
const VkMappedMemoryRange flush_buf = { |
|
.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, |
|
.memory = buf[i].mem, |
|
.size = VK_WHOLE_SIZE, |
|
}; |
|
if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) |
|
continue; |
|
flush_list = av_fast_realloc(s->scratch, &s->scratch_size, |
|
(++flush_count)*sizeof(*flush_list)); |
|
if (!flush_list) |
|
return AVERROR(ENOMEM); |
|
flush_list[flush_count - 1] = flush_buf; |
|
} |
|
} |
|
|
|
if (flush_count) { |
|
ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count, |
|
flush_list); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n", |
|
ff_vk_ret2str(ret)); |
|
err = AVERROR_EXTERNAL; /* We still want to try to unmap them */ |
|
} |
|
} |
|
|
|
for (int i = 0; i < nb_buffers; i++) |
|
vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem); |
|
|
|
return err; |
|
} |
|
|
|
void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf) |
|
{ |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
|
|
if (!buf) |
|
return; |
|
|
|
if (buf->buf != VK_NULL_HANDLE) |
|
vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc); |
|
if (buf->mem != VK_NULL_HANDLE) |
|
vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc); |
|
} |
|
|
|
int ff_vk_add_push_constant(AVFilterContext *avctx, FFVulkanPipeline *pl, |
|
int offset, int size, VkShaderStageFlagBits stage) |
|
{ |
|
VkPushConstantRange *pc; |
|
|
|
pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts), |
|
pl->push_consts_num + 1); |
|
if (!pl->push_consts) |
|
return AVERROR(ENOMEM); |
|
|
|
pc = &pl->push_consts[pl->push_consts_num++]; |
|
memset(pc, 0, sizeof(*pc)); |
|
|
|
pc->stageFlags = stage; |
|
pc->offset = offset; |
|
pc->size = size; |
|
|
|
return 0; |
|
} |
|
|
|
FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num) |
|
int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx, |
|
FFVkQueueFamilyCtx *qf) |
|
{ |
|
VkResult ret; |
|
FFVkExecContext *e; |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
|
|
VkCommandPoolCreateInfo cqueue_create = { |
|
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, |
|
.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, |
|
.queueFamilyIndex = qf->queue_family, |
|
}; |
|
VkCommandBufferAllocateInfo cbuf_create = { |
|
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, |
|
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, |
|
.commandBufferCount = qf->nb_queues, |
|
}; |
|
|
|
e = create_exec_ctx(s); |
|
if (!e) |
|
return AVERROR(ENOMEM); |
|
|
|
e->qf = qf; |
|
|
|
e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues)); |
|
if (!e->queues) |
|
return AVERROR(ENOMEM); |
|
|
|
e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs)); |
|
if (!e->bufs) |
|
return AVERROR(ENOMEM); |
|
|
|
/* Create command pool */ |
|
ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create, |
|
s->hwctx->alloc, &e->pool); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Command pool creation failure: %s\n", |
|
ff_vk_ret2str(ret)); |
|
return AVERROR_EXTERNAL; |
|
} |
|
|
|
cbuf_create.commandPool = e->pool; |
|
|
|
/* Allocate command buffer */ |
|
ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", |
|
ff_vk_ret2str(ret)); |
|
return AVERROR_EXTERNAL; |
|
} |
|
|
|
for (int i = 0; i < qf->nb_queues; i++) { |
|
FFVkQueueCtx *q = &e->queues[i]; |
|
vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family, |
|
i % qf->actual_queues, &q->queue); |
|
} |
|
|
|
*ctx = e; |
|
|
|
return 0; |
|
} |
|
|
|
void ff_vk_discard_exec_deps(AVFilterContext *avctx, FFVkExecContext *e) |
|
{ |
|
FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; |
|
|
|
for (int j = 0; j < q->nb_buf_deps; j++) |
|
av_buffer_unref(&q->buf_deps[j]); |
|
q->nb_buf_deps = 0; |
|
|
|
for (int j = 0; j < q->nb_frame_deps; j++) |
|
av_frame_free(&q->frame_deps[j]); |
|
q->nb_frame_deps = 0; |
|
|
|
e->sem_wait_cnt = 0; |
|
e->sem_sig_cnt = 0; |
|
} |
|
|
|
int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e) |
|
{ |
|
VkResult ret; |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; |
|
|
|
VkCommandBufferBeginInfo cmd_start = { |
|
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, |
|
.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, |
|
}; |
|
|
|
/* Create the fence and don't wait for it initially */ |
|
if (!q->fence) { |
|
VkFenceCreateInfo fence_spawn = { |
|
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, |
|
}; |
|
ret = vk->CreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc, |
|
&q->fence); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Failed to queue frame fence: %s\n", |
|
ff_vk_ret2str(ret)); |
|
return AVERROR_EXTERNAL; |
|
} |
|
} else { |
|
vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); |
|
vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); |
|
} |
|
|
|
/* Discard queue dependencies */ |
|
ff_vk_discard_exec_deps(avctx, e); |
|
|
|
ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Failed to start command recoding: %s\n", |
|
ff_vk_ret2str(ret)); |
|
return AVERROR_EXTERNAL; |
|
} |
|
|
|
return 0; |
|
} |
|
|
|
VkCommandBuffer ff_vk_get_exec_buf(AVFilterContext *avctx, FFVkExecContext *e) |
|
{ |
|
return e->bufs[e->qf->cur_queue]; |
|
} |
|
|
|
int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e, |
|
AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag) |
|
{ |
|
AVFrame **dst; |
|
AVVkFrame *f = (AVVkFrame *)frame->data[0]; |
|
FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; |
|
AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data; |
|
int planes = av_pix_fmt_count_planes(fc->sw_format); |
|
|
|
for (int i = 0; i < planes; i++) { |
|
e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc, |
|
(e->sem_wait_cnt + 1)*sizeof(*e->sem_wait)); |
|
if (!e->sem_wait) { |
|
ff_vk_discard_exec_deps(avctx, e); |
|
return AVERROR(ENOMEM); |
|
} |
|
|
|
e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc, |
|
(e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst)); |
|
if (!e->sem_wait_dst) { |
|
ff_vk_discard_exec_deps(avctx, e); |
|
return AVERROR(ENOMEM); |
|
} |
|
|
|
e->sem_wait_val = av_fast_realloc(e->sem_wait_val, &e->sem_wait_val_alloc, |
|
(e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_val)); |
|
if (!e->sem_wait_val) { |
|
ff_vk_discard_exec_deps(avctx, e); |
|
return AVERROR(ENOMEM); |
|
} |
|
|
|
e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc, |
|
(e->sem_sig_cnt + 1)*sizeof(*e->sem_sig)); |
|
if (!e->sem_sig) { |
|
ff_vk_discard_exec_deps(avctx, e); |
|
return AVERROR(ENOMEM); |
|
} |
|
|
|
e->sem_sig_val = av_fast_realloc(e->sem_sig_val, &e->sem_sig_val_alloc, |
|
(e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val)); |
|
if (!e->sem_sig_val) { |
|
ff_vk_discard_exec_deps(avctx, e); |
|
return AVERROR(ENOMEM); |
|
} |
|
|
|
e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc, |
|
(e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst)); |
|
if (!e->sem_sig_val_dst) { |
|
ff_vk_discard_exec_deps(avctx, e); |
|
return AVERROR(ENOMEM); |
|
} |
|
|
|
e->sem_wait[e->sem_wait_cnt] = f->sem[i]; |
|
e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag; |
|
e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i]; |
|
e->sem_wait_cnt++; |
|
|
|
e->sem_sig[e->sem_sig_cnt] = f->sem[i]; |
|
e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1; |
|
e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i]; |
|
e->sem_sig_cnt++; |
|
} |
|
|
|
dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size, |
|
(q->nb_frame_deps + 1) * sizeof(*dst)); |
|
if (!dst) { |
|
ff_vk_discard_exec_deps(avctx, e); |
|
return AVERROR(ENOMEM); |
|
} |
|
|
|
q->frame_deps = dst; |
|
q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame); |
|
if (!q->frame_deps[q->nb_frame_deps]) { |
|
ff_vk_discard_exec_deps(avctx, e); |
|
return AVERROR(ENOMEM); |
|
} |
|
q->nb_frame_deps++; |
|
|
|
return 0; |
|
} |
|
|
|
int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e) |
|
{ |
|
VkResult ret; |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; |
|
|
|
VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { |
|
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, |
|
.pWaitSemaphoreValues = e->sem_wait_val, |
|
.pSignalSemaphoreValues = e->sem_sig_val, |
|
.waitSemaphoreValueCount = e->sem_wait_cnt, |
|
.signalSemaphoreValueCount = e->sem_sig_cnt, |
|
}; |
|
|
|
VkSubmitInfo s_info = { |
|
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, |
|
.pNext = &s_timeline_sem_info, |
|
|
|
.commandBufferCount = 1, |
|
.pCommandBuffers = &e->bufs[e->qf->cur_queue], |
|
|
|
.pWaitSemaphores = e->sem_wait, |
|
.pWaitDstStageMask = e->sem_wait_dst, |
|
.waitSemaphoreCount = e->sem_wait_cnt, |
|
|
|
.pSignalSemaphores = e->sem_sig, |
|
.signalSemaphoreCount = e->sem_sig_cnt, |
|
}; |
|
|
|
ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", |
|
ff_vk_ret2str(ret)); |
|
return AVERROR_EXTERNAL; |
|
} |
|
|
|
ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", |
|
ff_vk_ret2str(ret)); |
|
return AVERROR_EXTERNAL; |
|
} |
|
|
|
for (int i = 0; i < e->sem_sig_cnt; i++) |
|
*e->sem_sig_val_dst[i] += 1; |
|
|
|
return 0; |
|
} |
|
|
|
int ff_vk_add_dep_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e, |
|
AVBufferRef **deps, int nb_deps) |
|
{ |
|
AVBufferRef **dst; |
|
FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; |
|
|
|
if (!deps || !nb_deps) |
|
return 0; |
|
|
|
dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size, |
|
(q->nb_buf_deps + nb_deps) * sizeof(*dst)); |
|
if (!dst) |
|
goto err; |
|
|
|
q->buf_deps = dst; |
|
|
|
for (int i = 0; i < nb_deps; i++) { |
|
q->buf_deps[q->nb_buf_deps] = deps[i]; |
|
if (!q->buf_deps[q->nb_buf_deps]) |
|
goto err; |
|
q->nb_buf_deps++; |
|
} |
|
|
|
return 0; |
|
|
|
err: |
|
ff_vk_discard_exec_deps(avctx, e); |
|
return AVERROR(ENOMEM); |
|
} |
|
|
|
static int vulkan_filter_set_device(AVFilterContext *avctx, |
|
AVBufferRef *device) |
|
{ |
|
FFVulkanContext *s = avctx->priv; |
|
|
|
av_buffer_unref(&s->device_ref); |
|
|
|
s->device_ref = av_buffer_ref(device); |
|
if (!s->device_ref) |
|
return AVERROR(ENOMEM); |
|
|
|
s->device = (AVHWDeviceContext*)s->device_ref->data; |
|
s->hwctx = s->device->hwctx; |
|
|
|
return 0; |
|
} |
|
|
|
static int vulkan_filter_set_frames(AVFilterContext *avctx, |
|
AVBufferRef *frames) |
|
{ |
|
FFVulkanContext *s = avctx->priv; |
|
|
|
av_buffer_unref(&s->frames_ref); |
|
|
|
s->frames_ref = av_buffer_ref(frames); |
|
if (!s->frames_ref) |
|
return AVERROR(ENOMEM); |
|
|
|
return 0; |
|
} |
|
|
|
int ff_vk_filter_config_input(AVFilterLink *inlink) |
|
{ |
|
int err; |
|
AVFilterContext *avctx = inlink->dst; |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
AVHWFramesContext *input_frames; |
|
|
|
if (!inlink->hw_frames_ctx) { |
|
av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " |
|
"hardware frames context on the input.\n"); |
|
return AVERROR(EINVAL); |
|
} |
|
|
|
/* Extract the device and default output format from the first input. */ |
|
if (avctx->inputs[0] != inlink) |
|
return 0; |
|
|
|
input_frames = (AVHWFramesContext *)inlink->hw_frames_ctx->data; |
|
if (input_frames->format != AV_PIX_FMT_VULKAN) |
|
return AVERROR(EINVAL); |
|
|
|
err = vulkan_filter_set_device(avctx, input_frames->device_ref); |
|
if (err < 0) |
|
return err; |
|
err = vulkan_filter_set_frames(avctx, inlink->hw_frames_ctx); |
|
if (err < 0) |
|
return err; |
|
|
|
s->extensions = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions, |
|
s->hwctx->nb_enabled_dev_extensions); |
|
|
|
err = ff_vk_load_functions(s->device, &s->vkfn, s->extensions, 1, 1); |
|
if (err < 0) |
|
return err; |
|
|
|
vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &s->props); |
|
vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); |
|
|
|
/* Default output parameters match input parameters. */ |
|
s->input_format = input_frames->sw_format; |
|
if (s->output_format == AV_PIX_FMT_NONE) |
|
s->output_format = input_frames->sw_format; |
|
if (!s->output_width) |
|
s->output_width = inlink->w; |
|
if (!s->output_height) |
|
s->output_height = inlink->h; |
|
|
|
return 0; |
|
} |
|
|
|
int ff_vk_filter_config_output_inplace(AVFilterLink *outlink) |
|
{ |
|
int err; |
|
AVFilterContext *avctx = outlink->src; |
|
FFVulkanContext *s = avctx->priv; |
|
|
|
av_buffer_unref(&outlink->hw_frames_ctx); |
|
|
|
if (!s->device_ref) { |
|
if (!avctx->hw_device_ctx) { |
|
av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " |
|
"Vulkan device.\n"); |
|
return AVERROR(EINVAL); |
|
} |
|
|
|
err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx); |
|
if (err < 0) |
|
return err; |
|
} |
|
|
|
outlink->hw_frames_ctx = av_buffer_ref(s->frames_ref); |
|
if (!outlink->hw_frames_ctx) |
|
return AVERROR(ENOMEM); |
|
|
|
outlink->w = s->output_width; |
|
outlink->h = s->output_height; |
|
|
|
return 0; |
|
} |
|
|
|
int ff_vk_filter_config_output(AVFilterLink *outlink) |
|
{ |
|
int err; |
|
AVFilterContext *avctx = outlink->src; |
|
FFVulkanContext *s = avctx->priv; |
|
AVBufferRef *output_frames_ref; |
|
AVHWFramesContext *output_frames; |
|
|
|
av_buffer_unref(&outlink->hw_frames_ctx); |
|
|
|
if (!s->device_ref) { |
|
if (!avctx->hw_device_ctx) { |
|
av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " |
|
"Vulkan device.\n"); |
|
return AVERROR(EINVAL); |
|
} |
|
|
|
err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx); |
|
if (err < 0) |
|
return err; |
|
} |
|
|
|
output_frames_ref = av_hwframe_ctx_alloc(s->device_ref); |
|
if (!output_frames_ref) { |
|
err = AVERROR(ENOMEM); |
|
goto fail; |
|
} |
|
output_frames = (AVHWFramesContext*)output_frames_ref->data; |
|
|
|
output_frames->format = AV_PIX_FMT_VULKAN; |
|
output_frames->sw_format = s->output_format; |
|
output_frames->width = s->output_width; |
|
output_frames->height = s->output_height; |
|
|
|
err = av_hwframe_ctx_init(output_frames_ref); |
|
if (err < 0) { |
|
av_log(avctx, AV_LOG_ERROR, "Failed to initialise output " |
|
"frames: %d.\n", err); |
|
goto fail; |
|
} |
|
|
|
outlink->hw_frames_ctx = output_frames_ref; |
|
outlink->w = s->output_width; |
|
outlink->h = s->output_height; |
|
|
|
return 0; |
|
fail: |
|
av_buffer_unref(&output_frames_ref); |
|
return err; |
|
} |
|
|
|
int ff_vk_filter_init(AVFilterContext *avctx) |
|
{ |
|
FFVulkanContext *s = avctx->priv; |
|
|
|
s->output_format = AV_PIX_FMT_NONE; |
|
|
|
if (ff_vk_glslang_init()) |
|
return AVERROR_EXTERNAL; |
|
|
|
return 0; |
|
} |
|
|
|
FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num) |
|
FFVkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords, |
|
VkFilter filt) |
|
{ |
|
VkResult ret; |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
|
|
VkSamplerCreateInfo sampler_info = { |
|
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, |
|
.magFilter = filt, |
|
.minFilter = sampler_info.magFilter, |
|
.mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST : |
|
VK_SAMPLER_MIPMAP_MODE_LINEAR, |
|
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, |
|
.addressModeV = sampler_info.addressModeU, |
|
.addressModeW = sampler_info.addressModeU, |
|
.anisotropyEnable = VK_FALSE, |
|
.compareOp = VK_COMPARE_OP_NEVER, |
|
.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, |
|
.unnormalizedCoordinates = unnorm_coords, |
|
}; |
|
|
|
FFVkSampler *sctx = create_sampler(s); |
|
if (!sctx) |
|
return NULL; |
|
|
|
ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info, |
|
s->hwctx->alloc, &sctx->sampler[0]); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Unable to init sampler: %s\n", |
|
ff_vk_ret2str(ret)); |
|
return NULL; |
|
} |
|
|
|
for (int i = 1; i < 4; i++) |
|
sctx->sampler[i] = sctx->sampler[0]; |
|
|
|
return sctx; |
|
} |
|
|
|
int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt) |
|
{ |
|
if (pix_fmt == AV_PIX_FMT_ABGR || pix_fmt == AV_PIX_FMT_BGRA || |
|
pix_fmt == AV_PIX_FMT_RGBA || pix_fmt == AV_PIX_FMT_RGB24 || |
|
pix_fmt == AV_PIX_FMT_BGR24 || pix_fmt == AV_PIX_FMT_RGB48 || |
|
pix_fmt == AV_PIX_FMT_RGBA64 || pix_fmt == AV_PIX_FMT_RGB565 || |
|
pix_fmt == AV_PIX_FMT_BGR565 || pix_fmt == AV_PIX_FMT_BGR0 || |
|
pix_fmt == AV_PIX_FMT_0BGR || pix_fmt == AV_PIX_FMT_RGB0) |
|
return 1; |
|
return 0; |
|
} |
|
|
|
const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt) |
|
{ |
|
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt); |
|
const int high = desc->comp[0].depth > 8; |
|
return high ? "rgba16f" : "rgba8"; |
|
} |
|
|
|
typedef struct ImageViewCtx { |
|
VkImageView view; |
|
} ImageViewCtx; |
|
|
|
static void destroy_imageview(void *opaque, uint8_t *data) |
|
{ |
|
FFVulkanContext *s = opaque; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
ImageViewCtx *iv = (ImageViewCtx *)data; |
|
|
|
vk->DestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc); |
|
av_free(iv); |
|
} |
|
|
|
int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e, |
|
VkImageView *v, VkImage img, VkFormat fmt, |
|
const VkComponentMapping map) |
|
{ |
|
int err; |
|
AVBufferRef *buf; |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
|
|
VkImageViewCreateInfo imgview_spawn = { |
|
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, |
|
.pNext = NULL, |
|
.image = img, |
|
.viewType = VK_IMAGE_VIEW_TYPE_2D, |
|
.format = fmt, |
|
.components = map, |
|
.subresourceRange = { |
|
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, |
|
.baseMipLevel = 0, |
|
.levelCount = 1, |
|
.baseArrayLayer = 0, |
|
.layerCount = 1, |
|
}, |
|
}; |
|
|
|
ImageViewCtx *iv = av_mallocz(sizeof(*iv)); |
|
|
|
VkResult ret = vk->CreateImageView(s->hwctx->act_dev, &imgview_spawn, |
|
s->hwctx->alloc, &iv->view); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Failed to create imageview: %s\n", |
|
ff_vk_ret2str(ret)); |
|
return AVERROR_EXTERNAL; |
|
} |
|
|
|
buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0); |
|
if (!buf) { |
|
destroy_imageview(s, (uint8_t *)iv); |
|
return AVERROR(ENOMEM); |
|
} |
|
|
|
/* Add to queue dependencies */ |
|
err = ff_vk_add_dep_exec_ctx(avctx, e, &buf, 1); |
|
if (err) { |
|
av_buffer_unref(&buf); |
|
return err; |
|
} |
|
|
|
*v = iv->view; |
|
|
|
return 0; |
|
} |
|
|
|
FN_CREATING(FFVulkanPipeline, FFSPIRVShader, shader, shaders, shaders_num) |
|
FFSPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, FFVulkanPipeline *pl, |
|
const char *name, VkShaderStageFlags stage) |
|
{ |
|
FFSPIRVShader *shd = create_shader(pl); |
|
if (!shd) |
|
return NULL; |
|
|
|
av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED); |
|
|
|
shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; |
|
shd->shader.stage = stage; |
|
|
|
shd->name = name; |
|
|
|
GLSLF(0, #version %i ,460); |
|
GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) ); |
|
GLSLC(0, ); |
|
|
|
return shd; |
|
} |
|
|
|
void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, FFSPIRVShader *shd, |
|
int local_size[3]) |
|
{ |
|
shd->local_size[0] = local_size[0]; |
|
shd->local_size[1] = local_size[1]; |
|
shd->local_size[2] = local_size[2]; |
|
|
|
av_bprintf(&shd->src, "layout (local_size_x = %i, " |
|
"local_size_y = %i, local_size_z = %i) in;\n\n", |
|
shd->local_size[0], shd->local_size[1], shd->local_size[2]); |
|
} |
|
|
|
void ff_vk_print_shader(AVFilterContext *avctx, FFSPIRVShader *shd, int prio) |
|
{ |
|
int line = 0; |
|
const char *p = shd->src.str; |
|
const char *start = p; |
|
|
|
AVBPrint buf; |
|
av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED); |
|
|
|
for (int i = 0; i < strlen(p); i++) { |
|
if (p[i] == '\n') { |
|
av_bprintf(&buf, "%i\t", ++line); |
|
av_bprint_append_data(&buf, start, &p[i] - start + 1); |
|
start = &p[i + 1]; |
|
} |
|
} |
|
|
|
av_log(avctx, prio, "Shader %s: \n%s", shd->name, buf.str); |
|
av_bprint_finalize(&buf, NULL); |
|
} |
|
|
|
int ff_vk_compile_shader(AVFilterContext *avctx, FFSPIRVShader *shd, |
|
const char *entrypoint) |
|
{ |
|
int err; |
|
VkResult ret; |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
VkShaderModuleCreateInfo shader_create; |
|
uint8_t *spirv; |
|
size_t spirv_size; |
|
void *priv; |
|
|
|
shd->shader.pName = entrypoint; |
|
|
|
err = ff_vk_glslang_shader_compile(avctx, shd, &spirv, &spirv_size, &priv); |
|
if (err < 0) |
|
return err; |
|
|
|
ff_vk_print_shader(avctx, shd, AV_LOG_VERBOSE); |
|
|
|
av_log(avctx, AV_LOG_VERBOSE, "Shader %s compiled! Size: %zu bytes\n", |
|
shd->name, spirv_size); |
|
|
|
shader_create.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; |
|
shader_create.pNext = NULL; |
|
shader_create.codeSize = spirv_size; |
|
shader_create.flags = 0; |
|
shader_create.pCode = (void *)spirv; |
|
|
|
ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL, |
|
&shd->shader.module); |
|
|
|
ff_vk_glslang_shader_free(priv); |
|
|
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Unable to create shader module: %s\n", |
|
ff_vk_ret2str(ret)); |
|
return AVERROR_EXTERNAL; |
|
} |
|
|
|
return 0; |
|
} |
|
|
|
static const struct descriptor_props { |
|
size_t struct_size; /* Size of the opaque which updates the descriptor */ |
|
const char *type; |
|
int is_uniform; |
|
int mem_quali; /* Can use a memory qualifier */ |
|
int dim_needed; /* Must indicate dimension */ |
|
int buf_content; /* Must indicate buffer contents */ |
|
} descriptor_props[] = { |
|
[VK_DESCRIPTOR_TYPE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 0, 0, }, |
|
[VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE] = { sizeof(VkDescriptorImageInfo), "texture", 1, 0, 1, 0, }, |
|
[VK_DESCRIPTOR_TYPE_STORAGE_IMAGE] = { sizeof(VkDescriptorImageInfo), "image", 1, 1, 1, 0, }, |
|
[VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT] = { sizeof(VkDescriptorImageInfo), "subpassInput", 1, 0, 0, 0, }, |
|
[VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo), "sampler", 1, 0, 1, 0, }, |
|
[VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, }, |
|
[VK_DESCRIPTOR_TYPE_STORAGE_BUFFER] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, }, |
|
[VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL, 1, 0, 0, 1, }, |
|
[VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer", 0, 1, 0, 1, }, |
|
[VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER] = { sizeof(VkBufferView), "samplerBuffer", 1, 0, 0, 0, }, |
|
[VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, }, |
|
}; |
|
|
|
int ff_vk_add_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl, |
|
FFSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc, |
|
int num, int only_print_to_shader) |
|
{ |
|
VkResult ret; |
|
VkDescriptorSetLayout *layout; |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
|
|
if (only_print_to_shader) |
|
goto print; |
|
|
|
pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout), |
|
pl->desc_layout_num + pl->qf->nb_queues); |
|
if (!pl->desc_layout) |
|
return AVERROR(ENOMEM); |
|
|
|
pl->desc_set_initialized = av_realloc_array(pl->desc_set_initialized, |
|
sizeof(*pl->desc_set_initialized), |
|
pl->descriptor_sets_num + 1); |
|
if (!pl->desc_set_initialized) |
|
return AVERROR(ENOMEM); |
|
|
|
pl->desc_set_initialized[pl->descriptor_sets_num] = 0; |
|
layout = &pl->desc_layout[pl->desc_layout_num]; |
|
|
|
{ /* Create descriptor set layout descriptions */ |
|
VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 }; |
|
VkDescriptorSetLayoutBinding *desc_binding; |
|
|
|
desc_binding = av_mallocz(sizeof(*desc_binding)*num); |
|
if (!desc_binding) |
|
return AVERROR(ENOMEM); |
|
|
|
for (int i = 0; i < num; i++) { |
|
desc_binding[i].binding = i; |
|
desc_binding[i].descriptorType = desc[i].type; |
|
desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1); |
|
desc_binding[i].stageFlags = desc[i].stages; |
|
desc_binding[i].pImmutableSamplers = desc[i].sampler ? |
|
desc[i].sampler->sampler : |
|
NULL; |
|
} |
|
|
|
desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; |
|
desc_create_layout.pBindings = desc_binding; |
|
desc_create_layout.bindingCount = num; |
|
|
|
for (int i = 0; i < pl->qf->nb_queues; i++) { |
|
ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout, |
|
s->hwctx->alloc, &layout[i]); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set " |
|
"layout: %s\n", ff_vk_ret2str(ret)); |
|
av_free(desc_binding); |
|
return AVERROR_EXTERNAL; |
|
} |
|
} |
|
|
|
av_free(desc_binding); |
|
} |
|
|
|
{ /* Pool each descriptor by type and update pool counts */ |
|
for (int i = 0; i < num; i++) { |
|
int j; |
|
for (j = 0; j < pl->pool_size_desc_num; j++) |
|
if (pl->pool_size_desc[j].type == desc[i].type) |
|
break; |
|
if (j >= pl->pool_size_desc_num) { |
|
pl->pool_size_desc = av_realloc_array(pl->pool_size_desc, |
|
sizeof(*pl->pool_size_desc), |
|
++pl->pool_size_desc_num); |
|
if (!pl->pool_size_desc) |
|
return AVERROR(ENOMEM); |
|
memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize)); |
|
} |
|
pl->pool_size_desc[j].type = desc[i].type; |
|
pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*pl->qf->nb_queues; |
|
} |
|
} |
|
|
|
{ /* Create template creation struct */ |
|
VkDescriptorUpdateTemplateCreateInfo *dt; |
|
VkDescriptorUpdateTemplateEntry *des_entries; |
|
|
|
/* Freed after descriptor set initialization */ |
|
des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry)); |
|
if (!des_entries) |
|
return AVERROR(ENOMEM); |
|
|
|
for (int i = 0; i < num; i++) { |
|
des_entries[i].dstBinding = i; |
|
des_entries[i].descriptorType = desc[i].type; |
|
des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1); |
|
des_entries[i].dstArrayElement = 0; |
|
des_entries[i].offset = ((uint8_t *)desc[i].updater) - (uint8_t *)s; |
|
des_entries[i].stride = descriptor_props[desc[i].type].struct_size; |
|
} |
|
|
|
pl->desc_template_info = av_realloc_array(pl->desc_template_info, |
|
sizeof(*pl->desc_template_info), |
|
pl->total_descriptor_sets + pl->qf->nb_queues); |
|
if (!pl->desc_template_info) |
|
return AVERROR(ENOMEM); |
|
|
|
dt = &pl->desc_template_info[pl->total_descriptor_sets]; |
|
memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues); |
|
|
|
for (int i = 0; i < pl->qf->nb_queues; i++) { |
|
dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO; |
|
dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET; |
|
dt[i].descriptorSetLayout = layout[i]; |
|
dt[i].pDescriptorUpdateEntries = des_entries; |
|
dt[i].descriptorUpdateEntryCount = num; |
|
} |
|
} |
|
|
|
pl->descriptor_sets_num++; |
|
|
|
pl->desc_layout_num += pl->qf->nb_queues; |
|
pl->total_descriptor_sets += pl->qf->nb_queues; |
|
|
|
print: |
|
/* Write shader info */ |
|
for (int i = 0; i < num; i++) { |
|
const struct descriptor_props *prop = &descriptor_props[desc[i].type]; |
|
GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i); |
|
|
|
if (desc[i].mem_layout) |
|
GLSLA(", %s", desc[i].mem_layout); |
|
GLSLA(")"); |
|
|
|
if (prop->is_uniform) |
|
GLSLA(" uniform"); |
|
|
|
if (prop->mem_quali && desc[i].mem_quali) |
|
GLSLA(" %s", desc[i].mem_quali); |
|
|
|
if (prop->type) |
|
GLSLA(" %s", prop->type); |
|
|
|
if (prop->dim_needed) |
|
GLSLA("%iD", desc[i].dimensions); |
|
|
|
GLSLA(" %s", desc[i].name); |
|
|
|
if (prop->buf_content) |
|
GLSLA(" {\n %s\n}", desc[i].buf_content); |
|
else if (desc[i].elems > 0) |
|
GLSLA("[%i]", desc[i].elems); |
|
|
|
GLSLA(";\n"); |
|
} |
|
GLSLA("\n"); |
|
|
|
return 0; |
|
} |
|
|
|
void ff_vk_update_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl, |
|
int set_id) |
|
{ |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
|
|
/* If a set has never been updated, update all queues' sets. */ |
|
if (!pl->desc_set_initialized[set_id]) { |
|
for (int i = 0; i < pl->qf->nb_queues; i++) { |
|
int idx = set_id*pl->qf->nb_queues + i; |
|
vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev, |
|
pl->desc_set[idx], |
|
pl->desc_template[idx], |
|
s); |
|
} |
|
pl->desc_set_initialized[set_id] = 1; |
|
return; |
|
} |
|
|
|
set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue; |
|
|
|
vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev, |
|
pl->desc_set[set_id], |
|
pl->desc_template[set_id], |
|
s); |
|
} |
|
|
|
void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e, |
|
VkShaderStageFlagBits stage, int offset, |
|
size_t size, void *src) |
|
{ |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
|
|
vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout, |
|
stage, offset, size, src); |
|
} |
|
|
|
int ff_vk_init_pipeline_layout(AVFilterContext *avctx, FFVulkanPipeline *pl) |
|
{ |
|
VkResult ret; |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
|
|
pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging)); |
|
if (!pl->desc_staging) |
|
return AVERROR(ENOMEM); |
|
|
|
{ /* Init descriptor set pool */ |
|
VkDescriptorPoolCreateInfo pool_create_info = { |
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, |
|
.poolSizeCount = pl->pool_size_desc_num, |
|
.pPoolSizes = pl->pool_size_desc, |
|
.maxSets = pl->total_descriptor_sets, |
|
}; |
|
|
|
ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info, |
|
s->hwctx->alloc, &pl->desc_pool); |
|
av_freep(&pl->pool_size_desc); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set " |
|
"pool: %s\n", ff_vk_ret2str(ret)); |
|
return AVERROR_EXTERNAL; |
|
} |
|
} |
|
|
|
{ /* Allocate descriptor sets */ |
|
VkDescriptorSetAllocateInfo alloc_info = { |
|
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, |
|
.descriptorPool = pl->desc_pool, |
|
.descriptorSetCount = pl->total_descriptor_sets, |
|
.pSetLayouts = pl->desc_layout, |
|
}; |
|
|
|
pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set)); |
|
if (!pl->desc_set) |
|
return AVERROR(ENOMEM); |
|
|
|
ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &alloc_info, |
|
pl->desc_set); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n", |
|
ff_vk_ret2str(ret)); |
|
return AVERROR_EXTERNAL; |
|
} |
|
} |
|
|
|
{ /* Finally create the pipeline layout */ |
|
VkPipelineLayoutCreateInfo spawn_pipeline_layout = { |
|
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, |
|
.pSetLayouts = (VkDescriptorSetLayout *)pl->desc_staging, |
|
.pushConstantRangeCount = pl->push_consts_num, |
|
.pPushConstantRanges = pl->push_consts, |
|
}; |
|
|
|
for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) |
|
pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i]; |
|
|
|
ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout, |
|
s->hwctx->alloc, &pl->pipeline_layout); |
|
av_freep(&pl->push_consts); |
|
pl->push_consts_num = 0; |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n", |
|
ff_vk_ret2str(ret)); |
|
return AVERROR_EXTERNAL; |
|
} |
|
} |
|
|
|
{ /* Descriptor template (for tightly packed descriptors) */ |
|
VkDescriptorUpdateTemplateCreateInfo *dt; |
|
|
|
pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template)); |
|
if (!pl->desc_template) |
|
return AVERROR(ENOMEM); |
|
|
|
/* Create update templates for the descriptor sets */ |
|
for (int i = 0; i < pl->total_descriptor_sets; i++) { |
|
dt = &pl->desc_template_info[i]; |
|
dt->pipelineLayout = pl->pipeline_layout; |
|
ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev, |
|
dt, s->hwctx->alloc, |
|
&pl->desc_template[i]); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor " |
|
"template: %s\n", ff_vk_ret2str(ret)); |
|
return AVERROR_EXTERNAL; |
|
} |
|
} |
|
|
|
/* Free the duplicated memory used for the template entries */ |
|
for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) { |
|
dt = &pl->desc_template_info[i]; |
|
av_free((void *)dt->pDescriptorUpdateEntries); |
|
} |
|
|
|
av_freep(&pl->desc_template_info); |
|
} |
|
|
|
return 0; |
|
} |
|
|
|
FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num) |
|
FFVulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx, |
|
FFVkQueueFamilyCtx *qf) |
|
{ |
|
FFVulkanPipeline *pl = create_pipeline(avctx->priv); |
|
if (pl) |
|
pl->qf = qf; |
|
|
|
return pl; |
|
} |
|
|
|
int ff_vk_init_compute_pipeline(AVFilterContext *avctx, FFVulkanPipeline *pl) |
|
{ |
|
int i; |
|
VkResult ret; |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
|
|
VkComputePipelineCreateInfo pipe = { |
|
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, |
|
.layout = pl->pipeline_layout, |
|
}; |
|
|
|
for (i = 0; i < pl->shaders_num; i++) { |
|
if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) { |
|
pipe.stage = pl->shaders[i]->shader; |
|
break; |
|
} |
|
} |
|
if (i == pl->shaders_num) { |
|
av_log(avctx, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n"); |
|
return AVERROR(EINVAL); |
|
} |
|
|
|
ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe, |
|
s->hwctx->alloc, &pl->pipeline); |
|
if (ret != VK_SUCCESS) { |
|
av_log(avctx, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n", |
|
ff_vk_ret2str(ret)); |
|
return AVERROR_EXTERNAL; |
|
} |
|
|
|
pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; |
|
|
|
return 0; |
|
} |
|
|
|
void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e, |
|
FFVulkanPipeline *pl) |
|
{ |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
|
|
vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline); |
|
|
|
for (int i = 0; i < pl->descriptor_sets_num; i++) |
|
pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue]; |
|
|
|
vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point, |
|
pl->pipeline_layout, 0, |
|
pl->descriptor_sets_num, |
|
(VkDescriptorSet *)pl->desc_staging, |
|
0, NULL); |
|
|
|
e->bound_pl = pl; |
|
} |
|
|
|
static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e) |
|
{ |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
|
|
/* Make sure all queues have finished executing */ |
|
for (int i = 0; i < e->qf->nb_queues; i++) { |
|
FFVkQueueCtx *q = &e->queues[i]; |
|
|
|
if (q->fence) { |
|
vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); |
|
vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); |
|
} |
|
|
|
/* Free the fence */ |
|
if (q->fence) |
|
vk->DestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc); |
|
|
|
/* Free buffer dependencies */ |
|
for (int j = 0; j < q->nb_buf_deps; j++) |
|
av_buffer_unref(&q->buf_deps[j]); |
|
av_free(q->buf_deps); |
|
|
|
/* Free frame dependencies */ |
|
for (int j = 0; j < q->nb_frame_deps; j++) |
|
av_frame_free(&q->frame_deps[j]); |
|
av_free(q->frame_deps); |
|
} |
|
|
|
if (e->bufs) |
|
vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs); |
|
if (e->pool) |
|
vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc); |
|
|
|
av_freep(&e->bufs); |
|
av_freep(&e->queues); |
|
av_freep(&e->sem_sig); |
|
av_freep(&e->sem_sig_val); |
|
av_freep(&e->sem_sig_val_dst); |
|
av_freep(&e->sem_wait); |
|
av_freep(&e->sem_wait_dst); |
|
av_freep(&e->sem_wait_val); |
|
av_free(e); |
|
} |
|
|
|
static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) |
|
{ |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
|
|
for (int i = 0; i < pl->shaders_num; i++) { |
|
FFSPIRVShader *shd = pl->shaders[i]; |
|
av_bprint_finalize(&shd->src, NULL); |
|
vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, |
|
s->hwctx->alloc); |
|
av_free(shd); |
|
} |
|
|
|
vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc); |
|
vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout, |
|
s->hwctx->alloc); |
|
|
|
for (int i = 0; i < pl->desc_layout_num; i++) { |
|
if (pl->desc_template && pl->desc_template[i]) |
|
vk->DestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i], |
|
s->hwctx->alloc); |
|
if (pl->desc_layout && pl->desc_layout[i]) |
|
vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i], |
|
s->hwctx->alloc); |
|
} |
|
|
|
/* Also frees the descriptor sets */ |
|
if (pl->desc_pool) |
|
vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool, |
|
s->hwctx->alloc); |
|
|
|
av_freep(&pl->desc_staging); |
|
av_freep(&pl->desc_set); |
|
av_freep(&pl->shaders); |
|
av_freep(&pl->desc_layout); |
|
av_freep(&pl->desc_template); |
|
av_freep(&pl->desc_set_initialized); |
|
av_freep(&pl->push_consts); |
|
pl->push_consts_num = 0; |
|
|
|
/* Only freed in case of failure */ |
|
av_freep(&pl->pool_size_desc); |
|
if (pl->desc_template_info) { |
|
for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) { |
|
VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i]; |
|
av_free((void *)dt->pDescriptorUpdateEntries); |
|
} |
|
av_freep(&pl->desc_template_info); |
|
} |
|
|
|
av_free(pl); |
|
} |
|
|
|
void ff_vk_filter_uninit(AVFilterContext *avctx) |
|
{ |
|
FFVulkanContext *s = avctx->priv; |
|
FFVulkanFunctions *vk = &s->vkfn; |
|
|
|
ff_vk_glslang_uninit(); |
|
|
|
for (int i = 0; i < s->exec_ctx_num; i++) |
|
free_exec_ctx(s, s->exec_ctx[i]); |
|
av_freep(&s->exec_ctx); |
|
|
|
for (int i = 0; i < s->samplers_num; i++) { |
|
vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0], |
|
s->hwctx->alloc); |
|
av_free(s->samplers[i]); |
|
} |
|
av_freep(&s->samplers); |
|
|
|
for (int i = 0; i < s->pipelines_num; i++) |
|
free_pipeline(s, s->pipelines[i]); |
|
av_freep(&s->pipelines); |
|
|
|
av_freep(&s->scratch); |
|
s->scratch_size = 0; |
|
|
|
av_buffer_unref(&s->device_ref); |
|
av_buffer_unref(&s->frames_ref); |
|
}
|
|
|