lavfi/vulkan: refactor, fix and fully implement multiple queues

pull/374/head
Lynne 3 years ago
parent 246f841b53
commit f705e9ea05
No known key found for this signature in database
GPG Key ID: A2FEA5F03F034464
  1. 27
      libavfilter/vf_avgblur_vulkan.c
  2. 25
      libavfilter/vf_chromaber_vulkan.c
  3. 30
      libavfilter/vf_overlay_vulkan.c
  4. 27
      libavfilter/vf_scale_vulkan.c
  5. 340
      libavfilter/vulkan.c
  6. 103
      libavfilter/vulkan.h

@ -24,12 +24,13 @@
#define CGS 32
typedef struct AvgBlurVulkanContext {
VulkanFilterContext vkctx;
FFVulkanContext vkctx;
int initialized;
FFVkQueueFamilyCtx qf;
FFVkExecContext *exec;
VulkanPipeline *pl_hor;
VulkanPipeline *pl_ver;
FFVulkanPipeline *pl_hor;
FFVulkanPipeline *pl_ver;
/* Shader updators, must be in the main filter struct */
VkDescriptorImageInfo input_images[3];
@ -73,16 +74,14 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
FFSPIRVShader *shd;
AvgBlurVulkanContext *s = ctx->priv;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
VkSampler *sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_LINEAR);
VulkanDescriptorSetBinding desc_i[2] = {
FFVulkanDescriptorSetBinding desc_i[2] = {
{
.name = "input_img",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.dimensions = 2,
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.samplers = DUP_SAMPLER_ARRAY4(*sampler),
},
{
.name = "output_img",
@ -95,17 +94,17 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
},
};
if (!sampler)
return AVERROR_EXTERNAL;
ff_vk_qf_init(ctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
s->vkctx.queue_count = s->vkctx.hwctx->nb_comp_queues;
desc_i[0].sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_LINEAR);
if (!desc_i[0].sampler)
return AVERROR_EXTERNAL;
{ /* Create shader for the horizontal pass */
desc_i[0].updater = s->input_images;
desc_i[1].updater = s->tmp_images;
s->pl_hor = ff_vk_create_pipeline(ctx);
s->pl_hor = ff_vk_create_pipeline(ctx, &s->qf);
if (!s->pl_hor)
return AVERROR(ENOMEM);
@ -148,7 +147,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
desc_i[0].updater = s->tmp_images;
desc_i[1].updater = s->output_images;
s->pl_ver = ff_vk_create_pipeline(ctx);
s->pl_ver = ff_vk_create_pipeline(ctx, &s->qf);
if (!s->pl_ver)
return AVERROR(ENOMEM);
@ -188,7 +187,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
}
/* Execution context */
RET(ff_vk_create_exec_ctx(ctx, &s->exec));
RET(ff_vk_create_exec_ctx(ctx, &s->exec, &s->qf));
s->initialized = 1;
@ -311,6 +310,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f
if (err)
return err;
ff_vk_qf_rotate(&s->qf);
return err;
fail:

@ -24,11 +24,12 @@
#define CGROUPS (int [3]){ 32, 32, 1 }
typedef struct ChromaticAberrationVulkanContext {
VulkanFilterContext vkctx;
FFVulkanContext vkctx;
int initialized;
FFVkQueueFamilyCtx qf;
FFVkExecContext *exec;
VulkanPipeline *pl;
FFVulkanPipeline *pl;
/* Shader updators, must be in the main filter struct */
VkDescriptorImageInfo input_images[3];
@ -67,17 +68,18 @@ static const char distort_chroma_kernel[] = {
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
int err;
FFVkSampler *sampler;
ChromaticAberrationVulkanContext *s = ctx->priv;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
ff_vk_qf_init(ctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
/* Create a sampler */
VkSampler *sampler = ff_vk_init_sampler(ctx, 0, VK_FILTER_LINEAR);
sampler = ff_vk_init_sampler(ctx, 0, VK_FILTER_LINEAR);
if (!sampler)
return AVERROR_EXTERNAL;
s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
s->vkctx.queue_count = s->vkctx.hwctx->nb_comp_queues;
s->pl = ff_vk_create_pipeline(ctx);
s->pl = ff_vk_create_pipeline(ctx, &s->qf);
if (!s->pl)
return AVERROR(ENOMEM);
@ -86,8 +88,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
s->opts.dist[1] = (s->opts.dist[1] / 100.0f) + 1.0f;
{ /* Create the shader */
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
VulkanDescriptorSetBinding desc_i[2] = {
FFVulkanDescriptorSetBinding desc_i[2] = {
{
.name = "input_img",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
@ -95,7 +96,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.updater = s->input_images,
.samplers = DUP_SAMPLER_ARRAY4(*sampler),
.sampler = sampler,
},
{
.name = "output_img",
@ -158,7 +159,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
RET(ff_vk_init_compute_pipeline(ctx, s->pl));
/* Execution context */
RET(ff_vk_create_exec_ctx(ctx, &s->exec));
RET(ff_vk_create_exec_ctx(ctx, &s->exec, &s->qf));
s->initialized = 1;
@ -256,6 +257,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
if (err)
return err;
ff_vk_qf_rotate(&s->qf);
return err;
fail:

@ -25,11 +25,12 @@
#define CGROUPS (int [3]){ 32, 32, 1 }
typedef struct OverlayVulkanContext {
VulkanFilterContext vkctx;
FFVulkanContext vkctx;
int initialized;
VulkanPipeline *pl;
FFVkQueueFamilyCtx qf;
FFVkExecContext *exec;
FFVulkanPipeline *pl;
FFFrameSync fs;
FFVkBuffer params_buf;
@ -79,23 +80,24 @@ static const char overlay_alpha[] = {
static av_cold int init_filter(AVFilterContext *ctx)
{
int err;
FFVkSampler *sampler;
OverlayVulkanContext *s = ctx->priv;
VkSampler *sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_NEAREST);
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
ff_vk_qf_init(ctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_NEAREST);
if (!sampler)
return AVERROR_EXTERNAL;
s->pl = ff_vk_create_pipeline(ctx);
s->pl = ff_vk_create_pipeline(ctx, &s->qf);
if (!s->pl)
return AVERROR(ENOMEM);
s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
s->vkctx.queue_count = s->vkctx.hwctx->nb_comp_queues;
{ /* Create the shader */
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA;
VulkanDescriptorSetBinding desc_i[3] = {
FFVulkanDescriptorSetBinding desc_i[3] = {
{
.name = "main_img",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
@ -103,7 +105,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.updater = s->main_images,
.samplers = DUP_SAMPLER_ARRAY4(*sampler),
.sampler = sampler,
},
{
.name = "overlay_img",
@ -112,7 +114,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
.elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.updater = s->overlay_images,
.samplers = DUP_SAMPLER_ARRAY4(*sampler),
.sampler = sampler,
},
{
.name = "output_img",
@ -126,7 +128,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
},
};
VulkanDescriptorSetBinding desc_b = {
FFVulkanDescriptorSetBinding desc_b = {
.name = "params",
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.mem_quali = "readonly",
@ -215,7 +217,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
}
/* Execution context */
RET(ff_vk_create_exec_ctx(ctx, &s->exec));
RET(ff_vk_create_exec_ctx(ctx, &s->exec, &s->qf));
s->initialized = 1;
@ -339,6 +341,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
if (err)
return err;
ff_vk_qf_rotate(&s->qf);
return err;
fail:

@ -33,11 +33,12 @@ enum ScalerFunc {
};
typedef struct ScaleVulkanContext {
VulkanFilterContext vkctx;
FFVulkanContext vkctx;
int initialized;
FFVkQueueFamilyCtx qf;
FFVkExecContext *exec;
VulkanPipeline *pl;
FFVulkanPipeline *pl;
FFVkBuffer params_buf;
/* Shader updators, must be in the main filter struct */
@ -107,7 +108,7 @@ static const char write_444[] = {
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{
int err;
VkSampler *sampler;
FFVkSampler *sampler;
VkFilter sampler_mode;
ScaleVulkanContext *s = ctx->priv;
@ -115,9 +116,9 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
int crop_y = in->crop_top;
int crop_w = in->width - (in->crop_left + in->crop_right);
int crop_h = in->height - (in->crop_top + in->crop_bottom);
int in_planes = av_pix_fmt_count_planes(s->vkctx.input_format);
s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
s->vkctx.queue_count = s->vkctx.hwctx->nb_comp_queues;
ff_vk_qf_init(ctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
switch (s->scaler) {
case F_NEAREST:
@ -133,20 +134,20 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
if (!sampler)
return AVERROR_EXTERNAL;
s->pl = ff_vk_create_pipeline(ctx);
s->pl = ff_vk_create_pipeline(ctx, &s->qf);
if (!s->pl)
return AVERROR(ENOMEM);
{ /* Create the shader */
VulkanDescriptorSetBinding desc_i[2] = {
FFVulkanDescriptorSetBinding desc_i[2] = {
{
.name = "input_img",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.dimensions = 2,
.elems = av_pix_fmt_count_planes(s->vkctx.input_format),
.elems = in_planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.updater = s->input_images,
.samplers = DUP_SAMPLER_ARRAY4(*sampler),
.sampler = sampler,
},
{
.name = "output_img",
@ -160,7 +161,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
},
};
VulkanDescriptorSetBinding desc_b = {
FFVulkanDescriptorSetBinding desc_b = {
.name = "params",
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.mem_quali = "readonly",
@ -178,7 +179,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
ff_vk_set_compute_shader_sizes(ctx, shd, CGROUPS);
RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, desc_i, 2, 0)); /* set 0 */
RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, &desc_b, 1, 0)); /* set 0 */
RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */
GLSLD( scale_bilinear );
@ -280,7 +281,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
}
/* Execution context */
RET(ff_vk_create_exec_ctx(ctx, &s->exec));
RET(ff_vk_create_exec_ctx(ctx, &s->exec, &s->qf));
s->initialized = 1;
@ -384,6 +385,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
if (err)
return err;
ff_vk_qf_rotate(&s->qf);
return err;
fail:

@ -20,6 +20,7 @@
#include "vulkan.h"
#include "glslang.h"
#include "libavutil/avassert.h"
#include "libavutil/vulkan_loader.h"
/* Generic macro for creating contexts which need to keep their addresses
@ -88,15 +89,54 @@ const char *ff_vk_ret2str(VkResult res)
#undef CASE
}
void ff_vk_qf_init(AVFilterContext *avctx, FFVkQueueFamilyCtx *qf,
VkQueueFlagBits dev_family, int queue_limit)
{
FFVulkanContext *s = avctx->priv;
if (!queue_limit)
queue_limit = INT32_MAX;
switch (dev_family) {
case VK_QUEUE_GRAPHICS_BIT:
qf->queue_family = s->hwctx->queue_family_index;
qf->nb_queues = FFMIN(s->hwctx->nb_graphics_queues, queue_limit);
return;
case VK_QUEUE_COMPUTE_BIT:
qf->queue_family = s->hwctx->queue_family_comp_index;
qf->nb_queues = FFMIN(s->hwctx->nb_comp_queues, queue_limit);
return;
case VK_QUEUE_TRANSFER_BIT:
qf->queue_family = s->hwctx->queue_family_tx_index;
qf->nb_queues = FFMIN(s->hwctx->nb_tx_queues, queue_limit);
return;
case VK_QUEUE_VIDEO_ENCODE_BIT_KHR:
qf->queue_family = s->hwctx->queue_family_encode_index;
qf->nb_queues = FFMIN(s->hwctx->nb_encode_queues, queue_limit);
return;
case VK_QUEUE_VIDEO_DECODE_BIT_KHR:
qf->queue_family = s->hwctx->queue_family_decode_index;
qf->nb_queues = FFMIN(s->hwctx->nb_decode_queues, queue_limit);
return;
default:
av_assert0(0); /* Should never happen */
}
return;
}
void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
{
qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
}
static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
VkResult ret;
int index = -1;
VkPhysicalDeviceProperties props;
VkPhysicalDeviceMemoryProperties mprops;
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
VkMemoryAllocateInfo alloc_info = {
@ -104,24 +144,21 @@ static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
.pNext = alloc_extension,
};
vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &props);
vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &mprops);
/* Align if we need to */
if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
req->size = FFALIGN(req->size, props.limits.minMemoryMapAlignment);
req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment);
alloc_info.allocationSize = req->size;
/* The vulkan spec requires memory types to be sorted in the "optimal"
* order, so the first matching type we find will be the best/fastest one */
for (int i = 0; i < mprops.memoryTypeCount; i++) {
for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
/* The memory type must be supported by the requirements (bitfield) */
if (!(req->memoryTypeBits & (1 << i)))
continue;
/* The memory type flags must include our properties */
if ((mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
continue;
/* Found a suitable memory type */
@ -145,7 +182,7 @@ static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
return AVERROR(ENOMEM);
}
*mem_flags |= mprops.memoryTypes[index].propertyFlags;
*mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
return 0;
}
@ -156,7 +193,7 @@ int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size,
int err;
VkResult ret;
int use_ded_mem;
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
VkBufferCreateInfo buf_spawn = {
@ -220,7 +257,7 @@ int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[],
int nb_buffers, int invalidate)
{
VkResult ret;
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
VkMappedMemoryRange *inval_list = NULL;
int inval_count = 0;
@ -271,7 +308,7 @@ int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
{
int err = 0;
VkResult ret;
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
VkMappedMemoryRange *flush_list = NULL;
int flush_count = 0;
@ -311,7 +348,7 @@ int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf)
{
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
if (!buf)
@ -323,7 +360,7 @@ void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf)
vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
}
int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl,
int ff_vk_add_push_constant(AVFilterContext *avctx, FFVulkanPipeline *pl,
int offset, int size, VkShaderStageFlagBits stage)
{
VkPushConstantRange *pc;
@ -343,37 +380,37 @@ int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl,
return 0;
}
FN_CREATING(VulkanFilterContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx)
FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx,
FFVkQueueFamilyCtx *qf)
{
VkResult ret;
FFVkExecContext *e;
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
int queue_family = s->queue_family_idx;
int nb_queues = s->queue_count;
VkCommandPoolCreateInfo cqueue_create = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
.queueFamilyIndex = queue_family,
.queueFamilyIndex = qf->queue_family,
};
VkCommandBufferAllocateInfo cbuf_create = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount = nb_queues,
.commandBufferCount = qf->nb_queues,
};
e = create_exec_ctx(s);
if (!e)
return AVERROR(ENOMEM);
e->queues = av_mallocz(nb_queues * sizeof(*e->queues));
e->qf = qf;
e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues));
if (!e->queues)
return AVERROR(ENOMEM);
e->bufs = av_mallocz(nb_queues * sizeof(*e->bufs));
e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs));
if (!e->bufs)
return AVERROR(ENOMEM);
@ -396,9 +433,9 @@ int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx)
return AVERROR_EXTERNAL;
}
for (int i = 0; i < nb_queues; i++) {
for (int i = 0; i < qf->nb_queues; i++) {
FFVkQueueCtx *q = &e->queues[i];
vk->GetDeviceQueue(s->hwctx->act_dev, queue_family, i, &q->queue);
vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family, i, &q->queue);
}
*ctx = e;
@ -408,8 +445,7 @@ int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx)
void ff_vk_discard_exec_deps(AVFilterContext *avctx, FFVkExecContext *e)
{
VulkanFilterContext *s = avctx->priv;
FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
for (int j = 0; j < q->nb_buf_deps; j++)
av_buffer_unref(&q->buf_deps[j]);
@ -426,9 +462,9 @@ void ff_vk_discard_exec_deps(AVFilterContext *avctx, FFVkExecContext *e)
int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
{
VkResult ret;
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
VkCommandBufferBeginInfo cmd_start = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
@ -455,7 +491,7 @@ int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
/* Discard queue dependencies */
ff_vk_discard_exec_deps(avctx, e);
ret = vk->BeginCommandBuffer(e->bufs[s->cur_queue_idx], &cmd_start);
ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start);
if (ret != VK_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "Failed to start command recoding: %s\n",
ff_vk_ret2str(ret));
@ -467,17 +503,15 @@ int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
VkCommandBuffer ff_vk_get_exec_buf(AVFilterContext *avctx, FFVkExecContext *e)
{
VulkanFilterContext *s = avctx->priv;
return e->bufs[s->cur_queue_idx];
return e->bufs[e->qf->cur_queue];
}
int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag)
{
AVFrame **dst;
VulkanFilterContext *s = avctx->priv;
AVVkFrame *f = (AVVkFrame *)frame->data[0];
FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
int planes = av_pix_fmt_count_planes(fc->sw_format);
@ -517,16 +551,21 @@ int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
return AVERROR(ENOMEM);
}
e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc,
(e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst));
if (!e->sem_sig_val_dst) {
ff_vk_discard_exec_deps(avctx, e);
return AVERROR(ENOMEM);
}
e->sem_wait[e->sem_wait_cnt] = f->sem[i];
e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
e->sem_wait_cnt++;
/* TODO: fix this in case execution fails */
f->sem_value[i]++;
e->sem_sig[e->sem_sig_cnt] = f->sem[i];
e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i];
e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1;
e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
e->sem_sig_cnt++;
}
@ -551,9 +590,9 @@ int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
{
VkResult ret;
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
@ -568,7 +607,7 @@ int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
.pNext = &s_timeline_sem_info,
.commandBufferCount = 1,
.pCommandBuffers = &e->bufs[s->cur_queue_idx],
.pCommandBuffers = &e->bufs[e->qf->cur_queue],
.pWaitSemaphores = e->sem_wait,
.pWaitDstStageMask = e->sem_wait_dst,
@ -578,7 +617,7 @@ int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
.signalSemaphoreCount = e->sem_sig_cnt,
};
ret = vk->EndCommandBuffer(e->bufs[s->cur_queue_idx]);
ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]);
if (ret != VK_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
ff_vk_ret2str(ret));
@ -592,8 +631,8 @@ int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
return AVERROR_EXTERNAL;
}
/* Rotate queues */
s->cur_queue_idx = (s->cur_queue_idx + 1) % s->queue_count;
for (int i = 0; i < e->sem_sig_cnt; i++)
*e->sem_sig_val_dst[i] += 1;
return 0;
}
@ -602,8 +641,7 @@ int ff_vk_add_dep_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e,
AVBufferRef **deps, int nb_deps)
{
AVBufferRef **dst;
VulkanFilterContext *s = avctx->priv;
FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
if (!deps || !nb_deps)
return 0;
@ -632,7 +670,7 @@ err:
static int vulkan_filter_set_device(AVFilterContext *avctx,
AVBufferRef *device)
{
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
av_buffer_unref(&s->device_ref);
@ -649,7 +687,7 @@ static int vulkan_filter_set_device(AVFilterContext *avctx,
static int vulkan_filter_set_frames(AVFilterContext *avctx,
AVBufferRef *frames)
{
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
av_buffer_unref(&s->frames_ref);
@ -664,7 +702,8 @@ int ff_vk_filter_config_input(AVFilterLink *inlink)
{
int err;
AVFilterContext *avctx = inlink->dst;
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
AVHWFramesContext *input_frames;
if (!inlink->hw_frames_ctx) {
@ -695,6 +734,9 @@ int ff_vk_filter_config_input(AVFilterLink *inlink)
if (err < 0)
return err;
vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &s->props);
vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
/* Default output parameters match input parameters. */
s->input_format = input_frames->sw_format;
if (s->output_format == AV_PIX_FMT_NONE)
@ -711,7 +753,7 @@ int ff_vk_filter_config_output_inplace(AVFilterLink *outlink)
{
int err;
AVFilterContext *avctx = outlink->src;
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
av_buffer_unref(&outlink->hw_frames_ctx);
@ -741,7 +783,7 @@ int ff_vk_filter_config_output(AVFilterLink *outlink)
{
int err;
AVFilterContext *avctx = outlink->src;
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
AVBufferRef *output_frames_ref;
AVHWFramesContext *output_frames;
@ -790,7 +832,7 @@ fail:
int ff_vk_filter_init(AVFilterContext *avctx)
{
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
s->output_format = AV_PIX_FMT_NONE;
@ -800,12 +842,12 @@ int ff_vk_filter_init(AVFilterContext *avctx)
return 0;
}
FN_CREATING(VulkanFilterContext, VkSampler, sampler, samplers, samplers_num)
VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
VkFilter filt)
FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num)
FFVkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
VkFilter filt)
{
VkResult ret;
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
VkSamplerCreateInfo sampler_info = {
@ -823,19 +865,22 @@ VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
.unnormalizedCoordinates = unnorm_coords,
};
VkSampler *sampler = create_sampler(s);
if (!sampler)
FFVkSampler *sctx = create_sampler(s);
if (!sctx)
return NULL;
ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
s->hwctx->alloc, sampler);
s->hwctx->alloc, &sctx->sampler[0]);
if (ret != VK_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "Unable to init sampler: %s\n",
ff_vk_ret2str(ret));
return NULL;
}
return sampler;
for (int i = 1; i < 4; i++)
sctx->sampler[i] = sctx->sampler[0];
return sctx;
}
int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
@ -863,7 +908,7 @@ typedef struct ImageViewCtx {
static void destroy_imageview(void *opaque, uint8_t *data)
{
VulkanFilterContext *s = opaque;
FFVulkanContext *s = opaque;
FFVulkanFunctions *vk = &s->vkfn;
ImageViewCtx *iv = (ImageViewCtx *)data;
@ -877,7 +922,7 @@ int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e,
{
int err;
AVBufferRef *buf;
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
VkImageViewCreateInfo imgview_spawn = {
@ -924,8 +969,8 @@ int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e,
return 0;
}
FN_CREATING(VulkanPipeline, FFSPIRVShader, shader, shaders, shaders_num)
FFSPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl,
FN_CREATING(FFVulkanPipeline, FFSPIRVShader, shader, shaders, shaders_num)
FFSPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, FFVulkanPipeline *pl,
const char *name, VkShaderStageFlags stage)
{
FFSPIRVShader *shd = create_shader(pl);
@ -984,7 +1029,7 @@ int ff_vk_compile_shader(AVFilterContext *avctx, FFSPIRVShader *shd,
{
int err;
VkResult ret;
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
VkShaderModuleCreateInfo shader_create;
uint8_t *spirv;
@ -1043,25 +1088,24 @@ static const struct descriptor_props {
[VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, },
};
int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
FFSPIRVShader *shd, VulkanDescriptorSetBinding *desc,
int ff_vk_add_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl,
FFSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
int num, int only_print_to_shader)
{
VkResult ret;
VkDescriptorSetLayout *layout;
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
if (only_print_to_shader)
goto print;
pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
pl->desc_layout_num + 1);
pl->desc_layout_num + pl->qf->nb_queues);
if (!pl->desc_layout)
return AVERROR(ENOMEM);
layout = &pl->desc_layout[pl->desc_layout_num];
memset(layout, 0, sizeof(*layout));
{ /* Create descriptor set layout descriptions */
VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
@ -1076,21 +1120,27 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
desc_binding[i].descriptorType = desc[i].type;
desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1);
desc_binding[i].stageFlags = desc[i].stages;
desc_binding[i].pImmutableSamplers = desc[i].samplers;
desc_binding[i].pImmutableSamplers = desc[i].sampler ?
desc[i].sampler->sampler :
NULL;
}
desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
desc_create_layout.pBindings = desc_binding;
desc_create_layout.bindingCount = num;
ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
s->hwctx->alloc, layout);
av_free(desc_binding);
if (ret != VK_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
"layout: %s\n", ff_vk_ret2str(ret));
return AVERROR_EXTERNAL;
for (int i = 0; i < pl->qf->nb_queues; i++) {
ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
s->hwctx->alloc, &layout[i]);
if (ret != VK_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
"layout: %s\n", ff_vk_ret2str(ret));
av_free(desc_binding);
return AVERROR_EXTERNAL;
}
}
av_free(desc_binding);
}
{ /* Pool each descriptor by type and update pool counts */
@ -1108,7 +1158,7 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
}
pl->pool_size_desc[j].type = desc[i].type;
pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1);
pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*pl->qf->nb_queues;
}
}
@ -1132,27 +1182,32 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
pl->desc_template_info = av_realloc_array(pl->desc_template_info,
sizeof(*pl->desc_template_info),
pl->desc_layout_num + 1);
pl->total_descriptor_sets + pl->qf->nb_queues);
if (!pl->desc_template_info)
return AVERROR(ENOMEM);
dt = &pl->desc_template_info[pl->desc_layout_num];
memset(dt, 0, sizeof(*dt));
dt = &pl->desc_template_info[pl->total_descriptor_sets];
memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues);
dt->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
dt->templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
dt->descriptorSetLayout = *layout;
dt->pDescriptorUpdateEntries = des_entries;
dt->descriptorUpdateEntryCount = num;
for (int i = 0; i < pl->qf->nb_queues; i++) {
dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
dt[i].descriptorSetLayout = layout[i];
dt[i].pDescriptorUpdateEntries = des_entries;
dt[i].descriptorUpdateEntryCount = num;
}
}
pl->desc_layout_num++;
pl->descriptor_sets_num++;
pl->desc_layout_num += pl->qf->nb_queues;
pl->total_descriptor_sets += pl->qf->nb_queues;
print:
/* Write shader info */
for (int i = 0; i < num; i++) {
const struct descriptor_props *prop = &descriptor_props[desc[i].type];
GLSLA("layout (set = %i, binding = %i", pl->desc_layout_num - 1, i);
GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);
if (desc[i].mem_layout)
GLSLA(", %s", desc[i].mem_layout);
@ -1184,12 +1239,14 @@ print:
return 0;
}
void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
void ff_vk_update_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl,
int set_id)
{
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue;
vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
pl->desc_set[set_id],
pl->desc_template[set_id],
@ -1200,27 +1257,29 @@ void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e,
VkShaderStageFlagBits stage, int offset,
size_t size, void *src)
{
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
vk->CmdPushConstants(e->bufs[s->cur_queue_idx], e->bound_pl->pipeline_layout,
vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout,
stage, offset, size, src);
}
int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
int ff_vk_init_pipeline_layout(AVFilterContext *avctx, FFVulkanPipeline *pl)
{
VkResult ret;
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
pl->descriptor_sets_num = pl->desc_layout_num * s->queue_count;
pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging));
if (!pl->desc_staging)
return AVERROR(ENOMEM);
{ /* Init descriptor set pool */
VkDescriptorPoolCreateInfo pool_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.poolSizeCount = pl->pool_size_desc_num,
.pPoolSizes = pl->pool_size_desc,
.maxSets = pl->descriptor_sets_num,
.maxSets = pl->total_descriptor_sets,
};
ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
@ -1237,11 +1296,11 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
VkDescriptorSetAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.descriptorPool = pl->desc_pool,
.descriptorSetCount = pl->descriptor_sets_num,
.descriptorSetCount = pl->total_descriptor_sets,
.pSetLayouts = pl->desc_layout,
};
pl->desc_set = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_set));
pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set));
if (!pl->desc_set)
return AVERROR(ENOMEM);
@ -1257,12 +1316,14 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
{ /* Finally create the pipeline layout */
VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = pl->desc_layout_num,
.pSetLayouts = pl->desc_layout,
.pSetLayouts = (VkDescriptorSetLayout *)pl->desc_staging,
.pushConstantRangeCount = pl->push_consts_num,
.pPushConstantRanges = pl->push_consts,
};
for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues)
pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i];
ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
s->hwctx->alloc, &pl->pipeline_layout);
av_freep(&pl->push_consts);
@ -1275,21 +1336,19 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
}
{ /* Descriptor template (for tightly packed descriptors) */
VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
VkDescriptorUpdateTemplateCreateInfo *dt;
pl->desc_template = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_template));
pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template));
if (!pl->desc_template)
return AVERROR(ENOMEM);
/* Create update templates for the descriptor sets */
for (int i = 0; i < pl->descriptor_sets_num; i++) {
desc_template_info = &pl->desc_template_info[i % pl->desc_layout_num];
desc_template_info->pipelineLayout = pl->pipeline_layout;
for (int i = 0; i < pl->total_descriptor_sets; i++) {
dt = &pl->desc_template_info[i];
dt->pipelineLayout = pl->pipeline_layout;
ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev,
desc_template_info,
s->hwctx->alloc,
dt, s->hwctx->alloc,
&pl->desc_template[i]);
av_free((void *)desc_template_info->pDescriptorUpdateEntries);
if (ret != VK_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor "
"template: %s\n", ff_vk_ret2str(ret));
@ -1297,23 +1356,34 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
}
}
/* Free the duplicated memory used for the template entries */
for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
dt = &pl->desc_template_info[i];
av_free((void *)dt->pDescriptorUpdateEntries);
}
av_freep(&pl->desc_template_info);
}
return 0;
}
FN_CREATING(VulkanFilterContext, VulkanPipeline, pipeline, pipelines, pipelines_num)
VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx)
FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num)
FFVulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx,
FFVkQueueFamilyCtx *qf)
{
return create_pipeline(avctx->priv);
FFVulkanPipeline *pl = create_pipeline(avctx->priv);
if (pl)
pl->qf = qf;
return pl;
}
int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl)
int ff_vk_init_compute_pipeline(AVFilterContext *avctx, FFVulkanPipeline *pl)
{
int i;
VkResult ret;
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
VkComputePipelineCreateInfo pipe = {
@ -1346,26 +1416,31 @@ int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl)
}
void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e,
VulkanPipeline *pl)
FFVulkanPipeline *pl)
{
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
vk->CmdBindPipeline(e->bufs[s->cur_queue_idx], pl->bind_point, pl->pipeline);
vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline);
vk->CmdBindDescriptorSets(e->bufs[s->cur_queue_idx], pl->bind_point,
pl->pipeline_layout, 0, pl->descriptor_sets_num,
pl->desc_set, 0, 0);
for (int i = 0; i < pl->descriptor_sets_num; i++)
pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue];
vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point,
pl->pipeline_layout, 0,
pl->descriptor_sets_num,
(VkDescriptorSet *)pl->desc_staging,
0, NULL);
e->bound_pl = pl;
}
static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e)
static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
{
FFVulkanFunctions *vk = &s->vkfn;
/* Make sure all queues have finished executing */
for (int i = 0; i < s->queue_count; i++) {
for (int i = 0; i < e->qf->nb_queues; i++) {
FFVkQueueCtx *q = &e->queues[i];
if (q->fence) {
@ -1389,7 +1464,7 @@ static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e)
}
if (e->bufs)
vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, s->queue_count, e->bufs);
vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs);
if (e->pool)
vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
@ -1397,13 +1472,14 @@ static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e)
av_freep(&e->queues);
av_freep(&e->sem_sig);
av_freep(&e->sem_sig_val);
av_freep(&e->sem_sig_val_dst);
av_freep(&e->sem_wait);
av_freep(&e->sem_wait_dst);
av_freep(&e->sem_wait_val);
av_free(e);
}
static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
{
FFVulkanFunctions *vk = &s->vkfn;
@ -1433,6 +1509,7 @@ static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
s->hwctx->alloc);
av_freep(&pl->desc_staging);
av_freep(&pl->desc_set);
av_freep(&pl->shaders);
av_freep(&pl->desc_layout);
@ -1443,8 +1520,10 @@ static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
/* Only freed in case of failure */
av_freep(&pl->pool_size_desc);
if (pl->desc_template_info) {
for (int i = 0; i < pl->descriptor_sets_num; i++)
av_free((void *)pl->desc_template_info[i].pDescriptorUpdateEntries);
for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i];
av_free((void *)dt->pDescriptorUpdateEntries);
}
av_freep(&pl->desc_template_info);
}
@ -1453,7 +1532,7 @@ static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
void ff_vk_filter_uninit(AVFilterContext *avctx)
{
VulkanFilterContext *s = avctx->priv;
FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
ff_vk_glslang_uninit();
@ -1463,7 +1542,8 @@ void ff_vk_filter_uninit(AVFilterContext *avctx)
av_freep(&s->exec_ctx);
for (int i = 0; i < s->samplers_num; i++) {
vk->DestroySampler(s->hwctx->act_dev, *s->samplers[i], s->hwctx->alloc);
vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0],
s->hwctx->alloc);
av_free(s->samplers[i]);
}
av_freep(&s->samplers);

@ -20,6 +20,7 @@
#define AVFILTER_VULKAN_H
#define VK_NO_PROTOTYPES
#define VK_ENABLE_BETA_EXTENSIONS
#include "avfilter.h"
#include "libavutil/pixdesc.h"
@ -52,9 +53,6 @@
goto fail; \
} while (0)
/* Useful for attaching immutable samplers to arrays */
#define DUP_SAMPLER_ARRAY4(x) (VkSampler []){ x, x, x, x, }
typedef struct FFSPIRVShader {
const char *name; /* Name for id/debugging purposes */
AVBPrint src;
@ -62,7 +60,11 @@ typedef struct FFSPIRVShader {
VkPipelineShaderStageCreateInfo shader;
} FFSPIRVShader;
typedef struct VulkanDescriptorSetBinding {
typedef struct FFVkSampler {
VkSampler sampler[4];
} FFVkSampler;
typedef struct FFVulkanDescriptorSetBinding {
const char *name;
VkDescriptorType type;
const char *mem_layout; /* Storage images (rgba8, etc.) and buffers (std430, etc.) */
@ -71,9 +73,9 @@ typedef struct VulkanDescriptorSetBinding {
uint32_t dimensions; /* Needed for e.g. sampler%iD */
uint32_t elems; /* 0 - scalar, 1 or more - vector */
VkShaderStageFlags stages;
const VkSampler *samplers; /* Immutable samplers, length - #elems */
FFVkSampler *sampler; /* Sampler to use for all elems */
void *updater; /* Pointer to VkDescriptor*Info */
} VulkanDescriptorSetBinding;
} FFVulkanDescriptorSetBinding;
typedef struct FFVkBuffer {
VkBuffer buf;
@ -81,7 +83,15 @@ typedef struct FFVkBuffer {
VkMemoryPropertyFlagBits flags;
} FFVkBuffer;
typedef struct VulkanPipeline {
typedef struct FFVkQueueFamilyCtx {
int queue_family;
int nb_queues;
int cur_queue;
} FFVkQueueFamilyCtx;
typedef struct FFVulkanPipeline {
FFVkQueueFamilyCtx *qf;
VkPipelineBindPoint bind_point;
/* Contexts */
@ -97,18 +107,21 @@ typedef struct VulkanPipeline {
int push_consts_num;
/* Descriptors */
VkDescriptorSetLayout *desc_layout;
VkDescriptorPool desc_pool;
VkDescriptorSet *desc_set;
VkDescriptorUpdateTemplate *desc_template;
int desc_layout_num;
int descriptor_sets_num;
int pool_size_desc_num;
VkDescriptorSetLayout *desc_layout;
VkDescriptorPool desc_pool;
VkDescriptorSet *desc_set;
void **desc_staging;
VkDescriptorSetLayoutBinding **desc_binding;
VkDescriptorUpdateTemplate *desc_template;
int desc_layout_num;
int descriptor_sets_num;
int total_descriptor_sets;
int pool_size_desc_num;
/* Temporary, used to store data in between initialization stages */
VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
VkDescriptorPoolSize *pool_size_desc;
} VulkanPipeline;
} FFVulkanPipeline;
typedef struct FFVkQueueCtx {
VkFence fence;
@ -126,6 +139,8 @@ typedef struct FFVkQueueCtx {
} FFVkQueueCtx;
typedef struct FFVkExecContext {
FFVkQueueFamilyCtx *qf;
VkCommandPool pool;
VkCommandBuffer *bufs;
FFVkQueueCtx *queues;
@ -134,7 +149,7 @@ typedef struct FFVkExecContext {
int *nb_deps;
int *dep_alloc_size;
VulkanPipeline *bound_pl;
FFVulkanPipeline *bound_pl;
VkSemaphore *sem_wait;
int sem_wait_alloc; /* Allocated sem_wait */
@ -152,23 +167,23 @@ typedef struct FFVkExecContext {
uint64_t *sem_sig_val;
int sem_sig_val_alloc;
uint64_t **sem_sig_val_dst;
int sem_sig_val_dst_alloc;
} FFVkExecContext;
typedef struct VulkanFilterContext {
typedef struct FFVulkanContext {
const AVClass *class;
FFVulkanFunctions vkfn;
FFVulkanExtensions extensions;
VkPhysicalDeviceProperties props;
VkPhysicalDeviceMemoryProperties mprops;
AVBufferRef *device_ref;
AVBufferRef *frames_ref; /* For in-place filtering */
AVHWDeviceContext *device;
AVVulkanDeviceContext *hwctx;
/* State - mirrored with the exec ctx */
int cur_queue_idx;
int queue_family_idx;
int queue_count;
/* Properties */
int output_width;
int output_height;
@ -176,7 +191,7 @@ typedef struct VulkanFilterContext {
enum AVPixelFormat input_format;
/* Samplers */
VkSampler **samplers;
FFVkSampler **samplers;
int samplers_num;
/* Exec contexts */
@ -184,12 +199,12 @@ typedef struct VulkanFilterContext {
int exec_ctx_num;
/* Pipelines (each can have 1 shader of each type) */
VulkanPipeline **pipelines;
FFVulkanPipeline **pipelines;
int pipelines_num;
void *scratch; /* Scratch memory used only in functions */
unsigned int scratch_size;
} VulkanFilterContext;
} FFVulkanContext;
/* Identity mapping - r = r, b = b, g = g, a = a */
extern const VkComponentMapping ff_comp_identity_map;
@ -218,11 +233,23 @@ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt);
*/
const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
/**
* Initialize a queue family.
* A queue limit of 0 means no limit.
*/
void ff_vk_qf_init(AVFilterContext *avctx, FFVkQueueFamilyCtx *qf,
VkQueueFlagBits dev_family, int queue_limit);
/**
* Rotate through the queues in a queue family.
*/
void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf);
/**
* Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit()
*/
VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
VkFilter filt);
FFVkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
VkFilter filt);
/**
* Create an imageview.
@ -237,19 +264,20 @@ int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e,
* Define a push constant for a given stage into a pipeline.
* Must be called before the pipeline layout has been initialized.
*/
int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl,
int ff_vk_add_push_constant(AVFilterContext *avctx, FFVulkanPipeline *pl,
int offset, int size, VkShaderStageFlagBits stage);
/**
* Inits a pipeline. Everything in it will be auto-freed when calling
* ff_vk_filter_uninit().
*/
VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx);
FFVulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx,
FFVkQueueFamilyCtx *qf);
/**
* Inits a shader for a specific pipeline. Will be auto-freed on uninit.
*/
FFSPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl,
FFSPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, FFVulkanPipeline *pl,
const char *name, VkShaderStageFlags stage);
/**
@ -261,8 +289,8 @@ void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, FFSPIRVShader *shd,
/**
* Adds a descriptor set to the shader and registers them in the pipeline.
*/
int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
FFSPIRVShader *shd, VulkanDescriptorSetBinding *desc,
int ff_vk_add_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl,
FFSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
int num, int only_print_to_shader);
/**
@ -280,27 +308,28 @@ void ff_vk_print_shader(AVFilterContext *avctx, FFSPIRVShader *shd, int prio);
* Initializes the pipeline layout after all shaders and descriptor sets have
* been finished.
*/
int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl);
int ff_vk_init_pipeline_layout(AVFilterContext *avctx, FFVulkanPipeline *pl);
/**
* Initializes a compute pipeline. Will pick the first shader with the
* COMPUTE flag set.
*/
int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl);
int ff_vk_init_compute_pipeline(AVFilterContext *avctx, FFVulkanPipeline *pl);
/**
* Updates a descriptor set via the updaters defined.
* Can be called immediately after pipeline creation, but must be called
* at least once before queue submission.
*/
void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
void ff_vk_update_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl,
int set_id);
/**
* Init an execution context for command recording and queue submission.
* WIll be auto-freed on uninit.
*/
int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx);
int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx,
FFVkQueueFamilyCtx *qf);
/**
* Begin recording to the command buffer. Previous execution must have been
@ -313,7 +342,7 @@ int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e);
* Must be called after ff_vk_start_exec_recording() and before submission.
*/
void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e,
VulkanPipeline *pl);
FFVulkanPipeline *pl);
/**
* Updates push constants.

Loading…
Cancel
Save