lavfi/vulkan: refactor, fix and fully implement multiple queues

pull/374/head
Lynne 3 years ago
parent 246f841b53
commit f705e9ea05
No known key found for this signature in database
GPG Key ID: A2FEA5F03F034464
  1. 27
      libavfilter/vf_avgblur_vulkan.c
  2. 25
      libavfilter/vf_chromaber_vulkan.c
  3. 30
      libavfilter/vf_overlay_vulkan.c
  4. 27
      libavfilter/vf_scale_vulkan.c
  5. 328
      libavfilter/vulkan.c
  6. 87
      libavfilter/vulkan.h

@ -24,12 +24,13 @@
#define CGS 32 #define CGS 32
typedef struct AvgBlurVulkanContext { typedef struct AvgBlurVulkanContext {
VulkanFilterContext vkctx; FFVulkanContext vkctx;
int initialized; int initialized;
FFVkQueueFamilyCtx qf;
FFVkExecContext *exec; FFVkExecContext *exec;
VulkanPipeline *pl_hor; FFVulkanPipeline *pl_hor;
VulkanPipeline *pl_ver; FFVulkanPipeline *pl_ver;
/* Shader updators, must be in the main filter struct */ /* Shader updators, must be in the main filter struct */
VkDescriptorImageInfo input_images[3]; VkDescriptorImageInfo input_images[3];
@ -73,16 +74,14 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
FFSPIRVShader *shd; FFSPIRVShader *shd;
AvgBlurVulkanContext *s = ctx->priv; AvgBlurVulkanContext *s = ctx->priv;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
VkSampler *sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_LINEAR);
VulkanDescriptorSetBinding desc_i[2] = { FFVulkanDescriptorSetBinding desc_i[2] = {
{ {
.name = "input_img", .name = "input_img",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.dimensions = 2, .dimensions = 2,
.elems = planes, .elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT, .stages = VK_SHADER_STAGE_COMPUTE_BIT,
.samplers = DUP_SAMPLER_ARRAY4(*sampler),
}, },
{ {
.name = "output_img", .name = "output_img",
@ -95,17 +94,17 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
}, },
}; };
if (!sampler) ff_vk_qf_init(ctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
return AVERROR_EXTERNAL;
s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index; desc_i[0].sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_LINEAR);
s->vkctx.queue_count = s->vkctx.hwctx->nb_comp_queues; if (!desc_i[0].sampler)
return AVERROR_EXTERNAL;
{ /* Create shader for the horizontal pass */ { /* Create shader for the horizontal pass */
desc_i[0].updater = s->input_images; desc_i[0].updater = s->input_images;
desc_i[1].updater = s->tmp_images; desc_i[1].updater = s->tmp_images;
s->pl_hor = ff_vk_create_pipeline(ctx); s->pl_hor = ff_vk_create_pipeline(ctx, &s->qf);
if (!s->pl_hor) if (!s->pl_hor)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
@ -148,7 +147,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
desc_i[0].updater = s->tmp_images; desc_i[0].updater = s->tmp_images;
desc_i[1].updater = s->output_images; desc_i[1].updater = s->output_images;
s->pl_ver = ff_vk_create_pipeline(ctx); s->pl_ver = ff_vk_create_pipeline(ctx, &s->qf);
if (!s->pl_ver) if (!s->pl_ver)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
@ -188,7 +187,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
} }
/* Execution context */ /* Execution context */
RET(ff_vk_create_exec_ctx(ctx, &s->exec)); RET(ff_vk_create_exec_ctx(ctx, &s->exec, &s->qf));
s->initialized = 1; s->initialized = 1;
@ -311,6 +310,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *tmp_f
if (err) if (err)
return err; return err;
ff_vk_qf_rotate(&s->qf);
return err; return err;
fail: fail:

@ -24,11 +24,12 @@
#define CGROUPS (int [3]){ 32, 32, 1 } #define CGROUPS (int [3]){ 32, 32, 1 }
typedef struct ChromaticAberrationVulkanContext { typedef struct ChromaticAberrationVulkanContext {
VulkanFilterContext vkctx; FFVulkanContext vkctx;
int initialized; int initialized;
FFVkQueueFamilyCtx qf;
FFVkExecContext *exec; FFVkExecContext *exec;
VulkanPipeline *pl; FFVulkanPipeline *pl;
/* Shader updators, must be in the main filter struct */ /* Shader updators, must be in the main filter struct */
VkDescriptorImageInfo input_images[3]; VkDescriptorImageInfo input_images[3];
@ -67,17 +68,18 @@ static const char distort_chroma_kernel[] = {
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{ {
int err; int err;
FFVkSampler *sampler;
ChromaticAberrationVulkanContext *s = ctx->priv; ChromaticAberrationVulkanContext *s = ctx->priv;
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
ff_vk_qf_init(ctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
/* Create a sampler */ /* Create a sampler */
VkSampler *sampler = ff_vk_init_sampler(ctx, 0, VK_FILTER_LINEAR); sampler = ff_vk_init_sampler(ctx, 0, VK_FILTER_LINEAR);
if (!sampler) if (!sampler)
return AVERROR_EXTERNAL; return AVERROR_EXTERNAL;
s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index; s->pl = ff_vk_create_pipeline(ctx, &s->qf);
s->vkctx.queue_count = s->vkctx.hwctx->nb_comp_queues;
s->pl = ff_vk_create_pipeline(ctx);
if (!s->pl) if (!s->pl)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
@ -86,8 +88,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
s->opts.dist[1] = (s->opts.dist[1] / 100.0f) + 1.0f; s->opts.dist[1] = (s->opts.dist[1] / 100.0f) + 1.0f;
{ /* Create the shader */ { /* Create the shader */
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format); FFVulkanDescriptorSetBinding desc_i[2] = {
VulkanDescriptorSetBinding desc_i[2] = {
{ {
.name = "input_img", .name = "input_img",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
@ -95,7 +96,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
.elems = planes, .elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT, .stages = VK_SHADER_STAGE_COMPUTE_BIT,
.updater = s->input_images, .updater = s->input_images,
.samplers = DUP_SAMPLER_ARRAY4(*sampler), .sampler = sampler,
}, },
{ {
.name = "output_img", .name = "output_img",
@ -158,7 +159,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
RET(ff_vk_init_compute_pipeline(ctx, s->pl)); RET(ff_vk_init_compute_pipeline(ctx, s->pl));
/* Execution context */ /* Execution context */
RET(ff_vk_create_exec_ctx(ctx, &s->exec)); RET(ff_vk_create_exec_ctx(ctx, &s->exec, &s->qf));
s->initialized = 1; s->initialized = 1;
@ -256,6 +257,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
if (err) if (err)
return err; return err;
ff_vk_qf_rotate(&s->qf);
return err; return err;
fail: fail:

@ -25,11 +25,12 @@
#define CGROUPS (int [3]){ 32, 32, 1 } #define CGROUPS (int [3]){ 32, 32, 1 }
typedef struct OverlayVulkanContext { typedef struct OverlayVulkanContext {
VulkanFilterContext vkctx; FFVulkanContext vkctx;
int initialized; int initialized;
VulkanPipeline *pl; FFVkQueueFamilyCtx qf;
FFVkExecContext *exec; FFVkExecContext *exec;
FFVulkanPipeline *pl;
FFFrameSync fs; FFFrameSync fs;
FFVkBuffer params_buf; FFVkBuffer params_buf;
@ -79,23 +80,24 @@ static const char overlay_alpha[] = {
static av_cold int init_filter(AVFilterContext *ctx) static av_cold int init_filter(AVFilterContext *ctx)
{ {
int err; int err;
FFVkSampler *sampler;
OverlayVulkanContext *s = ctx->priv; OverlayVulkanContext *s = ctx->priv;
VkSampler *sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_NEAREST); const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
ff_vk_qf_init(ctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
sampler = ff_vk_init_sampler(ctx, 1, VK_FILTER_NEAREST);
if (!sampler) if (!sampler)
return AVERROR_EXTERNAL; return AVERROR_EXTERNAL;
s->pl = ff_vk_create_pipeline(ctx); s->pl = ff_vk_create_pipeline(ctx, &s->qf);
if (!s->pl) if (!s->pl)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index;
s->vkctx.queue_count = s->vkctx.hwctx->nb_comp_queues;
{ /* Create the shader */ { /* Create the shader */
const int planes = av_pix_fmt_count_planes(s->vkctx.output_format);
const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA; const int ialpha = av_pix_fmt_desc_get(s->vkctx.input_format)->flags & AV_PIX_FMT_FLAG_ALPHA;
VulkanDescriptorSetBinding desc_i[3] = { FFVulkanDescriptorSetBinding desc_i[3] = {
{ {
.name = "main_img", .name = "main_img",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
@ -103,7 +105,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
.elems = planes, .elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT, .stages = VK_SHADER_STAGE_COMPUTE_BIT,
.updater = s->main_images, .updater = s->main_images,
.samplers = DUP_SAMPLER_ARRAY4(*sampler), .sampler = sampler,
}, },
{ {
.name = "overlay_img", .name = "overlay_img",
@ -112,7 +114,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
.elems = planes, .elems = planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT, .stages = VK_SHADER_STAGE_COMPUTE_BIT,
.updater = s->overlay_images, .updater = s->overlay_images,
.samplers = DUP_SAMPLER_ARRAY4(*sampler), .sampler = sampler,
}, },
{ {
.name = "output_img", .name = "output_img",
@ -126,7 +128,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
}, },
}; };
VulkanDescriptorSetBinding desc_b = { FFVulkanDescriptorSetBinding desc_b = {
.name = "params", .name = "params",
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.mem_quali = "readonly", .mem_quali = "readonly",
@ -215,7 +217,7 @@ static av_cold int init_filter(AVFilterContext *ctx)
} }
/* Execution context */ /* Execution context */
RET(ff_vk_create_exec_ctx(ctx, &s->exec)); RET(ff_vk_create_exec_ctx(ctx, &s->exec, &s->qf));
s->initialized = 1; s->initialized = 1;
@ -339,6 +341,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f,
if (err) if (err)
return err; return err;
ff_vk_qf_rotate(&s->qf);
return err; return err;
fail: fail:

@ -33,11 +33,12 @@ enum ScalerFunc {
}; };
typedef struct ScaleVulkanContext { typedef struct ScaleVulkanContext {
VulkanFilterContext vkctx; FFVulkanContext vkctx;
int initialized; int initialized;
FFVkQueueFamilyCtx qf;
FFVkExecContext *exec; FFVkExecContext *exec;
VulkanPipeline *pl; FFVulkanPipeline *pl;
FFVkBuffer params_buf; FFVkBuffer params_buf;
/* Shader updators, must be in the main filter struct */ /* Shader updators, must be in the main filter struct */
@ -107,7 +108,7 @@ static const char write_444[] = {
static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in) static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
{ {
int err; int err;
VkSampler *sampler; FFVkSampler *sampler;
VkFilter sampler_mode; VkFilter sampler_mode;
ScaleVulkanContext *s = ctx->priv; ScaleVulkanContext *s = ctx->priv;
@ -115,9 +116,9 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
int crop_y = in->crop_top; int crop_y = in->crop_top;
int crop_w = in->width - (in->crop_left + in->crop_right); int crop_w = in->width - (in->crop_left + in->crop_right);
int crop_h = in->height - (in->crop_top + in->crop_bottom); int crop_h = in->height - (in->crop_top + in->crop_bottom);
int in_planes = av_pix_fmt_count_planes(s->vkctx.input_format);
s->vkctx.queue_family_idx = s->vkctx.hwctx->queue_family_comp_index; ff_vk_qf_init(ctx, &s->qf, VK_QUEUE_COMPUTE_BIT, 0);
s->vkctx.queue_count = s->vkctx.hwctx->nb_comp_queues;
switch (s->scaler) { switch (s->scaler) {
case F_NEAREST: case F_NEAREST:
@ -133,20 +134,20 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
if (!sampler) if (!sampler)
return AVERROR_EXTERNAL; return AVERROR_EXTERNAL;
s->pl = ff_vk_create_pipeline(ctx); s->pl = ff_vk_create_pipeline(ctx, &s->qf);
if (!s->pl) if (!s->pl)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
{ /* Create the shader */ { /* Create the shader */
VulkanDescriptorSetBinding desc_i[2] = { FFVulkanDescriptorSetBinding desc_i[2] = {
{ {
.name = "input_img", .name = "input_img",
.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
.dimensions = 2, .dimensions = 2,
.elems = av_pix_fmt_count_planes(s->vkctx.input_format), .elems = in_planes,
.stages = VK_SHADER_STAGE_COMPUTE_BIT, .stages = VK_SHADER_STAGE_COMPUTE_BIT,
.updater = s->input_images, .updater = s->input_images,
.samplers = DUP_SAMPLER_ARRAY4(*sampler), .sampler = sampler,
}, },
{ {
.name = "output_img", .name = "output_img",
@ -160,7 +161,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
}, },
}; };
VulkanDescriptorSetBinding desc_b = { FFVulkanDescriptorSetBinding desc_b = {
.name = "params", .name = "params",
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.mem_quali = "readonly", .mem_quali = "readonly",
@ -178,7 +179,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
ff_vk_set_compute_shader_sizes(ctx, shd, CGROUPS); ff_vk_set_compute_shader_sizes(ctx, shd, CGROUPS);
RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, desc_i, 2, 0)); /* set 0 */ RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, desc_i, 2, 0)); /* set 0 */
RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, &desc_b, 1, 0)); /* set 0 */ RET(ff_vk_add_descriptor_set(ctx, s->pl, shd, &desc_b, 1, 0)); /* set 1 */
GLSLD( scale_bilinear ); GLSLD( scale_bilinear );
@ -280,7 +281,7 @@ static av_cold int init_filter(AVFilterContext *ctx, AVFrame *in)
} }
/* Execution context */ /* Execution context */
RET(ff_vk_create_exec_ctx(ctx, &s->exec)); RET(ff_vk_create_exec_ctx(ctx, &s->exec, &s->qf));
s->initialized = 1; s->initialized = 1;
@ -384,6 +385,8 @@ static int process_frames(AVFilterContext *avctx, AVFrame *out_f, AVFrame *in_f)
if (err) if (err)
return err; return err;
ff_vk_qf_rotate(&s->qf);
return err; return err;
fail: fail:

@ -20,6 +20,7 @@
#include "vulkan.h" #include "vulkan.h"
#include "glslang.h" #include "glslang.h"
#include "libavutil/avassert.h"
#include "libavutil/vulkan_loader.h" #include "libavutil/vulkan_loader.h"
/* Generic macro for creating contexts which need to keep their addresses /* Generic macro for creating contexts which need to keep their addresses
@ -88,15 +89,54 @@ const char *ff_vk_ret2str(VkResult res)
#undef CASE #undef CASE
} }
/**
 * Fill a queue family context from the device context of a Vulkan filter.
 *
 * The family index and the number of usable queues are copied from the
 * hardware device context reachable through avctx->priv (read as an
 * FFVulkanContext). Only qf->queue_family and qf->nb_queues are written;
 * qf->cur_queue is left as the caller had it.
 *
 * @param avctx        filter context; its priv data is used as FFVulkanContext
 * @param qf           queue family context to fill in
 * @param dev_family   exactly one of VK_QUEUE_GRAPHICS_BIT,
 *                     VK_QUEUE_COMPUTE_BIT, VK_QUEUE_TRANSFER_BIT,
 *                     VK_QUEUE_VIDEO_ENCODE_BIT_KHR or
 *                     VK_QUEUE_VIDEO_DECODE_BIT_KHR; anything else trips
 *                     av_assert0()
 * @param queue_limit  upper bound on the number of queues to use,
 *                     0 meaning "no limit"
 */
void ff_vk_qf_init(AVFilterContext *avctx, FFVkQueueFamilyCtx *qf,
                   VkQueueFlagBits dev_family, int queue_limit)
{
    FFVulkanContext *s = avctx->priv;
    /* A limit of zero selects every queue the device context exposes */
    const int limit = queue_limit ? queue_limit : INT32_MAX;

    switch (dev_family) {
    case VK_QUEUE_GRAPHICS_BIT:
        qf->queue_family = s->hwctx->queue_family_index;
        qf->nb_queues    = FFMIN(s->hwctx->nb_graphics_queues, limit);
        break;
    case VK_QUEUE_COMPUTE_BIT:
        qf->queue_family = s->hwctx->queue_family_comp_index;
        qf->nb_queues    = FFMIN(s->hwctx->nb_comp_queues, limit);
        break;
    case VK_QUEUE_TRANSFER_BIT:
        qf->queue_family = s->hwctx->queue_family_tx_index;
        qf->nb_queues    = FFMIN(s->hwctx->nb_tx_queues, limit);
        break;
    case VK_QUEUE_VIDEO_ENCODE_BIT_KHR:
        qf->queue_family = s->hwctx->queue_family_encode_index;
        qf->nb_queues    = FFMIN(s->hwctx->nb_encode_queues, limit);
        break;
    case VK_QUEUE_VIDEO_DECODE_BIT_KHR:
        qf->queue_family = s->hwctx->queue_family_decode_index;
        qf->nb_queues    = FFMIN(s->hwctx->nb_decode_queues, limit);
        break;
    default:
        /* Unsupported or combined family bits: programming error */
        av_assert0(0);
        break;
    }
}
/**
 * Advance a queue family context to its next queue, wrapping around.
 *
 * Sets qf->cur_queue to (qf->cur_queue + 1) modulo qf->nb_queues, so that
 * successive submissions cycle through every queue of the family.
 *
 * If qf->nb_queues is not positive the context is left untouched. This can
 * happen when the context was zeroed but never initialised, or when the
 * queue count copied from the device context was 0.
 *
 * @param qf  queue family context to rotate
 */
void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
{
    /* The % operator has undefined behaviour for a zero divisor, so a
     * family without any queue must not reach the rotation below. */
    if (qf->nb_queues <= 0)
        return;

    qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
}
static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req, static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
VkMemoryPropertyFlagBits req_flags, void *alloc_extension, VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem) VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{ {
VkResult ret; VkResult ret;
int index = -1; int index = -1;
VkPhysicalDeviceProperties props; FFVulkanContext *s = avctx->priv;
VkPhysicalDeviceMemoryProperties mprops;
VulkanFilterContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
VkMemoryAllocateInfo alloc_info = { VkMemoryAllocateInfo alloc_info = {
@ -104,24 +144,21 @@ static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
.pNext = alloc_extension, .pNext = alloc_extension,
}; };
vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &props);
vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &mprops);
/* Align if we need to */ /* Align if we need to */
if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
req->size = FFALIGN(req->size, props.limits.minMemoryMapAlignment); req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment);
alloc_info.allocationSize = req->size; alloc_info.allocationSize = req->size;
/* The vulkan spec requires memory types to be sorted in the "optimal" /* The vulkan spec requires memory types to be sorted in the "optimal"
* order, so the first matching type we find will be the best/fastest one */ * order, so the first matching type we find will be the best/fastest one */
for (int i = 0; i < mprops.memoryTypeCount; i++) { for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
/* The memory type must be supported by the requirements (bitfield) */ /* The memory type must be supported by the requirements (bitfield) */
if (!(req->memoryTypeBits & (1 << i))) if (!(req->memoryTypeBits & (1 << i)))
continue; continue;
/* The memory type flags must include our properties */ /* The memory type flags must include our properties */
if ((mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags) if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
continue; continue;
/* Found a suitable memory type */ /* Found a suitable memory type */
@ -145,7 +182,7 @@ static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
} }
*mem_flags |= mprops.memoryTypes[index].propertyFlags; *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
return 0; return 0;
} }
@ -156,7 +193,7 @@ int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size,
int err; int err;
VkResult ret; VkResult ret;
int use_ded_mem; int use_ded_mem;
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
VkBufferCreateInfo buf_spawn = { VkBufferCreateInfo buf_spawn = {
@ -220,7 +257,7 @@ int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[],
int nb_buffers, int invalidate) int nb_buffers, int invalidate)
{ {
VkResult ret; VkResult ret;
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
VkMappedMemoryRange *inval_list = NULL; VkMappedMemoryRange *inval_list = NULL;
int inval_count = 0; int inval_count = 0;
@ -271,7 +308,7 @@ int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
{ {
int err = 0; int err = 0;
VkResult ret; VkResult ret;
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
VkMappedMemoryRange *flush_list = NULL; VkMappedMemoryRange *flush_list = NULL;
int flush_count = 0; int flush_count = 0;
@ -311,7 +348,7 @@ int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf) void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf)
{ {
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
if (!buf) if (!buf)
@ -323,7 +360,7 @@ void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf)
vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc); vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
} }
int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl, int ff_vk_add_push_constant(AVFilterContext *avctx, FFVulkanPipeline *pl,
int offset, int size, VkShaderStageFlagBits stage) int offset, int size, VkShaderStageFlagBits stage)
{ {
VkPushConstantRange *pc; VkPushConstantRange *pc;
@ -343,37 +380,37 @@ int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl,
return 0; return 0;
} }
FN_CREATING(VulkanFilterContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num) FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx) int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx,
FFVkQueueFamilyCtx *qf)
{ {
VkResult ret; VkResult ret;
FFVkExecContext *e; FFVkExecContext *e;
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
int queue_family = s->queue_family_idx;
int nb_queues = s->queue_count;
VkCommandPoolCreateInfo cqueue_create = { VkCommandPoolCreateInfo cqueue_create = {
.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
.queueFamilyIndex = queue_family, .queueFamilyIndex = qf->queue_family,
}; };
VkCommandBufferAllocateInfo cbuf_create = { VkCommandBufferAllocateInfo cbuf_create = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
.commandBufferCount = nb_queues, .commandBufferCount = qf->nb_queues,
}; };
e = create_exec_ctx(s); e = create_exec_ctx(s);
if (!e) if (!e)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
e->queues = av_mallocz(nb_queues * sizeof(*e->queues)); e->qf = qf;
e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues));
if (!e->queues) if (!e->queues)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
e->bufs = av_mallocz(nb_queues * sizeof(*e->bufs)); e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs));
if (!e->bufs) if (!e->bufs)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
@ -396,9 +433,9 @@ int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx)
return AVERROR_EXTERNAL; return AVERROR_EXTERNAL;
} }
for (int i = 0; i < nb_queues; i++) { for (int i = 0; i < qf->nb_queues; i++) {
FFVkQueueCtx *q = &e->queues[i]; FFVkQueueCtx *q = &e->queues[i];
vk->GetDeviceQueue(s->hwctx->act_dev, queue_family, i, &q->queue); vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family, i, &q->queue);
} }
*ctx = e; *ctx = e;
@ -408,8 +445,7 @@ int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx)
void ff_vk_discard_exec_deps(AVFilterContext *avctx, FFVkExecContext *e) void ff_vk_discard_exec_deps(AVFilterContext *avctx, FFVkExecContext *e)
{ {
VulkanFilterContext *s = avctx->priv; FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
for (int j = 0; j < q->nb_buf_deps; j++) for (int j = 0; j < q->nb_buf_deps; j++)
av_buffer_unref(&q->buf_deps[j]); av_buffer_unref(&q->buf_deps[j]);
@ -426,9 +462,9 @@ void ff_vk_discard_exec_deps(AVFilterContext *avctx, FFVkExecContext *e)
int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e) int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
{ {
VkResult ret; VkResult ret;
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
FFVkQueueCtx *q = &e->queues[s->cur_queue_idx]; FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
VkCommandBufferBeginInfo cmd_start = { VkCommandBufferBeginInfo cmd_start = {
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
@ -455,7 +491,7 @@ int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
/* Discard queue dependencies */ /* Discard queue dependencies */
ff_vk_discard_exec_deps(avctx, e); ff_vk_discard_exec_deps(avctx, e);
ret = vk->BeginCommandBuffer(e->bufs[s->cur_queue_idx], &cmd_start); ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start);
if (ret != VK_SUCCESS) { if (ret != VK_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "Failed to start command recoding: %s\n", av_log(avctx, AV_LOG_ERROR, "Failed to start command recoding: %s\n",
ff_vk_ret2str(ret)); ff_vk_ret2str(ret));
@ -467,17 +503,15 @@ int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
VkCommandBuffer ff_vk_get_exec_buf(AVFilterContext *avctx, FFVkExecContext *e) VkCommandBuffer ff_vk_get_exec_buf(AVFilterContext *avctx, FFVkExecContext *e)
{ {
VulkanFilterContext *s = avctx->priv; return e->bufs[e->qf->cur_queue];
return e->bufs[s->cur_queue_idx];
} }
int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e, int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag) AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag)
{ {
AVFrame **dst; AVFrame **dst;
VulkanFilterContext *s = avctx->priv;
AVVkFrame *f = (AVVkFrame *)frame->data[0]; AVVkFrame *f = (AVVkFrame *)frame->data[0];
FFVkQueueCtx *q = &e->queues[s->cur_queue_idx]; FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data; AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
int planes = av_pix_fmt_count_planes(fc->sw_format); int planes = av_pix_fmt_count_planes(fc->sw_format);
@ -517,16 +551,21 @@ int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
} }
e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc,
(e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst));
if (!e->sem_sig_val_dst) {
ff_vk_discard_exec_deps(avctx, e);
return AVERROR(ENOMEM);
}
e->sem_wait[e->sem_wait_cnt] = f->sem[i]; e->sem_wait[e->sem_wait_cnt] = f->sem[i];
e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag; e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i]; e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
e->sem_wait_cnt++; e->sem_wait_cnt++;
/* TODO: fix this in case execution fails */
f->sem_value[i]++;
e->sem_sig[e->sem_sig_cnt] = f->sem[i]; e->sem_sig[e->sem_sig_cnt] = f->sem[i];
e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i]; e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1;
e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
e->sem_sig_cnt++; e->sem_sig_cnt++;
} }
@ -551,9 +590,9 @@ int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e) int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
{ {
VkResult ret; VkResult ret;
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
FFVkQueueCtx *q = &e->queues[s->cur_queue_idx]; FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
@ -568,7 +607,7 @@ int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
.pNext = &s_timeline_sem_info, .pNext = &s_timeline_sem_info,
.commandBufferCount = 1, .commandBufferCount = 1,
.pCommandBuffers = &e->bufs[s->cur_queue_idx], .pCommandBuffers = &e->bufs[e->qf->cur_queue],
.pWaitSemaphores = e->sem_wait, .pWaitSemaphores = e->sem_wait,
.pWaitDstStageMask = e->sem_wait_dst, .pWaitDstStageMask = e->sem_wait_dst,
@ -578,7 +617,7 @@ int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
.signalSemaphoreCount = e->sem_sig_cnt, .signalSemaphoreCount = e->sem_sig_cnt,
}; };
ret = vk->EndCommandBuffer(e->bufs[s->cur_queue_idx]); ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]);
if (ret != VK_SUCCESS) { if (ret != VK_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", av_log(avctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
ff_vk_ret2str(ret)); ff_vk_ret2str(ret));
@ -592,8 +631,8 @@ int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
return AVERROR_EXTERNAL; return AVERROR_EXTERNAL;
} }
/* Rotate queues */ for (int i = 0; i < e->sem_sig_cnt; i++)
s->cur_queue_idx = (s->cur_queue_idx + 1) % s->queue_count; *e->sem_sig_val_dst[i] += 1;
return 0; return 0;
} }
@ -602,8 +641,7 @@ int ff_vk_add_dep_exec_ctx(AVFilterContext *avctx, FFVkExecContext *e,
AVBufferRef **deps, int nb_deps) AVBufferRef **deps, int nb_deps)
{ {
AVBufferRef **dst; AVBufferRef **dst;
VulkanFilterContext *s = avctx->priv; FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
FFVkQueueCtx *q = &e->queues[s->cur_queue_idx];
if (!deps || !nb_deps) if (!deps || !nb_deps)
return 0; return 0;
@ -632,7 +670,7 @@ err:
static int vulkan_filter_set_device(AVFilterContext *avctx, static int vulkan_filter_set_device(AVFilterContext *avctx,
AVBufferRef *device) AVBufferRef *device)
{ {
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
av_buffer_unref(&s->device_ref); av_buffer_unref(&s->device_ref);
@ -649,7 +687,7 @@ static int vulkan_filter_set_device(AVFilterContext *avctx,
static int vulkan_filter_set_frames(AVFilterContext *avctx, static int vulkan_filter_set_frames(AVFilterContext *avctx,
AVBufferRef *frames) AVBufferRef *frames)
{ {
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
av_buffer_unref(&s->frames_ref); av_buffer_unref(&s->frames_ref);
@ -664,7 +702,8 @@ int ff_vk_filter_config_input(AVFilterLink *inlink)
{ {
int err; int err;
AVFilterContext *avctx = inlink->dst; AVFilterContext *avctx = inlink->dst;
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn;
AVHWFramesContext *input_frames; AVHWFramesContext *input_frames;
if (!inlink->hw_frames_ctx) { if (!inlink->hw_frames_ctx) {
@ -695,6 +734,9 @@ int ff_vk_filter_config_input(AVFilterLink *inlink)
if (err < 0) if (err < 0)
return err; return err;
vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &s->props);
vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops);
/* Default output parameters match input parameters. */ /* Default output parameters match input parameters. */
s->input_format = input_frames->sw_format; s->input_format = input_frames->sw_format;
if (s->output_format == AV_PIX_FMT_NONE) if (s->output_format == AV_PIX_FMT_NONE)
@ -711,7 +753,7 @@ int ff_vk_filter_config_output_inplace(AVFilterLink *outlink)
{ {
int err; int err;
AVFilterContext *avctx = outlink->src; AVFilterContext *avctx = outlink->src;
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
av_buffer_unref(&outlink->hw_frames_ctx); av_buffer_unref(&outlink->hw_frames_ctx);
@ -741,7 +783,7 @@ int ff_vk_filter_config_output(AVFilterLink *outlink)
{ {
int err; int err;
AVFilterContext *avctx = outlink->src; AVFilterContext *avctx = outlink->src;
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
AVBufferRef *output_frames_ref; AVBufferRef *output_frames_ref;
AVHWFramesContext *output_frames; AVHWFramesContext *output_frames;
@ -790,7 +832,7 @@ fail:
int ff_vk_filter_init(AVFilterContext *avctx) int ff_vk_filter_init(AVFilterContext *avctx)
{ {
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
s->output_format = AV_PIX_FMT_NONE; s->output_format = AV_PIX_FMT_NONE;
@ -800,12 +842,12 @@ int ff_vk_filter_init(AVFilterContext *avctx)
return 0; return 0;
} }
FN_CREATING(VulkanFilterContext, VkSampler, sampler, samplers, samplers_num) FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num)
VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords, FFVkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
VkFilter filt) VkFilter filt)
{ {
VkResult ret; VkResult ret;
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
VkSamplerCreateInfo sampler_info = { VkSamplerCreateInfo sampler_info = {
@ -823,19 +865,22 @@ VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
.unnormalizedCoordinates = unnorm_coords, .unnormalizedCoordinates = unnorm_coords,
}; };
VkSampler *sampler = create_sampler(s); FFVkSampler *sctx = create_sampler(s);
if (!sampler) if (!sctx)
return NULL; return NULL;
ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info, ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
s->hwctx->alloc, sampler); s->hwctx->alloc, &sctx->sampler[0]);
if (ret != VK_SUCCESS) { if (ret != VK_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "Unable to init sampler: %s\n", av_log(avctx, AV_LOG_ERROR, "Unable to init sampler: %s\n",
ff_vk_ret2str(ret)); ff_vk_ret2str(ret));
return NULL; return NULL;
} }
return sampler; for (int i = 1; i < 4; i++)
sctx->sampler[i] = sctx->sampler[0];
return sctx;
} }
int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt) int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
@ -863,7 +908,7 @@ typedef struct ImageViewCtx {
static void destroy_imageview(void *opaque, uint8_t *data) static void destroy_imageview(void *opaque, uint8_t *data)
{ {
VulkanFilterContext *s = opaque; FFVulkanContext *s = opaque;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
ImageViewCtx *iv = (ImageViewCtx *)data; ImageViewCtx *iv = (ImageViewCtx *)data;
@ -877,7 +922,7 @@ int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e,
{ {
int err; int err;
AVBufferRef *buf; AVBufferRef *buf;
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
VkImageViewCreateInfo imgview_spawn = { VkImageViewCreateInfo imgview_spawn = {
@ -924,8 +969,8 @@ int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e,
return 0; return 0;
} }
FN_CREATING(VulkanPipeline, FFSPIRVShader, shader, shaders, shaders_num) FN_CREATING(FFVulkanPipeline, FFSPIRVShader, shader, shaders, shaders_num)
FFSPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl, FFSPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, FFVulkanPipeline *pl,
const char *name, VkShaderStageFlags stage) const char *name, VkShaderStageFlags stage)
{ {
FFSPIRVShader *shd = create_shader(pl); FFSPIRVShader *shd = create_shader(pl);
@ -984,7 +1029,7 @@ int ff_vk_compile_shader(AVFilterContext *avctx, FFSPIRVShader *shd,
{ {
int err; int err;
VkResult ret; VkResult ret;
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
VkShaderModuleCreateInfo shader_create; VkShaderModuleCreateInfo shader_create;
uint8_t *spirv; uint8_t *spirv;
@ -1043,25 +1088,24 @@ static const struct descriptor_props {
[VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, }, [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, },
}; };
int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, int ff_vk_add_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl,
FFSPIRVShader *shd, VulkanDescriptorSetBinding *desc, FFSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
int num, int only_print_to_shader) int num, int only_print_to_shader)
{ {
VkResult ret; VkResult ret;
VkDescriptorSetLayout *layout; VkDescriptorSetLayout *layout;
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
if (only_print_to_shader) if (only_print_to_shader)
goto print; goto print;
pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout), pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
pl->desc_layout_num + 1); pl->desc_layout_num + pl->qf->nb_queues);
if (!pl->desc_layout) if (!pl->desc_layout)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
layout = &pl->desc_layout[pl->desc_layout_num]; layout = &pl->desc_layout[pl->desc_layout_num];
memset(layout, 0, sizeof(*layout));
{ /* Create descriptor set layout descriptions */ { /* Create descriptor set layout descriptions */
VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 }; VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
@ -1076,23 +1120,29 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
desc_binding[i].descriptorType = desc[i].type; desc_binding[i].descriptorType = desc[i].type;
desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1); desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1);
desc_binding[i].stageFlags = desc[i].stages; desc_binding[i].stageFlags = desc[i].stages;
desc_binding[i].pImmutableSamplers = desc[i].samplers; desc_binding[i].pImmutableSamplers = desc[i].sampler ?
desc[i].sampler->sampler :
NULL;
} }
desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
desc_create_layout.pBindings = desc_binding; desc_create_layout.pBindings = desc_binding;
desc_create_layout.bindingCount = num; desc_create_layout.bindingCount = num;
for (int i = 0; i < pl->qf->nb_queues; i++) {
ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout, ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
s->hwctx->alloc, layout); s->hwctx->alloc, &layout[i]);
av_free(desc_binding);
if (ret != VK_SUCCESS) { if (ret != VK_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set " av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
"layout: %s\n", ff_vk_ret2str(ret)); "layout: %s\n", ff_vk_ret2str(ret));
av_free(desc_binding);
return AVERROR_EXTERNAL; return AVERROR_EXTERNAL;
} }
} }
av_free(desc_binding);
}
{ /* Pool each descriptor by type and update pool counts */ { /* Pool each descriptor by type and update pool counts */
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
int j; int j;
@ -1108,7 +1158,7 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize)); memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
} }
pl->pool_size_desc[j].type = desc[i].type; pl->pool_size_desc[j].type = desc[i].type;
pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1); pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*pl->qf->nb_queues;
} }
} }
@ -1132,27 +1182,32 @@ int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
pl->desc_template_info = av_realloc_array(pl->desc_template_info, pl->desc_template_info = av_realloc_array(pl->desc_template_info,
sizeof(*pl->desc_template_info), sizeof(*pl->desc_template_info),
pl->desc_layout_num + 1); pl->total_descriptor_sets + pl->qf->nb_queues);
if (!pl->desc_template_info) if (!pl->desc_template_info)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
dt = &pl->desc_template_info[pl->desc_layout_num]; dt = &pl->desc_template_info[pl->total_descriptor_sets];
memset(dt, 0, sizeof(*dt)); memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues);
dt->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO; for (int i = 0; i < pl->qf->nb_queues; i++) {
dt->templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET; dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
dt->descriptorSetLayout = *layout; dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
dt->pDescriptorUpdateEntries = des_entries; dt[i].descriptorSetLayout = layout[i];
dt->descriptorUpdateEntryCount = num; dt[i].pDescriptorUpdateEntries = des_entries;
dt[i].descriptorUpdateEntryCount = num;
}
} }
pl->desc_layout_num++; pl->descriptor_sets_num++;
pl->desc_layout_num += pl->qf->nb_queues;
pl->total_descriptor_sets += pl->qf->nb_queues;
print: print:
/* Write shader info */ /* Write shader info */
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
const struct descriptor_props *prop = &descriptor_props[desc[i].type]; const struct descriptor_props *prop = &descriptor_props[desc[i].type];
GLSLA("layout (set = %i, binding = %i", pl->desc_layout_num - 1, i); GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);
if (desc[i].mem_layout) if (desc[i].mem_layout)
GLSLA(", %s", desc[i].mem_layout); GLSLA(", %s", desc[i].mem_layout);
@ -1184,12 +1239,14 @@ print:
return 0; return 0;
} }
void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, void ff_vk_update_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl,
int set_id) int set_id)
{ {
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue;
vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev, vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
pl->desc_set[set_id], pl->desc_set[set_id],
pl->desc_template[set_id], pl->desc_template[set_id],
@ -1200,27 +1257,29 @@ void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e,
VkShaderStageFlagBits stage, int offset, VkShaderStageFlagBits stage, int offset,
size_t size, void *src) size_t size, void *src)
{ {
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
vk->CmdPushConstants(e->bufs[s->cur_queue_idx], e->bound_pl->pipeline_layout, vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout,
stage, offset, size, src); stage, offset, size, src);
} }
int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl) int ff_vk_init_pipeline_layout(AVFilterContext *avctx, FFVulkanPipeline *pl)
{ {
VkResult ret; VkResult ret;
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
pl->descriptor_sets_num = pl->desc_layout_num * s->queue_count; pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging));
if (!pl->desc_staging)
return AVERROR(ENOMEM);
{ /* Init descriptor set pool */ { /* Init descriptor set pool */
VkDescriptorPoolCreateInfo pool_create_info = { VkDescriptorPoolCreateInfo pool_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
.poolSizeCount = pl->pool_size_desc_num, .poolSizeCount = pl->pool_size_desc_num,
.pPoolSizes = pl->pool_size_desc, .pPoolSizes = pl->pool_size_desc,
.maxSets = pl->descriptor_sets_num, .maxSets = pl->total_descriptor_sets,
}; };
ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info, ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
@ -1237,11 +1296,11 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
VkDescriptorSetAllocateInfo alloc_info = { VkDescriptorSetAllocateInfo alloc_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.descriptorPool = pl->desc_pool, .descriptorPool = pl->desc_pool,
.descriptorSetCount = pl->descriptor_sets_num, .descriptorSetCount = pl->total_descriptor_sets,
.pSetLayouts = pl->desc_layout, .pSetLayouts = pl->desc_layout,
}; };
pl->desc_set = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_set)); pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set));
if (!pl->desc_set) if (!pl->desc_set)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
@ -1257,12 +1316,14 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
{ /* Finally create the pipeline layout */ { /* Finally create the pipeline layout */
VkPipelineLayoutCreateInfo spawn_pipeline_layout = { VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = pl->desc_layout_num, .pSetLayouts = (VkDescriptorSetLayout *)pl->desc_staging,
.pSetLayouts = pl->desc_layout,
.pushConstantRangeCount = pl->push_consts_num, .pushConstantRangeCount = pl->push_consts_num,
.pPushConstantRanges = pl->push_consts, .pPushConstantRanges = pl->push_consts,
}; };
for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues)
pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i];
ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout, ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
s->hwctx->alloc, &pl->pipeline_layout); s->hwctx->alloc, &pl->pipeline_layout);
av_freep(&pl->push_consts); av_freep(&pl->push_consts);
@ -1275,21 +1336,19 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
} }
{ /* Descriptor template (for tightly packed descriptors) */ { /* Descriptor template (for tightly packed descriptors) */
VkDescriptorUpdateTemplateCreateInfo *desc_template_info; VkDescriptorUpdateTemplateCreateInfo *dt;
pl->desc_template = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_template)); pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template));
if (!pl->desc_template) if (!pl->desc_template)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
/* Create update templates for the descriptor sets */ /* Create update templates for the descriptor sets */
for (int i = 0; i < pl->descriptor_sets_num; i++) { for (int i = 0; i < pl->total_descriptor_sets; i++) {
desc_template_info = &pl->desc_template_info[i % pl->desc_layout_num]; dt = &pl->desc_template_info[i];
desc_template_info->pipelineLayout = pl->pipeline_layout; dt->pipelineLayout = pl->pipeline_layout;
ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev, ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev,
desc_template_info, dt, s->hwctx->alloc,
s->hwctx->alloc,
&pl->desc_template[i]); &pl->desc_template[i]);
av_free((void *)desc_template_info->pDescriptorUpdateEntries);
if (ret != VK_SUCCESS) { if (ret != VK_SUCCESS) {
av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor " av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor "
"template: %s\n", ff_vk_ret2str(ret)); "template: %s\n", ff_vk_ret2str(ret));
@ -1297,23 +1356,34 @@ int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
} }
} }
/* Free the duplicated memory used for the template entries */
for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
dt = &pl->desc_template_info[i];
av_free((void *)dt->pDescriptorUpdateEntries);
}
av_freep(&pl->desc_template_info); av_freep(&pl->desc_template_info);
} }
return 0; return 0;
} }
FN_CREATING(VulkanFilterContext, VulkanPipeline, pipeline, pipelines, pipelines_num) FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num)
VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx) FFVulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx,
FFVkQueueFamilyCtx *qf)
{ {
return create_pipeline(avctx->priv); FFVulkanPipeline *pl = create_pipeline(avctx->priv);
if (pl)
pl->qf = qf;
return pl;
} }
int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl) int ff_vk_init_compute_pipeline(AVFilterContext *avctx, FFVulkanPipeline *pl)
{ {
int i; int i;
VkResult ret; VkResult ret;
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
VkComputePipelineCreateInfo pipe = { VkComputePipelineCreateInfo pipe = {
@ -1346,26 +1416,31 @@ int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl)
} }
void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e, void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e,
VulkanPipeline *pl) FFVulkanPipeline *pl)
{ {
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
vk->CmdBindPipeline(e->bufs[s->cur_queue_idx], pl->bind_point, pl->pipeline); vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline);
vk->CmdBindDescriptorSets(e->bufs[s->cur_queue_idx], pl->bind_point, for (int i = 0; i < pl->descriptor_sets_num; i++)
pl->pipeline_layout, 0, pl->descriptor_sets_num, pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue];
pl->desc_set, 0, 0);
vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point,
pl->pipeline_layout, 0,
pl->descriptor_sets_num,
(VkDescriptorSet *)pl->desc_staging,
0, NULL);
e->bound_pl = pl; e->bound_pl = pl;
} }
static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e) static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
{ {
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
/* Make sure all queues have finished executing */ /* Make sure all queues have finished executing */
for (int i = 0; i < s->queue_count; i++) { for (int i = 0; i < e->qf->nb_queues; i++) {
FFVkQueueCtx *q = &e->queues[i]; FFVkQueueCtx *q = &e->queues[i];
if (q->fence) { if (q->fence) {
@ -1389,7 +1464,7 @@ static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e)
} }
if (e->bufs) if (e->bufs)
vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, s->queue_count, e->bufs); vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs);
if (e->pool) if (e->pool)
vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc); vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
@ -1397,13 +1472,14 @@ static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e)
av_freep(&e->queues); av_freep(&e->queues);
av_freep(&e->sem_sig); av_freep(&e->sem_sig);
av_freep(&e->sem_sig_val); av_freep(&e->sem_sig_val);
av_freep(&e->sem_sig_val_dst);
av_freep(&e->sem_wait); av_freep(&e->sem_wait);
av_freep(&e->sem_wait_dst); av_freep(&e->sem_wait_dst);
av_freep(&e->sem_wait_val); av_freep(&e->sem_wait_val);
av_free(e); av_free(e);
} }
static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl) static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
{ {
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
@ -1433,6 +1509,7 @@ static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool, vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
s->hwctx->alloc); s->hwctx->alloc);
av_freep(&pl->desc_staging);
av_freep(&pl->desc_set); av_freep(&pl->desc_set);
av_freep(&pl->shaders); av_freep(&pl->shaders);
av_freep(&pl->desc_layout); av_freep(&pl->desc_layout);
@ -1443,8 +1520,10 @@ static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
/* Only freed in case of failure */ /* Only freed in case of failure */
av_freep(&pl->pool_size_desc); av_freep(&pl->pool_size_desc);
if (pl->desc_template_info) { if (pl->desc_template_info) {
for (int i = 0; i < pl->descriptor_sets_num; i++) for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
av_free((void *)pl->desc_template_info[i].pDescriptorUpdateEntries); VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i];
av_free((void *)dt->pDescriptorUpdateEntries);
}
av_freep(&pl->desc_template_info); av_freep(&pl->desc_template_info);
} }
@ -1453,7 +1532,7 @@ static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
void ff_vk_filter_uninit(AVFilterContext *avctx) void ff_vk_filter_uninit(AVFilterContext *avctx)
{ {
VulkanFilterContext *s = avctx->priv; FFVulkanContext *s = avctx->priv;
FFVulkanFunctions *vk = &s->vkfn; FFVulkanFunctions *vk = &s->vkfn;
ff_vk_glslang_uninit(); ff_vk_glslang_uninit();
@ -1463,7 +1542,8 @@ void ff_vk_filter_uninit(AVFilterContext *avctx)
av_freep(&s->exec_ctx); av_freep(&s->exec_ctx);
for (int i = 0; i < s->samplers_num; i++) { for (int i = 0; i < s->samplers_num; i++) {
vk->DestroySampler(s->hwctx->act_dev, *s->samplers[i], s->hwctx->alloc); vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0],
s->hwctx->alloc);
av_free(s->samplers[i]); av_free(s->samplers[i]);
} }
av_freep(&s->samplers); av_freep(&s->samplers);

@ -20,6 +20,7 @@
#define AVFILTER_VULKAN_H #define AVFILTER_VULKAN_H
#define VK_NO_PROTOTYPES #define VK_NO_PROTOTYPES
#define VK_ENABLE_BETA_EXTENSIONS
#include "avfilter.h" #include "avfilter.h"
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
@ -52,9 +53,6 @@
goto fail; \ goto fail; \
} while (0) } while (0)
/* Useful for attaching immutable samplers to arrays */
#define DUP_SAMPLER_ARRAY4(x) (VkSampler []){ x, x, x, x, }
typedef struct FFSPIRVShader { typedef struct FFSPIRVShader {
const char *name; /* Name for id/debugging purposes */ const char *name; /* Name for id/debugging purposes */
AVBPrint src; AVBPrint src;
@ -62,7 +60,11 @@ typedef struct FFSPIRVShader {
VkPipelineShaderStageCreateInfo shader; VkPipelineShaderStageCreateInfo shader;
} FFSPIRVShader; } FFSPIRVShader;
typedef struct VulkanDescriptorSetBinding { typedef struct FFVkSampler {
VkSampler sampler[4];
} FFVkSampler;
typedef struct FFVulkanDescriptorSetBinding {
const char *name; const char *name;
VkDescriptorType type; VkDescriptorType type;
const char *mem_layout; /* Storage images (rgba8, etc.) and buffers (std430, etc.) */ const char *mem_layout; /* Storage images (rgba8, etc.) and buffers (std430, etc.) */
@ -71,9 +73,9 @@ typedef struct VulkanDescriptorSetBinding {
uint32_t dimensions; /* Needed for e.g. sampler%iD */ uint32_t dimensions; /* Needed for e.g. sampler%iD */
uint32_t elems; /* 0 - scalar, 1 or more - vector */ uint32_t elems; /* 0 - scalar, 1 or more - vector */
VkShaderStageFlags stages; VkShaderStageFlags stages;
const VkSampler *samplers; /* Immutable samplers, length - #elems */ FFVkSampler *sampler; /* Sampler to use for all elems */
void *updater; /* Pointer to VkDescriptor*Info */ void *updater; /* Pointer to VkDescriptor*Info */
} VulkanDescriptorSetBinding; } FFVulkanDescriptorSetBinding;
typedef struct FFVkBuffer { typedef struct FFVkBuffer {
VkBuffer buf; VkBuffer buf;
@ -81,7 +83,15 @@ typedef struct FFVkBuffer {
VkMemoryPropertyFlagBits flags; VkMemoryPropertyFlagBits flags;
} FFVkBuffer; } FFVkBuffer;
typedef struct VulkanPipeline { typedef struct FFVkQueueFamilyCtx {
int queue_family;
int nb_queues;
int cur_queue;
} FFVkQueueFamilyCtx;
typedef struct FFVulkanPipeline {
FFVkQueueFamilyCtx *qf;
VkPipelineBindPoint bind_point; VkPipelineBindPoint bind_point;
/* Contexts */ /* Contexts */
@ -100,15 +110,18 @@ typedef struct VulkanPipeline {
VkDescriptorSetLayout *desc_layout; VkDescriptorSetLayout *desc_layout;
VkDescriptorPool desc_pool; VkDescriptorPool desc_pool;
VkDescriptorSet *desc_set; VkDescriptorSet *desc_set;
void **desc_staging;
VkDescriptorSetLayoutBinding **desc_binding;
VkDescriptorUpdateTemplate *desc_template; VkDescriptorUpdateTemplate *desc_template;
int desc_layout_num; int desc_layout_num;
int descriptor_sets_num; int descriptor_sets_num;
int total_descriptor_sets;
int pool_size_desc_num; int pool_size_desc_num;
/* Temporary, used to store data in between initialization stages */ /* Temporary, used to store data in between initialization stages */
VkDescriptorUpdateTemplateCreateInfo *desc_template_info; VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
VkDescriptorPoolSize *pool_size_desc; VkDescriptorPoolSize *pool_size_desc;
} VulkanPipeline; } FFVulkanPipeline;
typedef struct FFVkQueueCtx { typedef struct FFVkQueueCtx {
VkFence fence; VkFence fence;
@ -126,6 +139,8 @@ typedef struct FFVkQueueCtx {
} FFVkQueueCtx; } FFVkQueueCtx;
typedef struct FFVkExecContext { typedef struct FFVkExecContext {
FFVkQueueFamilyCtx *qf;
VkCommandPool pool; VkCommandPool pool;
VkCommandBuffer *bufs; VkCommandBuffer *bufs;
FFVkQueueCtx *queues; FFVkQueueCtx *queues;
@ -134,7 +149,7 @@ typedef struct FFVkExecContext {
int *nb_deps; int *nb_deps;
int *dep_alloc_size; int *dep_alloc_size;
VulkanPipeline *bound_pl; FFVulkanPipeline *bound_pl;
VkSemaphore *sem_wait; VkSemaphore *sem_wait;
int sem_wait_alloc; /* Allocated sem_wait */ int sem_wait_alloc; /* Allocated sem_wait */
@ -152,23 +167,23 @@ typedef struct FFVkExecContext {
uint64_t *sem_sig_val; uint64_t *sem_sig_val;
int sem_sig_val_alloc; int sem_sig_val_alloc;
uint64_t **sem_sig_val_dst;
int sem_sig_val_dst_alloc;
} FFVkExecContext; } FFVkExecContext;
typedef struct VulkanFilterContext { typedef struct FFVulkanContext {
const AVClass *class; const AVClass *class;
FFVulkanFunctions vkfn; FFVulkanFunctions vkfn;
FFVulkanExtensions extensions; FFVulkanExtensions extensions;
VkPhysicalDeviceProperties props;
VkPhysicalDeviceMemoryProperties mprops;
AVBufferRef *device_ref; AVBufferRef *device_ref;
AVBufferRef *frames_ref; /* For in-place filtering */ AVBufferRef *frames_ref; /* For in-place filtering */
AVHWDeviceContext *device; AVHWDeviceContext *device;
AVVulkanDeviceContext *hwctx; AVVulkanDeviceContext *hwctx;
/* State - mirrored with the exec ctx */
int cur_queue_idx;
int queue_family_idx;
int queue_count;
/* Properties */ /* Properties */
int output_width; int output_width;
int output_height; int output_height;
@ -176,7 +191,7 @@ typedef struct VulkanFilterContext {
enum AVPixelFormat input_format; enum AVPixelFormat input_format;
/* Samplers */ /* Samplers */
VkSampler **samplers; FFVkSampler **samplers;
int samplers_num; int samplers_num;
/* Exec contexts */ /* Exec contexts */
@ -184,12 +199,12 @@ typedef struct VulkanFilterContext {
int exec_ctx_num; int exec_ctx_num;
/* Pipelines (each can have 1 shader of each type) */ /* Pipelines (each can have 1 shader of each type) */
VulkanPipeline **pipelines; FFVulkanPipeline **pipelines;
int pipelines_num; int pipelines_num;
void *scratch; /* Scratch memory used only in functions */ void *scratch; /* Scratch memory used only in functions */
unsigned int scratch_size; unsigned int scratch_size;
} VulkanFilterContext; } FFVulkanContext;
/* Identity mapping - r = r, b = b, g = g, a = a */ /* Identity mapping - r = r, b = b, g = g, a = a */
extern const VkComponentMapping ff_comp_identity_map; extern const VkComponentMapping ff_comp_identity_map;
@ -218,10 +233,22 @@ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt);
*/ */
const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt); const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
/**
* Initialize a queue family.
* A queue limit of 0 means no limit.
*/
void ff_vk_qf_init(AVFilterContext *avctx, FFVkQueueFamilyCtx *qf,
VkQueueFlagBits dev_family, int queue_limit);
/**
* Rotate through the queues in a queue family.
*/
void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf);
/** /**
* Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit() * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit()
*/ */
VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords, FFVkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
VkFilter filt); VkFilter filt);
/** /**
@ -237,19 +264,20 @@ int ff_vk_create_imageview(AVFilterContext *avctx, FFVkExecContext *e,
* Define a push constant for a given stage into a pipeline. * Define a push constant for a given stage into a pipeline.
* Must be called before the pipeline layout has been initialized. * Must be called before the pipeline layout has been initialized.
*/ */
int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl, int ff_vk_add_push_constant(AVFilterContext *avctx, FFVulkanPipeline *pl,
int offset, int size, VkShaderStageFlagBits stage); int offset, int size, VkShaderStageFlagBits stage);
/** /**
* Inits a pipeline. Everything in it will be auto-freed when calling * Inits a pipeline. Everything in it will be auto-freed when calling
* ff_vk_filter_uninit(). * ff_vk_filter_uninit().
*/ */
VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx); FFVulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx,
FFVkQueueFamilyCtx *qf);
/** /**
* Inits a shader for a specific pipeline. Will be auto-freed on uninit. * Inits a shader for a specific pipeline. Will be auto-freed on uninit.
*/ */
FFSPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl, FFSPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, FFVulkanPipeline *pl,
const char *name, VkShaderStageFlags stage); const char *name, VkShaderStageFlags stage);
/** /**
@ -261,8 +289,8 @@ void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, FFSPIRVShader *shd,
/** /**
* Adds a descriptor set to the shader and registers them in the pipeline. * Adds a descriptor set to the shader and registers them in the pipeline.
*/ */
int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, int ff_vk_add_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl,
FFSPIRVShader *shd, VulkanDescriptorSetBinding *desc, FFSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
int num, int only_print_to_shader); int num, int only_print_to_shader);
/** /**
@ -280,27 +308,28 @@ void ff_vk_print_shader(AVFilterContext *avctx, FFSPIRVShader *shd, int prio);
* Initializes the pipeline layout after all shaders and descriptor sets have * Initializes the pipeline layout after all shaders and descriptor sets have
* been finished. * been finished.
*/ */
int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl); int ff_vk_init_pipeline_layout(AVFilterContext *avctx, FFVulkanPipeline *pl);
/** /**
* Initializes a compute pipeline. Will pick the first shader with the * Initializes a compute pipeline. Will pick the first shader with the
* COMPUTE flag set. * COMPUTE flag set.
*/ */
int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl); int ff_vk_init_compute_pipeline(AVFilterContext *avctx, FFVulkanPipeline *pl);
/** /**
* Updates a descriptor set via the updaters defined. * Updates a descriptor set via the updaters defined.
* Can be called immediately after pipeline creation, but must be called * Can be called immediately after pipeline creation, but must be called
* at least once before queue submission. * at least once before queue submission.
*/ */
void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl, void ff_vk_update_descriptor_set(AVFilterContext *avctx, FFVulkanPipeline *pl,
int set_id); int set_id);
/** /**
* Init an execution context for command recording and queue submission. * Init an execution context for command recording and queue submission.
 * Will be auto-freed on uninit. * Will be auto-freed on uninit.
*/ */
int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx); int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx,
FFVkQueueFamilyCtx *qf);
/** /**
* Begin recording to the command buffer. Previous execution must have been * Begin recording to the command buffer. Previous execution must have been
@ -313,7 +342,7 @@ int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e);
* Must be called after ff_vk_start_exec_recording() and before submission. * Must be called after ff_vk_start_exec_recording() and before submission.
*/ */
void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e, void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e,
VulkanPipeline *pl); FFVulkanPipeline *pl);
/** /**
* Updates push constants. * Updates push constants.

Loading…
Cancel
Save