diff --git a/configure b/configure index ad3894f77c..d4c0e31f56 100755 --- a/configure +++ b/configure @@ -236,6 +236,7 @@ External library support: --enable-libfontconfig enable libfontconfig, useful for drawtext filter [no] --enable-libfreetype enable libfreetype, needed for drawtext filter [no] --enable-libfribidi enable libfribidi, improves drawtext filter [no] + --enable-libglslang enable GLSL->SPIRV compilation via libglslang [no] --enable-libgme enable Game Music Emu via libgme [no] --enable-libgsm enable GSM de/encoding via libgsm [no] --enable-libiec61883 enable iec61883 via libiec61883 [no] @@ -1550,11 +1551,11 @@ require_cc(){ } require_cpp(){ - name="$1" - headers="$2" - classes="$3" - shift 3 - check_lib_cpp "$headers" "$classes" "$@" || die "ERROR: $name not found" + log require_cpp "$@" + name_version="$1" + name="${1%% *}" + shift + check_lib_cpp "$name" "$@" || die "ERROR: $name_version not found" } require_headers(){ @@ -1771,6 +1772,7 @@ EXTERNAL_LIBRARY_LIST=" libfontconfig libfreetype libfribidi + libglslang libgme libgsm libiec61883 @@ -6261,6 +6263,7 @@ enabled fontconfig && enable libfontconfig enabled libfontconfig && require_pkg_config libfontconfig fontconfig "fontconfig/fontconfig.h" FcInit enabled libfreetype && require_pkg_config libfreetype freetype2 "ft2build.h FT_FREETYPE_H" FT_Init_FreeType enabled libfribidi && require_pkg_config libfribidi fribidi fribidi.h fribidi_version_info +enabled libglslang && require_cpp libglslang glslang/SPIRV/GlslangToSpv.h "glslang::TIntermediate*" -lglslang -lOSDependent -lHLSL -lOGLCompiler -lSPVRemapper -lSPIRV -lSPIRV-Tools -lSPIRV-Tools-opt -lpthread -lstdc++ enabled libgme && { check_pkg_config libgme libgme gme/gme.h gme_new_emu || require libgme gme/gme.h gme_new_emu -lgme -lstdc++; } enabled libgsm && { for gsm_hdr in "gsm.h" "gsm/gsm.h"; do diff --git a/libavfilter/Makefile b/libavfilter/Makefile index ead47c2855..d49b8bcefa 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ 
-510,6 +510,8 @@ SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h SKIPHEADERS-$(CONFIG_OPENCL) += opencl.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_vpp.h +OBJS-$(CONFIG_LIBGLSLANG) += glslang.o + TOOLS = graph2dot TESTPROGS = drawutils filtfmts formats integral diff --git a/libavfilter/glslang.cpp b/libavfilter/glslang.cpp new file mode 100644 index 0000000000..497df6e245 --- /dev/null +++ b/libavfilter/glslang.cpp @@ -0,0 +1,243 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +extern "C" { +#include "libavutil/mem.h" +#include "libavutil/avassert.h" +} + +#include +#include +#include +#include + +#include "glslang.h" + +using namespace glslang; + +static pthread_mutex_t glslang_mutex = PTHREAD_MUTEX_INITIALIZER; +static int glslang_refcount = 0; + +/* We require Vulkan 1.1 */ +#define GLSL_VERSION EShTargetVulkan_1_1 + +/* Vulkan 1.1 implementations require SPIR-V 1.3 to be implemented */ +#define SPIRV_VERSION EShTargetSpv_1_3 + +// Taken from glslang's examples, which apparently generally bases the choices +// on OpenGL specification limits +static const TBuiltInResource DefaultTBuiltInResource = { + /* .MaxLights = */ 32, + /* .MaxClipPlanes = */ 6, + /* .MaxTextureUnits = */ 32, + /* .MaxTextureCoords = */ 32, + /* .MaxVertexAttribs = */ 
64, + /* .MaxVertexUniformComponents = */ 4096, + /* .MaxVaryingFloats = */ 64, + /* .MaxVertexTextureImageUnits = */ 32, + /* .MaxCombinedTextureImageUnits = */ 80, + /* .MaxTextureImageUnits = */ 32, + /* .MaxFragmentUniformComponents = */ 4096, + /* .MaxDrawBuffers = */ 32, + /* .MaxVertexUniformVectors = */ 128, + /* .MaxVaryingVectors = */ 8, + /* .MaxFragmentUniformVectors = */ 16, + /* .MaxVertexOutputVectors = */ 16, + /* .MaxFragmentInputVectors = */ 15, + /* .MinProgramTexelOffset = */ -8, + /* .MaxProgramTexelOffset = */ 7, + /* .MaxClipDistances = */ 8, + /* .MaxComputeWorkGroupCountX = */ 65535, + /* .MaxComputeWorkGroupCountY = */ 65535, + /* .MaxComputeWorkGroupCountZ = */ 65535, + /* .MaxComputeWorkGroupSizeX = */ 1024, + /* .MaxComputeWorkGroupSizeY = */ 1024, + /* .MaxComputeWorkGroupSizeZ = */ 64, + /* .MaxComputeUniformComponents = */ 1024, + /* .MaxComputeTextureImageUnits = */ 16, + /* .MaxComputeImageUniforms = */ 8, + /* .MaxComputeAtomicCounters = */ 8, + /* .MaxComputeAtomicCounterBuffers = */ 1, + /* .MaxVaryingComponents = */ 60, + /* .MaxVertexOutputComponents = */ 64, + /* .MaxGeometryInputComponents = */ 64, + /* .MaxGeometryOutputComponents = */ 128, + /* .MaxFragmentInputComponents = */ 128, + /* .MaxImageUnits = */ 8, + /* .MaxCombinedImageUnitsAndFragmentOutputs = */ 8, + /* .MaxCombinedShaderOutputResources = */ 8, + /* .MaxImageSamples = */ 0, + /* .MaxVertexImageUniforms = */ 0, + /* .MaxTessControlImageUniforms = */ 0, + /* .MaxTessEvaluationImageUniforms = */ 0, + /* .MaxGeometryImageUniforms = */ 0, + /* .MaxFragmentImageUniforms = */ 8, + /* .MaxCombinedImageUniforms = */ 8, + /* .MaxGeometryTextureImageUnits = */ 16, + /* .MaxGeometryOutputVertices = */ 256, + /* .MaxGeometryTotalOutputComponents = */ 1024, + /* .MaxGeometryUniformComponents = */ 1024, + /* .MaxGeometryVaryingComponents = */ 64, + /* .MaxTessControlInputComponents = */ 128, + /* .MaxTessControlOutputComponents = */ 128, + /* 
.MaxTessControlTextureImageUnits = */ 16, + /* .MaxTessControlUniformComponents = */ 1024, + /* .MaxTessControlTotalOutputComponents = */ 4096, + /* .MaxTessEvaluationInputComponents = */ 128, + /* .MaxTessEvaluationOutputComponents = */ 128, + /* .MaxTessEvaluationTextureImageUnits = */ 16, + /* .MaxTessEvaluationUniformComponents = */ 1024, + /* .MaxTessPatchComponents = */ 120, + /* .MaxPatchVertices = */ 32, + /* .MaxTessGenLevel = */ 64, + /* .MaxViewports = */ 16, + /* .MaxVertexAtomicCounters = */ 0, + /* .MaxTessControlAtomicCounters = */ 0, + /* .MaxTessEvaluationAtomicCounters = */ 0, + /* .MaxGeometryAtomicCounters = */ 0, + /* .MaxFragmentAtomicCounters = */ 8, + /* .MaxCombinedAtomicCounters = */ 8, + /* .MaxAtomicCounterBindings = */ 1, + /* .MaxVertexAtomicCounterBuffers = */ 0, + /* .MaxTessControlAtomicCounterBuffers = */ 0, + /* .MaxTessEvaluationAtomicCounterBuffers = */ 0, + /* .MaxGeometryAtomicCounterBuffers = */ 0, + /* .MaxFragmentAtomicCounterBuffers = */ 1, + /* .MaxCombinedAtomicCounterBuffers = */ 1, + /* .MaxAtomicCounterBufferSize = */ 16384, + /* .MaxTransformFeedbackBuffers = */ 4, + /* .MaxTransformFeedbackInterleavedComponents = */ 64, + /* .MaxCullDistances = */ 8, + /* .MaxCombinedClipAndCullDistances = */ 8, + /* .MaxSamples = */ 4, + /* .maxMeshOutputVerticesNV = */ 256, + /* .maxMeshOutputPrimitivesNV = */ 512, + /* .maxMeshWorkGroupSizeX_NV = */ 32, + /* .maxMeshWorkGroupSizeY_NV = */ 1, + /* .maxMeshWorkGroupSizeZ_NV = */ 1, + /* .maxTaskWorkGroupSizeX_NV = */ 32, + /* .maxTaskWorkGroupSizeY_NV = */ 1, + /* .maxTaskWorkGroupSizeZ_NV = */ 1, + /* .maxMeshViewCountNV = */ 4, + + .limits = { + /* .nonInductiveForLoops = */ 1, + /* .whileLoops = */ 1, + /* .doWhileLoops = */ 1, + /* .generalUniformIndexing = */ 1, + /* .generalAttributeMatrixVectorIndexing = */ 1, + /* .generalVaryingIndexing = */ 1, + /* .generalSamplerIndexing = */ 1, + /* .generalVariableIndexing = */ 1, + /* .generalConstantMatrixVectorIndexing = */ 1, + } 
+};
+
+/*
+ * Compile a GLSL source string into a SPIR-V binary.
+ *
+ * glslang_init() must have been called beforehand (asserted below).
+ *
+ * glsl:  NUL-terminated GLSL source
+ * stage: shader stage the source is written for
+ *
+ * Returns NULL only if the result struct itself could not be allocated.
+ * Otherwise returns a struct allocated with av_mallocz():
+ *  - on success rval is 0 and data/size hold an av_memdup()'d copy of the
+ *    generated SPIR-V words;
+ *  - on failure rval is a negative AVERROR code and error_msg may hold an
+ *    av_strdup()'d copy of the glslang info log (NULL if that copy failed).
+ * No release helper is declared alongside this function, so the caller is
+ * expected to av_free() data, error_msg and the struct.
+ */
+GLSlangResult *glslang_compile(const char *glsl, enum GLSlangStage stage)
+{
+    static const EShLanguage lang[] = {
+        [GLSLANG_VERTEX]   = EShLangVertex,
+        [GLSLANG_FRAGMENT] = EShLangFragment,
+        [GLSLANG_COMPUTE]  = EShLangCompute,
+    };
+
+    TShader *shader = NULL;
+    TProgram *prog = NULL;
+
+    GLSlangResult *res = (GLSlangResult *)av_mallocz(sizeof(*res));
+    if (!res)
+        return NULL;
+
+    /* Same assertion macro as glslang_uninit() */
+    av_assert0(glslang_refcount);
+
+    /* Reject stages outside the lookup table instead of reading past it */
+    if ((unsigned)stage >= sizeof(lang) / sizeof(lang[0])) {
+        res->rval = AVERROR(EINVAL);
+        return res;
+    }
+
+    /* Runs once; every failure breaks out to the shared cleanup below */
+    do {
+        std::vector<unsigned int> spirv; /* Result */
+        SpvOptions options; /* Options - by default all optimizations are off */
+
+        /* NOTE(review): plain new throws on failure rather than returning
+         * NULL, so the NULL checks below are defensive only */
+        shader = new TShader(lang[stage]);
+        if (!shader) {
+            res->rval = AVERROR(ENOMEM);
+            break;
+        }
+
+        shader->setEnvClient(EShClientVulkan, GLSL_VERSION);
+        shader->setEnvTarget(EShTargetSpv, SPIRV_VERSION);
+        shader->setStrings(&glsl, 1);
+        if (!shader->parse(&DefaultTBuiltInResource, GLSL_VERSION, true, EShMsgDefault)) {
+            res->error_msg = av_strdup(shader->getInfoLog());
+            res->rval = AVERROR_EXTERNAL;
+            break;
+        }
+
+        prog = new TProgram();
+        if (!prog) {
+            res->rval = AVERROR(ENOMEM);
+            break;
+        }
+
+        prog->addShader(shader);
+        if (!prog->link(EShMsgDefault)) {
+            res->error_msg = av_strdup(prog->getInfoLog());
+            res->rval = AVERROR_EXTERNAL;
+            break;
+        }
+
+        options.generateDebugInfo = false; /* Makes sense for files but not here */
+        options.disassemble = false; /* Will print disassembly on compilation */
+        options.validate = false; /* Validates the generated SPIRV, unneeded */
+        options.disableOptimizer = false; /* For debugging */
+        options.optimizeSize = true; /* It's faster */
+
+        GlslangToSpv(*prog->getIntermediate(lang[stage]), spirv, NULL, &options);
+
+        res->size = spirv.size()*sizeof(unsigned int);
+        res->data = av_memdup(spirv.data(), res->size);
+        if (!res->data)
+            res->rval = AVERROR(ENOMEM);
+    } while (0);
+
+    /* glslang's ShaderLang.h asks for a linked program to be destructed
+     * before the shaders linked into it; delete on NULL is a no-op */
+    delete prog;
+    delete shader;
+
+    return res;
+}
+
+/*
+ * Take a reference on the process-wide glslang state, initializing glslang
+ * on the first reference. Serialized by glslang_mutex.
+ *
+ * Returns 0 on success, 1 if InitializeProcess() reported failure.
+ *
+ * NOTE(review): the reference count is incremented even when initialization
+ * fails, so a failed call still has to be balanced by glslang_uninit() -
+ * confirm that callers do this.
+ */
+int glslang_init(void)
+{
+    int ret = 0;
+
+    pthread_mutex_lock(&glslang_mutex);
+    if (glslang_refcount++ == 0)
+        ret = !InitializeProcess();
+    pthread_mutex_unlock(&glslang_mutex);
+
+    return ret;
+}
+
+/*
+ * Drop a reference taken by glslang_init(); the last reference finalizes
+ * glslang. Aborts via av_assert0() if there is no reference to drop.
+ */
+void glslang_uninit(void)
+{
+    pthread_mutex_lock(&glslang_mutex);
+    av_assert0(glslang_refcount > 0);
+    if (--glslang_refcount == 0)
+        FinalizeProcess();
+    pthread_mutex_unlock(&glslang_mutex);
+}
diff --git a/libavfilter/glslang.h b/libavfilter/glslang.h
new file mode 100644
index 0000000000..d33808b945
--- /dev/null
+++ b/libavfilter/glslang.h
@@ -0,0 +1,52 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_GLSLANG_H +#define AVFILTER_GLSLANG_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +int glslang_init(void); +void glslang_uninit(void); + +typedef struct GLSlangResult { + int rval; + char *error_msg; + + void *data; /* Shader data or NULL */ + size_t size; +} GLSlangResult; + +enum GLSlangStage { + GLSLANG_VERTEX, + GLSLANG_FRAGMENT, + GLSLANG_COMPUTE, +}; + +/* Compile GLSL into a SPIRV stream, if possible */ +GLSlangResult *glslang_compile(const char *glsl, enum GLSlangStage stage); + +#ifdef __cplusplus +} +#endif + +#endif /* AVFILTER_GLSLANG_H */ diff --git a/libavfilter/vulkan.c b/libavfilter/vulkan.c new file mode 100644 index 0000000000..558b4bfbe0 --- /dev/null +++ b/libavfilter/vulkan.c @@ -0,0 +1,1236 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "formats.h" +#include "vulkan.h" +#include "glslang.h" + +/* Generic macro for creating contexts which need to keep their addresses + * if another context is created. 
*/
+/*
+ * FN_CREATING(ctx, type, shortname, array, num) defines
+ * create_<shortname>(ctx *dctx): it zero-allocates one `type`, grows the
+ * pointer array dctx->array by one slot, stores the new object there and
+ * increments dctx->num. Returns the new object, or NULL if either
+ * allocation fails, in which case dctx is left unmodified.
+ */
+#define FN_CREATING(ctx, type, shortname, array, num)                          \
+static av_always_inline type *create_ ##shortname(ctx *dctx)                   \
+{                                                                              \
+    type **array, *sctx = av_mallocz(sizeof(*sctx));                           \
+    if (!sctx)                                                                 \
+        return NULL;                                                           \
+                                                                               \
+    array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\
+    if (!array) {                                                              \
+        av_free(sctx);                                                         \
+        return NULL;                                                           \
+    }                                                                          \
+                                                                               \
+    dctx->array = array;                                                       \
+    dctx->array[dctx->num++] = sctx;                                           \
+                                                                               \
+    return sctx;                                                               \
+}
+
+/* Component mapping that leaves all four channels unswizzled */
+const VkComponentMapping ff_comp_identity_map = {
+    .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+    .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+    .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+    .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+};
+
+/* Converts return values to strings: yields the enumerator's own name, or
+ * "Unknown error" for any value not listed here */
+const char *ff_vk_ret2str(VkResult res)
+{
+#define CASE(VAL) case VAL: return #VAL
+    switch (res) {
+    CASE(VK_SUCCESS);
+    CASE(VK_NOT_READY);
+    CASE(VK_TIMEOUT);
+    CASE(VK_EVENT_SET);
+    CASE(VK_EVENT_RESET);
+    CASE(VK_INCOMPLETE);
+    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
+    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+    CASE(VK_ERROR_INITIALIZATION_FAILED);
+    CASE(VK_ERROR_DEVICE_LOST);
+    CASE(VK_ERROR_MEMORY_MAP_FAILED);
+    CASE(VK_ERROR_LAYER_NOT_PRESENT);
+    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
+    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
+    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
+    CASE(VK_ERROR_TOO_MANY_OBJECTS);
+    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
+    CASE(VK_ERROR_FRAGMENTED_POOL);
+    CASE(VK_ERROR_SURFACE_LOST_KHR);
+    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
+    CASE(VK_SUBOPTIMAL_KHR);
+    CASE(VK_ERROR_OUT_OF_DATE_KHR);
+    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
+    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
+    CASE(VK_ERROR_INVALID_SHADER_NV);
+    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
+    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
+    CASE(VK_ERROR_NOT_PERMITTED_EXT);
+    default: return "Unknown error";
+    }
+#undef CASE
+}
+
+/*
+ * Allocate device memory satisfying `req` from the first memory type that
+ * is both allowed by req->memoryTypeBits and has all of `req_flags`.
+ *
+ * req:             requirements; req->size is rounded up in place to
+ *                  minMemoryMapAlignment when host-visible memory is asked
+ * alloc_extension: chained into VkMemoryAllocateInfo.pNext
+ * mem_flags:       the chosen type's property flags are OR-ed into this,
+ *                  so it must hold a defined value on entry
+ * mem:             receives the allocation
+ *
+ * Returns 0, AVERROR(EINVAL) if no memory type matches, or AVERROR(ENOMEM)
+ * if vkAllocateMemory() fails.
+ */
+static int vk_alloc_mem(AVFilterContext *avctx, VkMemoryRequirements *req,
+                        VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
+                        VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
+{
+    VkResult ret;
+    int index = -1;
+    VkPhysicalDeviceProperties props;
+    VkPhysicalDeviceMemoryProperties mprops;
+    VulkanFilterContext *s = avctx->priv;
+
+    VkMemoryAllocateInfo alloc_info = {
+        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+        .pNext = alloc_extension,
+    };
+
+    vkGetPhysicalDeviceProperties(s->hwctx->phys_dev, &props);
+    vkGetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &mprops);
+
+    /* Align if we need to */
+    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
+        req->size = FFALIGN(req->size, props.limits.minMemoryMapAlignment);
+
+    alloc_info.allocationSize = req->size;
+
+    /* The vulkan spec requires memory types to be sorted in the "optimal"
+     * order, so the first matching type we find will be the best/fastest one */
+    for (uint32_t i = 0; i < mprops.memoryTypeCount; i++) {
+        /* The memory type must be supported by the requirements (bitfield).
+         * The mask is built unsigned: shifting a signed 1 by 31 is undefined */
+        if (!(req->memoryTypeBits & (UINT32_C(1) << i)))
+            continue;
+
+        /* The memory type flags must include our properties */
+        if ((mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
+            continue;
+
+        /* Found a suitable memory type */
+        index = i;
+        break;
+    }
+
+    if (index < 0) {
+        av_log(avctx, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
+               req_flags);
+        return AVERROR(EINVAL);
+    }
+
+    alloc_info.memoryTypeIndex = index;
+
+    ret = vkAllocateMemory(s->hwctx->act_dev, &alloc_info,
+                           s->hwctx->alloc, mem);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR(ENOMEM);
+    }
+
+    *mem_flags |= mprops.memoryTypes[index].propertyFlags;
+
+    return 0;
+}
+
+/*
+ * Create an exclusive-mode buffer of `size` bytes with the given usage,
+ * allocate memory with the requested property flags and bind the two.
+ *
+ * Returns 0 or a negative AVERROR code. On failure whatever was created so
+ * far is left in `buf`.
+ * NOTE(review): presumably the caller then releases it with
+ * ff_vk_free_buf() - confirm against the callers.
+ */
+int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size,
+                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
+{
+    int err;
+    VkResult ret;
+    VkMemoryRequirements req;
+    VulkanFilterContext *s = avctx->priv;
+
+    VkBufferCreateInfo buf_spawn = {
+        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext       = NULL,
+        .usage       = usage,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .size        = size, /* Gets FFALIGNED during alloc if host visible
+                                but should be ok */
+    };
+
+    /* Create with the same allocator ff_vk_free_buf() destroys with; Vulkan
+     * requires the callbacks at create and destroy time to be compatible */
+    ret = vkCreateBuffer(s->hwctx->act_dev, &buf_spawn, s->hwctx->alloc,
+                         &buf->buf);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create buffer: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    vkGetBufferMemoryRequirements(s->hwctx->act_dev, buf->buf, &req);
+
+    err = vk_alloc_mem(avctx, &req, flags, NULL, &buf->flags, &buf->mem);
+    if (err)
+        return err;
+
+    ret = vkBindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+/*
+ * Map the whole memory of each of the `nb_buffers` buffers, storing the
+ * host pointers in mem[]. When `invalidate` is set, the mapped ranges of
+ * all buffers lacking HOST_COHERENT are invalidated afterwards.
+ *
+ * Returns 0 or a negative AVERROR code. Buffers mapped before a failure
+ * stay mapped.
+ */
+int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[],
+                      int nb_buffers, int invalidate)
+{
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+    VkMappedMemoryRange *inval_list = NULL;
+    int inval_count = 0;
+
+    for (int i = 0; i < nb_buffers; i++) {
+        ret = vkMapMemory(s->hwctx->act_dev, buf[i].mem, 0,
+                          VK_WHOLE_SIZE, 0, (void **)&mem[i]);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    if (!invalidate)
+        return 0;
+
+    for (int i = 0; i < nb_buffers; i++) {
+        const VkMappedMemoryRange ival_buf = {
+            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+            .memory = buf[i].mem,
+            .size   = VK_WHOLE_SIZE,
+        };
+        if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+            continue;
+        inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
+                                     (++inval_count)*sizeof(*inval_list));
+        if (!inval_list)
+            return AVERROR(ENOMEM);
+        /* av_fast_realloc() may have moved the block: record the new
+         * address so s->scratch never points at freed memory */
+        s->scratch = (void *)inval_list;
+        inval_list[inval_count - 1] = ival_buf;
+    }
+
+    if (inval_count) {
+        ret = vkInvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
+                                             inval_list);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Unmap the memory of each of the `nb_buffers` buffers. When `flush` is
+ * set, the mapped ranges of all buffers lacking HOST_COHERENT are flushed
+ * first.
+ *
+ * The buffers are unmapped even when flushing fails; the failure is then
+ * reported through the return value (0 or a negative AVERROR code).
+ */
+int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
+                        int flush)
+{
+    int err = 0;
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+    VkMappedMemoryRange *flush_list = NULL;
+    int flush_count = 0;
+
+    if (flush) {
+        for (int i = 0; i < nb_buffers; i++) {
+            const VkMappedMemoryRange flush_buf = {
+                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
+                .memory = buf[i].mem,
+                .size   = VK_WHOLE_SIZE,
+            };
+            if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
+                continue;
+            flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
+                                         (flush_count + 1)*sizeof(*flush_list));
+            if (!flush_list) {
+                /* Nothing can be flushed, but still unmap below */
+                err = AVERROR(ENOMEM);
+                flush_count = 0;
+                break;
+            }
+            /* Keep s->scratch in sync with a possibly moved block */
+            s->scratch = (void *)flush_list;
+            flush_list[flush_count++] = flush_buf;
+        }
+    }
+
+    if (flush_count) {
+        ret = vkFlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
+                                        flush_list);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to flush memory: %s\n",
+                   ff_vk_ret2str(ret));
+            err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
+        }
+    }
+
+    for (int i = 0; i < nb_buffers; i++)
+        vkUnmapMemory(s->hwctx->act_dev, buf[i].mem);
+
+    return err;
+}
+
+/*
+ * Destroy the buffer and free its memory, skipping null handles. The
+ * handles are reset afterwards so a repeated call is harmless. A NULL
+ * `buf` is ignored.
+ */
+void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf)
+{
+    VulkanFilterContext *s = avctx->priv;
+    if (!buf)
+        return;
+
+    if (buf->buf != VK_NULL_HANDLE) {
+        vkDestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
+        buf->buf = VK_NULL_HANDLE;
+    }
+    if (buf->mem != VK_NULL_HANDLE) {
+        vkFreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
+        buf->mem = VK_NULL_HANDLE;
+    }
+}
+
+/*
+ * Append a push constant range (offset, size, stage) to the pipeline.
+ *
+ * Returns 0, or AVERROR(ENOMEM) if the range array cannot grow; the ranges
+ * already registered stay intact in that case.
+ */
+int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl,
+                            int offset, int size, VkShaderStageFlagBits stage)
+{
+    VkPushConstantRange *pc;
+
+    /* Grow through a temporary: assigning the result straight to
+     * pl->push_consts would lose the old array if the call failed */
+    pc = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
+                          pl->push_consts_num + 1);
+    if (!pc)
+        return AVERROR(ENOMEM);
+    pl->push_consts = pc;
+
+    pc = &pl->push_consts[pl->push_consts_num++];
+    memset(pc, 0, sizeof(*pc));
+
+    pc->stageFlags = stage;
+    pc->offset = offset;
+    pc->size = size;
+
+    return 0;
+}
+
+FN_CREATING(VulkanFilterContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
+/*
+ * Create an execution context for queue family `queue`: a resettable
+ * command pool, one primary command buffer, a fence, and queue 0 of that
+ * family.
+ *
+ * The context is registered in the filter context's exec_ctx array as soon
+ * as it is allocated, so it stays there even if a later step fails.
+ * *ctx is only written on success.
+ *
+ * Returns 0 or a negative AVERROR code.
+ */
+int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx, int queue)
+{
+    VkResult ret;
+    FFVkExecContext *e;
+    VulkanFilterContext *s = avctx->priv;
+
+    VkCommandPoolCreateInfo cqueue_create = {
+        .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+        .flags              = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+        .queueFamilyIndex   = queue,
+    };
+    VkCommandBufferAllocateInfo cbuf_create = {
+        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+        .commandBufferCount = 1,
+    };
+    VkFenceCreateInfo fence_spawn = { VK_STRUCTURE_TYPE_FENCE_CREATE_INFO };
+
+    e = create_exec_ctx(s);
+    if (!e)
+        return AVERROR(ENOMEM);
+
+    /* Failures below return a negative AVERROR code like the rest of the
+     * file, so callers testing for "< 0" see them */
+    ret = vkCreateCommandPool(s->hwctx->act_dev, &cqueue_create,
+                              s->hwctx->alloc, &e->pool);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Command pool creation failure: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    cbuf_create.commandPool = e->pool;
+
+    ret = vkAllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, &e->buf);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ret = vkCreateFence(s->hwctx->act_dev, &fence_spawn,
+                        s->hwctx->alloc, &e->fence);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    vkGetDeviceQueue(s->hwctx->act_dev, queue, 0, &e->queue);
+
+    *ctx = e;
+
+    return 0;
+}
+
+/*
+ * Begin one-time-submit recording on the context's command buffer and
+ * clear the wait/signal semaphore lists left from the previous submission.
+ *
+ * Returns 0 or AVERROR_EXTERNAL.
+ */
+int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e)
+{
+    VkResult ret;
+    VkCommandBufferBeginInfo cmd_start = {
+        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+    };
+
+    e->sem_wait_cnt = 0;
+    e->sem_sig_cnt = 0;
+
+    ret = vkBeginCommandBuffer(e->buf, &cmd_start);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to start command recording: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+/*
+ * Make the next submission depend on `frame`: each plane's semaphore is
+ * added to the wait list (with `in_wait_dst_flag` as its destination
+ * stage) and to the signal list.
+ *
+ * Returns 0, or AVERROR(ENOMEM) if a list cannot grow; the lists keep their
+ * previous contents in that case.
+ */
+int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
+                       AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag)
+{
+    AVVkFrame *f = (AVVkFrame *)frame->data[0];
+    AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
+    int planes = av_pix_fmt_count_planes(fc->sw_format);
+
+    for (int i = 0; i < planes; i++) {
+        /* Each list grows through a temporary so that a failed
+         * av_fast_realloc() does not overwrite the only pointer to it */
+        void *grown;
+
+        grown = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
+                                (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
+        if (!grown)
+            return AVERROR(ENOMEM);
+        e->sem_wait = grown;
+
+        grown = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
+                                (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
+        if (!grown)
+            return AVERROR(ENOMEM);
+        e->sem_wait_dst = grown;
+
+        grown = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
+                                (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
+        if (!grown)
+            return AVERROR(ENOMEM);
+        e->sem_sig = grown;
+
+        e->sem_wait[e->sem_wait_cnt] = f->sem[i];
+        e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
+        e->sem_wait_cnt++;
+
+        e->sem_sig[e->sem_sig_cnt] = f->sem[i];
+        e->sem_sig_cnt++;
+    }
+
+    return 0;
+}
+
+/*
+ * Finish recording, submit the command buffer with the accumulated wait
+ * and signal semaphores, then block until the context's fence signals and
+ * reset it.
+ *
+ * Returns 0 or AVERROR_EXTERNAL.
+ */
+int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e)
+{
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+
+    VkSubmitInfo s_info = {
+        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+        .commandBufferCount   = 1,
+        .pCommandBuffers      = &e->buf,
+
+        .pWaitSemaphores      = e->sem_wait,
+        .pWaitDstStageMask    = e->sem_wait_dst,
+        .waitSemaphoreCount   = e->sem_wait_cnt,
+
+        .pSignalSemaphores    = e->sem_sig,
+        .signalSemaphoreCount = e->sem_sig_cnt,
+    };
+
+    /* Do not submit a command buffer whose recording did not end cleanly */
+    ret = vkEndCommandBuffer(e->buf);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ret = vkQueueSubmit(e->queue, 1, &s_info, e->fence);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    ret = vkWaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to wait for fence: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+    vkResetFences(s->hwctx->act_dev, 1, &e->fence);
+
+    return 0;
+}
+
+int
ff_vk_filter_query_formats(AVFilterContext *avctx)
+{
+    /* Advertises AV_PIX_FMT_VULKAN as the only supported pixel format.
+     * Returns AVERROR(ENOMEM) if the list cannot be built, otherwise the
+     * result of ff_set_common_formats(). */
+    static const enum AVPixelFormat pixel_formats[] = {
+        AV_PIX_FMT_VULKAN, AV_PIX_FMT_NONE,
+    };
+    AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats);
+    if (!pix_fmts)
+        return AVERROR(ENOMEM);
+
+    return ff_set_common_formats(avctx, pix_fmts);
+}
+
+/* Replace the filter's device reference with a new reference to `device`
+ * and refresh the cached device/hwctx pointers. Returns 0 or
+ * AVERROR(ENOMEM). */
+static int vulkan_filter_set_device(AVFilterContext *avctx,
+                                    AVBufferRef *device)
+{
+    VulkanFilterContext *s = avctx->priv;
+
+    av_buffer_unref(&s->device_ref);
+
+    s->device_ref = av_buffer_ref(device);
+    if (!s->device_ref)
+        return AVERROR(ENOMEM);
+
+    s->device = (AVHWDeviceContext*)s->device_ref->data;
+    s->hwctx  = s->device->hwctx;
+
+    return 0;
+}
+
+/* Replace the filter's frames reference with a new reference to `frames`.
+ * Returns 0 or AVERROR(ENOMEM). */
+static int vulkan_filter_set_frames(AVFilterContext *avctx,
+                                    AVBufferRef *frames)
+{
+    VulkanFilterContext *s = avctx->priv;
+
+    av_buffer_unref(&s->frames_ref);
+
+    s->frames_ref = av_buffer_ref(frames);
+    if (!s->frames_ref)
+        return AVERROR(ENOMEM);
+
+    return 0;
+}
+
+/*
+ * Input link configuration: requires a Vulkan hardware frames context on
+ * the link. For the first input only, it records the device and frames
+ * references and fills in any output format/size that is still unset from
+ * the input's parameters.
+ *
+ * Returns 0 or a negative AVERROR code.
+ */
+int ff_vk_filter_config_input(AVFilterLink *inlink)
+{
+    int err;
+    AVFilterContext *avctx = inlink->dst;
+    VulkanFilterContext *s = avctx->priv;
+    AVHWFramesContext *input_frames;
+
+    if (!inlink->hw_frames_ctx) {
+        av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
+               "hardware frames context on the input.\n");
+        return AVERROR(EINVAL);
+    }
+
+    /* Extract the device and default output format from the first input. */
+    if (avctx->inputs[0] != inlink)
+        return 0;
+
+    input_frames = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
+    if (input_frames->format != AV_PIX_FMT_VULKAN)
+        return AVERROR(EINVAL);
+
+    err = vulkan_filter_set_device(avctx, input_frames->device_ref);
+    if (err < 0)
+        return err;
+    err = vulkan_filter_set_frames(avctx, inlink->hw_frames_ctx);
+    if (err < 0)
+        return err;
+
+    /* Default output parameters match input parameters. */
+    s->input_format = input_frames->sw_format;
+    if (s->output_format == AV_PIX_FMT_NONE)
+        s->output_format = input_frames->sw_format;
+    if (!s->output_width)
+        s->output_width  = inlink->w;
+    if (!s->output_height)
+        s->output_height = inlink->h;
+
+    return 0;
+}
+
+/*
+ * Output link configuration for filters that reuse the input frames
+ * context: the output link gets a new reference to the frames context
+ * recorded by ff_vk_filter_config_input().
+ *
+ * Returns 0 or a negative AVERROR code.
+ */
+int ff_vk_filter_config_output_inplace(AVFilterLink *outlink)
+{
+    int err;
+    AVFilterContext *avctx = outlink->src;
+    VulkanFilterContext *s = avctx->priv;
+
+    av_buffer_unref(&outlink->hw_frames_ctx);
+
+    if (!s->device_ref) {
+        if (!avctx->hw_device_ctx) {
+            av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
+                   "Vulkan device.\n");
+            return AVERROR(EINVAL);
+        }
+
+        err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
+        if (err < 0)
+            return err;
+    }
+
+    /* frames_ref is only ever set from an input link; without one there is
+     * nothing to share, and av_buffer_ref() must not be handed NULL */
+    if (!s->frames_ref) {
+        av_log(avctx, AV_LOG_ERROR, "In-place Vulkan filtering requires a "
+               "hardware frames context on the input.\n");
+        return AVERROR(EINVAL);
+    }
+
+    outlink->hw_frames_ctx = av_buffer_ref(s->frames_ref);
+    if (!outlink->hw_frames_ctx)
+        return AVERROR(ENOMEM);
+
+    outlink->w = s->output_width;
+    outlink->h = s->output_height;
+
+    return 0;
+}
+
+/*
+ * Output link configuration for filters that produce new frames: allocates
+ * and initialises a Vulkan frames context with the filter's output format
+ * and size and attaches it to the output link.
+ *
+ * Returns 0 or a negative AVERROR code.
+ */
+int ff_vk_filter_config_output(AVFilterLink *outlink)
+{
+    int err;
+    AVFilterContext *avctx = outlink->src;
+    VulkanFilterContext *s = avctx->priv;
+    AVBufferRef *output_frames_ref;
+    AVHWFramesContext *output_frames;
+
+    av_buffer_unref(&outlink->hw_frames_ctx);
+
+    if (!s->device_ref) {
+        if (!avctx->hw_device_ctx) {
+            av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a "
+                   "Vulkan device.\n");
+            return AVERROR(EINVAL);
+        }
+
+        err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx);
+        if (err < 0)
+            return err;
+    }
+
+    output_frames_ref = av_hwframe_ctx_alloc(s->device_ref);
+    if (!output_frames_ref) {
+        err = AVERROR(ENOMEM);
+        goto fail;
+    }
+    output_frames = (AVHWFramesContext*)output_frames_ref->data;
+
+    output_frames->format    = AV_PIX_FMT_VULKAN;
+    output_frames->sw_format = s->output_format;
+    output_frames->width     = s->output_width;
+    output_frames->height    = s->output_height;
+
+    err = av_hwframe_ctx_init(output_frames_ref);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to initialise output "
+               "frames: %d.\n",
err);
+        goto fail;
+    }
+
+    outlink->hw_frames_ctx = output_frames_ref;
+    outlink->w = s->output_width;
+    outlink->h = s->output_height;
+
+    return 0;
+fail:
+    av_buffer_unref(&output_frames_ref);
+    return err;
+}
+
+/*
+ * Common filter init: marks the output format as unset and takes a
+ * reference on the glslang compiler.
+ *
+ * Returns 0, or AVERROR_EXTERNAL if glslang_init() reports failure.
+ */
+int ff_vk_filter_init(AVFilterContext *avctx)
+{
+    VulkanFilterContext *s = avctx->priv;
+
+    s->output_format = AV_PIX_FMT_NONE;
+
+    if (glslang_init())
+        return AVERROR_EXTERNAL;
+
+    return 0;
+}
+
+FN_CREATING(VulkanFilterContext, VkSampler, sampler, samplers, samplers_num)
+/*
+ * Create a clamp-to-edge sampler using `filt` for both magnification and
+ * minification. `unnorm_coords` selects unnormalized coordinates and, with
+ * them, nearest mipmap mode (linear otherwise).
+ *
+ * The sampler slot is registered in the filter context's samplers array
+ * before the Vulkan object is created, so it stays there on failure.
+ *
+ * Returns a pointer to the sampler handle, or NULL on failure.
+ */
+VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
+                              VkFilter filt)
+{
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+
+    /* The initializer must not read sampler_info's own members: they have
+     * no defined value while the object is still being initialized, so the
+     * shared values are spelled out from their sources instead */
+    const VkSamplerAddressMode addr_mode = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
+
+    VkSamplerCreateInfo sampler_info = {
+        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+        .magFilter = filt,
+        .minFilter = filt,
+        .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
+                                      VK_SAMPLER_MIPMAP_MODE_LINEAR,
+        .addressModeU = addr_mode,
+        .addressModeV = addr_mode,
+        .addressModeW = addr_mode,
+        .anisotropyEnable = VK_FALSE,
+        .compareOp = VK_COMPARE_OP_NEVER,
+        .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
+        .unnormalizedCoordinates = unnorm_coords,
+    };
+
+    VkSampler *sampler = create_sampler(s);
+    if (!sampler)
+        return NULL;
+
+    ret = vkCreateSampler(s->hwctx->act_dev, &sampler_info,
+                          s->hwctx->alloc, sampler);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to init sampler: %s\n",
+               ff_vk_ret2str(ret));
+        return NULL;
+    }
+
+    return sampler;
+}
+
+/*
+ * Pick the GLSL image format qualifier for a pixel format: "rgba16f" when
+ * the first component is deeper than 8 bits, "rgba8" otherwise.
+ *
+ * NOTE(review): the descriptor is dereferenced unchecked, so this assumes
+ * pixfmt is a valid format - confirm against the callers.
+ */
+const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
+{
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt);
+    const int high = desc->comp[0].depth > 8;
+    return high ? "rgba16f" : "rgba8";
+}
+
+/*
+ * Create a 2D colour image view of `img` with format `fmt` and component
+ * mapping `map`, covering mip level 0 and array layer 0.
+ *
+ * Returns 0 or AVERROR_EXTERNAL.
+ */
+int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, VkImage img,
+                           VkFormat fmt, const VkComponentMapping map)
+{
+    VulkanFilterContext *s = avctx->priv;
+    VkImageViewCreateInfo imgview_spawn = {
+        .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .pNext      = NULL,
+        .image      = img,
+        .viewType   = VK_IMAGE_VIEW_TYPE_2D,
+        .format     = fmt,
+        .components = map,
+        .subresourceRange = {
+            .aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
+            .baseMipLevel   = 0,
+            .levelCount     = 1,
+            .baseArrayLayer = 0,
+            .layerCount     = 1,
+        },
+    };
+
+    VkResult ret = vkCreateImageView(s->hwctx->act_dev, &imgview_spawn,
+                                     s->hwctx->alloc, v);
+    if (ret != VK_SUCCESS) {
+        /* Log against the filter context like every other function here */
+        av_log(avctx, AV_LOG_ERROR, "Failed to create imageview: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    return 0;
+}
+
+/* Destroy the image view `*v` if there is one and reset the handle. A NULL
+ * `v` is ignored. */
+void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView *v)
+{
+    VulkanFilterContext *s = avctx->priv;
+    if (v && *v) {
+        vkDestroyImageView(s->hwctx->act_dev, *v, s->hwctx->alloc);
+        /* VkImageView is not a pointer type on every platform, so reset it
+         * with the handle constant rather than NULL */
+        *v = VK_NULL_HANDLE;
+    }
+}
+
+FN_CREATING(VulkanPipeline, SPIRVShader, shader, shaders, shaders_num)
+/*
+ * Add a new shader to pipeline `pl` for the given stage and start its GLSL
+ * source with the #version line and the IS_WITHIN helper macro.
+ *
+ * `name` is stored by pointer, not copied. Returns the shader, or NULL if
+ * it could not be allocated.
+ */
+SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl,
+                               const char *name, VkShaderStageFlags stage)
+{
+    SPIRVShader *shd = create_shader(pl);
+    if (!shd)
+        return NULL;
+
+    av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
+    shd->shader.stage = stage;
+
+    shd->name = name;
+
+    GLSLF(0, #version %i ,460);
+    GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );
+    GLSLC(0, );
+
+    return shd;
+}
+
+void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, SPIRVShader *shd,
+                                    int local_size[3])
+{
+    shd->local_size[0] = local_size[0];
+    shd->local_size[1] = local_size[1];
+    shd->local_size[2] = local_size[2];
+
+    av_bprintf(&shd->src, "layout (local_size_x = %i, "
+               "local_size_y = %i, local_size_z = %i) in;\n\n",
+               shd->local_size[0], shd->local_size[1],
shd->local_size[2]);
+}
+
+/**
+ * Logs the shader source at the given level, prefixing every
+ * newline-terminated line with its 1-based line number and a tab.
+ * Text after the last newline is not printed.
+ */
+static void print_shader(AVFilterContext *avctx, SPIRVShader *shd, int prio)
+{
+    int line = 0;
+    const char *p = shd->src.str;
+    const char *start = p;
+    /* Measured once; the loop used to call strlen() on every iteration */
+    const size_t len = strlen(p);
+
+    AVBPrint buf;
+    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
+
+    for (size_t i = 0; i < len; i++) {
+        if (p[i] == '\n') {
+            av_bprintf(&buf, "%i\t", ++line);
+            av_bprint_append_data(&buf, start, &p[i] - start + 1);
+            start = &p[i + 1];
+        }
+    }
+
+    av_log(avctx, prio, "Shader %s: \n%s", shd->name, buf.str);
+    av_bprint_finalize(&buf, NULL);
+}
+
+/**
+ * Compiles the accumulated GLSL source to SPIR-V via glslang_compile() and
+ * wraps the result in a VkShaderModule stored in shd->shader.module.
+ *
+ * @param entrypoint stored by pointer in shd->shader.pName
+ * @return 0 on success, AVERROR(EINVAL) for a stage without a glslang
+ *         mapping slot, AVERROR(ENOMEM) if no result is returned, the
+ *         compiler's rval on compile errors, AVERROR_EXTERNAL if
+ *         vkCreateShaderModule() fails
+ */
+int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd,
+                         const char *entrypoint)
+{
+    int err;
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+    VkShaderModuleCreateInfo shader_create;
+    GLSlangResult *res;
+
+    static const enum GLSlangStage emap[] = {
+        [VK_SHADER_STAGE_VERTEX_BIT]   = GLSLANG_VERTEX,
+        [VK_SHADER_STAGE_FRAGMENT_BIT] = GLSLANG_FRAGMENT,
+        [VK_SHADER_STAGE_COMPUTE_BIT]  = GLSLANG_COMPUTE,
+    };
+
+    /* The table is indexed by the raw stage bit; reject anything that
+     * would read past its end */
+    if ((size_t)shd->shader.stage >= sizeof(emap) / sizeof(emap[0])) {
+        av_log(avctx, AV_LOG_ERROR, "Unsupported stage for shader %s\n",
+               shd->name);
+        return AVERROR(EINVAL);
+    }
+
+    shd->shader.pName = entrypoint;
+
+    res = glslang_compile(shd->src.str, emap[shd->shader.stage]);
+    if (!res)
+        return AVERROR(ENOMEM);
+
+    if (res->rval) {
+        err = res->rval;
+        av_log(avctx, AV_LOG_ERROR, "Error compiling shader %s: %s!\n",
+               shd->name, av_err2str(res->rval));
+        print_shader(avctx, shd, AV_LOG_ERROR);
+        if (res->error_msg)
+            av_log(avctx, AV_LOG_ERROR, "%s", res->error_msg);
+        av_free(res->error_msg);
+        /* The result struct itself used to leak on this path */
+        av_free(res);
+        return err;
+    }
+
+    print_shader(avctx, shd, AV_LOG_VERBOSE);
+
+    shader_create.sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+    shader_create.pNext    = NULL;
+    shader_create.codeSize = res->size;
+    shader_create.flags    = 0;
+    shader_create.pCode    = res->data;
+
+    /* Use the same allocator the module is later destroyed with in
+     * free_pipeline() */
+    ret = vkCreateShaderModule(s->hwctx->act_dev, &shader_create,
+                               s->hwctx->alloc, &shd->shader.module);
+
+    /* Free the GLSlangResult struct and the SPIR-V blob, which is not
+     * needed once the module has been created.
+     * NOTE(review): assumes data is an av_malloc()ed buffer owned by the
+     * result, like error_msg above - confirm against glslang_compile(). */
+    av_free(res->data);
+    av_free(res);
+
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to create shader module: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    av_log(avctx, AV_LOG_VERBOSE, "Shader %s linked! Size: %zu bytes\n",
+           shd->name, shader_create.codeSize);
+
+    return 0;
+}
+
+/* Per descriptor type: how it is updated and how it is declared in GLSL */
+static const struct descriptor_props {
+    size_t struct_size; /* Size of the opaque which updates the descriptor */
+    const char *type;
+    int is_uniform;
+    int mem_quali;      /* Can use a memory qualifier */
+    int dim_needed;     /* Must indicate dimension */
+    int buf_content;    /* Must indicate buffer contents */
+} descriptor_props[] = {
+    [VK_DESCRIPTOR_TYPE_SAMPLER]                = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 0, 0, },
+    [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "texture",       1, 0, 1, 0, },
+    [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "image",         1, 1, 1, 0, },
+    [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT]       = { sizeof(VkDescriptorImageInfo),  "subpassInput",  1, 0, 0, 0, },
+    [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 1, 0, },
+    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]         = { sizeof(VkDescriptorBufferInfo),  NULL,           1, 0, 0, 1, },
+    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER]         = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
+    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo),  NULL,           1, 0, 0, 1, },
+    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
+    [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "samplerBuffer", 1, 0, 0, 0, },
+    [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "imageBuffer",   1, 0, 0, 0, },
+};
+
+/**
+ * Registers a descriptor set made of num bindings in the pipeline (layout,
+ * pool size bookkeeping, update template description) and appends the
+ * matching GLSL declarations to the shader source.
+ *
+ * Each desc[i].updater must point inside the filter's private context:
+ * template offsets are computed relative to it, and that same context is
+ * what ff_vk_update_descriptor_set() hands to Vulkan as the data pointer.
+ *
+ * @param only_print_to_shader if nonzero nothing is registered and only
+ *                             the GLSL is written, using the index of the
+ *                             most recently registered set
+ * @return 0 on success, AVERROR(ENOMEM) or AVERROR_EXTERNAL on failure
+ */
+int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
+                             SPIRVShader *shd, VulkanDescriptorSetBinding *desc,
+                             int num, int only_print_to_shader)
+{
+    int err;
+    VkResult ret;
+    VkDescriptorSetLayout *layout = NULL;
+    VkDescriptorSetLayout *layouts;
+    VulkanFilterContext *s = avctx->priv;
+
+    if (only_print_to_shader)
+        goto print;
+
+    /* Grow through a temporary so the old array survives a failure */
+    layouts = av_realloc_array(pl->desc_layout, pl->descriptor_sets_num + 1,
+                               sizeof(*pl->desc_layout));
+    if (!layouts)
+        return AVERROR(ENOMEM);
+    pl->desc_layout = layouts;
+
+    layout = &pl->desc_layout[pl->descriptor_sets_num];
+    memset(layout, 0, sizeof(*layout));
+
+    { /* Create descriptor set layout descriptions */
+        VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
+        VkDescriptorSetLayoutBinding *desc_binding;
+
+        desc_binding = av_calloc(num, sizeof(*desc_binding));
+        if (!desc_binding)
+            return AVERROR(ENOMEM);
+
+        for (int i = 0; i < num; i++) {
+            desc_binding[i].binding            = i;
+            desc_binding[i].descriptorType     = desc[i].type;
+            desc_binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
+            desc_binding[i].stageFlags         = desc[i].stages;
+            desc_binding[i].pImmutableSamplers = desc[i].samplers;
+        }
+
+        desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+        desc_create_layout.pBindings = desc_binding;
+        desc_create_layout.bindingCount = num;
+
+        ret = vkCreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
+                                          s->hwctx->alloc, layout);
+        av_free(desc_binding);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
+                   "layout: %s\n", ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    { /* Pool each descriptor by type and update pool counts */
+        for (int i = 0; i < num; i++) {
+            int j;
+            for (j = 0; j < pl->pool_size_desc_num; j++)
+                if (pl->pool_size_desc[j].type == desc[i].type)
+                    break;
+            if (j >= pl->pool_size_desc_num) {
+                VkDescriptorPoolSize *sizes;
+
+                sizes = av_realloc_array(pl->pool_size_desc,
+                                         pl->pool_size_desc_num + 1,
+                                         sizeof(*pl->pool_size_desc));
+                if (!sizes) {
+                    err = AVERROR(ENOMEM);
+                    goto fail;
+                }
+                pl->pool_size_desc = sizes;
+                pl->pool_size_desc_num++;
+                memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
+            }
+            pl->pool_size_desc[j].type             = desc[i].type;
+            pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1);
+        }
+    }
+
+    { /* Create template creation struct */
+        VkDescriptorUpdateTemplateCreateInfo *dt, *infos;
+        VkDescriptorUpdateTemplateEntry *des_entries;
+
+        /* Freed after descriptor set initialization */
+        des_entries = av_calloc(num, sizeof(*des_entries));
+        if (!des_entries) {
+            err = AVERROR(ENOMEM);
+            goto fail;
+        }
+
+        for (int i = 0; i < num; i++) {
+            des_entries[i].dstBinding      = i;
+            des_entries[i].descriptorType  = desc[i].type;
+            des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
+            des_entries[i].dstArrayElement = 0;
+            /* Byte offset of the updater inside the private context */
+            des_entries[i].offset          = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
+            des_entries[i].stride          = descriptor_props[desc[i].type].struct_size;
+        }
+
+        infos = av_realloc_array(pl->desc_template_info,
+                                 pl->descriptor_sets_num + 1,
+                                 sizeof(*pl->desc_template_info));
+        if (!infos) {
+            av_free(des_entries);
+            err = AVERROR(ENOMEM);
+            goto fail;
+        }
+        pl->desc_template_info = infos;
+
+        dt = &pl->desc_template_info[pl->descriptor_sets_num];
+        memset(dt, 0, sizeof(*dt));
+
+        dt->sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
+        dt->templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
+        dt->descriptorSetLayout = *layout;
+        dt->pDescriptorUpdateEntries = des_entries;
+        dt->descriptorUpdateEntryCount = num;
+    }
+
+    pl->descriptor_sets_num++;
+
+print:
+    /* Write shader info */
+    for (int i = 0; i < num; i++) {
+        const struct descriptor_props *prop = &descriptor_props[desc[i].type];
+        GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);
+
+        if (desc[i].mem_layout)
+            GLSLA(", %s", desc[i].mem_layout);
+        GLSLA(")");
+
+        if (prop->is_uniform)
+            GLSLA(" uniform");
+
+        if (prop->mem_quali && desc[i].mem_quali)
+            GLSLA(" %s", desc[i].mem_quali);
+
+        if (prop->type)
+            GLSLA(" %s", prop->type);
+
+        if (prop->dim_needed)
+            GLSLA("%iD", desc[i].dimensions);
+
+        GLSLA(" %s", desc[i].name);
+
+        if (prop->buf_content)
+            GLSLA(" {\n %s\n}", desc[i].buf_content);
+        else if (desc[i].elems > 0)
+            GLSLA("[%i]", desc[i].elems);
+
+        GLSLA(";\n");
+    }
+    GLSLA("\n");
+
+    return 0;
+
+fail:
+    /* descriptor_sets_num was not incremented, so free_pipeline() will
+     * never visit this slot: release the layout here */
+    vkDestroyDescriptorSetLayout(s->hwctx->act_dev, *layout, s->hwctx->alloc);
+    *layout = VK_NULL_HANDLE;
+    return err;
+}
+
+/**
+ * Writes descriptor set set_id through its update template. The filter's
+ * private context is passed as the template's data pointer.
+ */
+void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
+                                 int set_id)
+{
+    VulkanFilterContext *s = avctx->priv;
+
+    vkUpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
+                                      pl->desc_set[set_id],
+                                      pl->desc_template[set_id], s);
+}
+
+void
ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e,
+                            VkShaderStageFlagBits stage, int offset,
+                            size_t size, void *src)
+{
+    /* Recorded against the layout of the pipeline last bound to e */
+    vkCmdPushConstants(e->buf, e->bound_pl->pipeline_layout,
+                       stage, offset, size, src);
+}
+
+/**
+ * Creates the descriptor pool, allocates the descriptor sets, creates the
+ * pipeline layout and finally the descriptor update templates, consuming
+ * the temporary data gathered by ff_vk_add_descriptor_set().
+ *
+ * @return 0 on success, AVERROR(ENOMEM) or AVERROR_EXTERNAL on failure;
+ *         whatever was created so far stays attached to pl
+ */
+int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl)
+{
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+
+    { /* Init descriptor set pool */
+        VkDescriptorPoolCreateInfo pool_create_info = {
+            .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
+            .poolSizeCount = pl->pool_size_desc_num,
+            .pPoolSizes    = pl->pool_size_desc,
+            .maxSets       = pl->descriptor_sets_num,
+        };
+
+        ret = vkCreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
+                                     s->hwctx->alloc, &pl->desc_pool);
+        av_freep(&pl->pool_size_desc);
+        /* Keep the count in sync with the now released array */
+        pl->pool_size_desc_num = 0;
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor set "
+                   "pool: %s\n", ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    { /* Allocate descriptor sets */
+        VkDescriptorSetAllocateInfo alloc_info = {
+            .sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
+            .descriptorPool     = pl->desc_pool,
+            .descriptorSetCount = pl->descriptor_sets_num,
+            .pSetLayouts        = pl->desc_layout,
+        };
+
+        pl->desc_set = av_malloc_array(pl->descriptor_sets_num,
+                                       sizeof(*pl->desc_set));
+        if (!pl->desc_set)
+            return AVERROR(ENOMEM);
+
+        ret = vkAllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
+                                       pl->desc_set);
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    { /* Finally create the pipeline layout */
+        VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
+            .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+            .setLayoutCount         = pl->descriptor_sets_num,
+            .pSetLayouts            = pl->desc_layout,
+            .pushConstantRangeCount = pl->push_consts_num,
+            .pPushConstantRanges    = pl->push_consts,
+        };
+
+        ret = vkCreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
+                                     s->hwctx->alloc, &pl->pipeline_layout);
+        av_freep(&pl->push_consts);
+        pl->push_consts_num = 0;
+        if (ret != VK_SUCCESS) {
+            av_log(avctx, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
+                   ff_vk_ret2str(ret));
+            return AVERROR_EXTERNAL;
+        }
+    }
+
+    { /* Descriptor template (for tightly packed descriptors) */
+        VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
+
+        /* Zeroed: free_pipeline() inspects every slot, including the ones
+         * that are never reached when a template fails to be created */
+        pl->desc_template = av_calloc(pl->descriptor_sets_num,
+                                      sizeof(*pl->desc_template));
+        if (!pl->desc_template)
+            return AVERROR(ENOMEM);
+
+        /* Create update templates for the descriptor sets */
+        for (int i = 0; i < pl->descriptor_sets_num; i++) {
+            desc_template_info = &pl->desc_template_info[i];
+            desc_template_info->pipelineLayout = pl->pipeline_layout;
+            ret = vkCreateDescriptorUpdateTemplate(s->hwctx->act_dev,
+                                                   desc_template_info,
+                                                   s->hwctx->alloc,
+                                                   &pl->desc_template[i]);
+            av_free((void *)desc_template_info->pDescriptorUpdateEntries);
+            /* Cleared so the failure cleanup in free_pipeline() does not
+             * free the same entries a second time */
+            desc_template_info->pDescriptorUpdateEntries = NULL;
+            if (ret != VK_SUCCESS) {
+                av_log(avctx, AV_LOG_ERROR, "Unable to init descriptor "
+                       "template: %s\n", ff_vk_ret2str(ret));
+                return AVERROR_EXTERNAL;
+            }
+        }
+
+        av_freep(&pl->desc_template_info);
+    }
+
+    return 0;
+}
+
+FN_CREATING(VulkanFilterContext, VulkanPipeline, pipeline, pipelines, pipelines_num)
+/**
+ * Registers a new pipeline in the filter's private context.
+ *
+ * @return the result of create_pipeline() on avctx->priv
+ */
+VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx)
+{
+    return create_pipeline(avctx->priv);
+}
+
+int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl)
+{
+    int i;
+    VkResult ret;
+    VulkanFilterContext *s = avctx->priv;
+
+    VkComputePipelineCreateInfo pipe = {
+        .sType  = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+        .layout = pl->pipeline_layout,
+    };
+
+    for (i = 0; i < pl->shaders_num; i++) {
+        if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
+            pipe.stage = pl->shaders[i]->shader;
+            break;
+        }
+    }
+    if (i == pl->shaders_num) {
+        av_log(avctx, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
+        return AVERROR(EINVAL);
+    }
+
+    ret = vkCreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
+                                   s->hwctx->alloc, &pl->pipeline);
+    if (ret != VK_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
+               ff_vk_ret2str(ret));
+        return AVERROR_EXTERNAL;
+    }
+
+    pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
+
+    return 0;
+}
+
+/**
+ * Records commands binding the pipeline and all of its descriptor sets,
+ * and remembers the pipeline in e->bound_pl.
+ */
+void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e,
+                              VulkanPipeline *pl)
+{
+    vkCmdBindPipeline(e->buf, pl->bind_point, pl->pipeline);
+
+    vkCmdBindDescriptorSets(e->buf, pl->bind_point, pl->pipeline_layout, 0,
+                            pl->descriptor_sets_num, pl->desc_set, 0, 0);
+
+    e->bound_pl = pl;
+}
+
+/* Destroys the fence, command buffer and command pool of an execution
+ * context, then frees its semaphore arrays and the context itself. */
+static void free_exec_ctx(VulkanFilterContext *s, FFVkExecContext *e)
+{
+    vkDestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc);
+
+    if (e->buf != VK_NULL_HANDLE)
+        vkFreeCommandBuffers(s->hwctx->act_dev, e->pool, 1, &e->buf);
+    if (e->pool != VK_NULL_HANDLE)
+        vkDestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
+
+    av_free(e->sem_wait);
+    av_free(e->sem_wait_dst);
+    av_free(e->sem_sig);
+
+    av_free(e);
+}
+
+/* Releases everything a pipeline owns (shaders, pipeline objects,
+ * descriptor state, leftover temporary init data) and the pipeline itself. */
+static void free_pipeline(VulkanFilterContext *s, VulkanPipeline *pl)
+{
+    for (int i = 0; i < pl->shaders_num; i++) {
+        SPIRVShader *shd = pl->shaders[i];
+        av_bprint_finalize(&shd->src, NULL);
+        vkDestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
+                              s->hwctx->alloc);
+        av_free(shd);
+    }
+
+    vkDestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
+    vkDestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
+                            s->hwctx->alloc);
+
+    for (int i = 0; i < pl->descriptor_sets_num; i++) {
+        if (pl->desc_template && pl->desc_template[i])
+            vkDestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i],
+                                              s->hwctx->alloc);
+        if (pl->desc_layout && pl->desc_layout[i])
+            vkDestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i],
+                                         s->hwctx->alloc);
+    }
+
+    /* Also frees the descriptor sets */
+    if (pl->desc_pool)
+        vkDestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
+                                s->hwctx->alloc);
+
+    av_freep(&pl->desc_set);
+    av_freep(&pl->shaders);
+    av_freep(&pl->desc_layout);
+    av_freep(&pl->desc_template);
+    av_freep(&pl->push_consts);
+    pl->push_consts_num = 0;
+
+    /* Only freed in case of failure */
+    av_freep(&pl->pool_size_desc);
+    if (pl->desc_template_info) {
+        for (int i = 0; i < pl->descriptor_sets_num; i++)
+            av_free((void *)pl->desc_template_info[i].pDescriptorUpdateEntries);
+        av_freep(&pl->desc_template_info);
+    }
+
+    av_free(pl);
+}
+
+/**
+ * Generic uninit callback: calls glslang_uninit(), destroys all samplers,
+ * pipelines and execution contexts registered in the filter context, then
+ * frees the scratch buffer and drops the device and frames references.
+ */
+void ff_vk_filter_uninit(AVFilterContext *avctx)
+{
+    VulkanFilterContext *s = avctx->priv;
+
+    glslang_uninit();
+
+    for (int i = 0; i < s->samplers_num; i++) {
+        vkDestroySampler(s->hwctx->act_dev, *s->samplers[i], s->hwctx->alloc);
+        /* Each entry is a separate allocation (it is dereferenced above),
+         * just like the shader and pipeline entries that are av_free()d
+         * individually.
+         * NOTE(review): confirm against the FN_CREATING() definition. */
+        av_free(s->samplers[i]);
+    }
+    av_freep(&s->samplers);
+    s->samplers_num = 0;
+
+    for (int i = 0; i < s->pipelines_num; i++)
+        free_pipeline(s, s->pipelines[i]);
+    av_freep(&s->pipelines);
+    s->pipelines_num = 0;
+
+    for (int i = 0; i < s->exec_ctx_num; i++)
+        free_exec_ctx(s, s->exec_ctx[i]);
+    av_freep(&s->exec_ctx);
+    s->exec_ctx_num = 0;
+
+    av_freep(&s->scratch);
+    s->scratch_size = 0;
+
+    av_buffer_unref(&s->device_ref);
+    av_buffer_unref(&s->frames_ref);
+}
diff --git a/libavfilter/vulkan.h b/libavfilter/vulkan.h
new file mode 100644
index 0000000000..744e8c32f7
--- /dev/null
+++ b/libavfilter/vulkan.h
@@ -0,0 +1,323 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFILTER_VULKAN_H
+#define AVFILTER_VULKAN_H
+
+#include "avfilter.h"
+#include "libavutil/pixdesc.h"
+#include "libavutil/bprint.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/hwcontext_vulkan.h"
+
+/* GLSL management macros. All of them append to shd->src, so a variable
+ * named shd must be in scope wherever they are used. */
+#define INDENT(N) INDENT_##N
+#define INDENT_0
+#define INDENT_1 INDENT_0 " "
+#define INDENT_2 INDENT_1 INDENT_1
+#define INDENT_3 INDENT_2 INDENT_1
+#define INDENT_4 INDENT_3 INDENT_1
+#define INDENT_5 INDENT_4 INDENT_1
+#define INDENT_6 INDENT_5 INDENT_1
+/* Stringifies S into one source line at indent level N */
+#define C(N, S)          INDENT(N) #S "\n"
+/* Appends one stringified line */
+#define GLSLC(N, S)      av_bprintf(&shd->src, C(N, S))
+/* Appends printf-style formatted text as is */
+#define GLSLA(...)       av_bprintf(&shd->src, __VA_ARGS__)
+/* Appends one stringified line used as a printf-style format */
+#define GLSLF(N, S, ...) av_bprintf(&shd->src, C(N, S), __VA_ARGS__)
+/* Appends the string D between two empty lines. Wrapped in do/while so it
+ * behaves as a single statement; note that D is evaluated twice. */
+#define GLSLD(D)                                                               \
+    do {                                                                       \
+        GLSLC(0, );                                                            \
+        av_bprint_append_data(&shd->src, D, strlen(D));                        \
+        GLSLC(0, );                                                            \
+    } while (0)
+
+/* Helper, pretty much every Vulkan return value needs to be checked.
+ * Requires an int err variable and a fail label in the calling function. */
+#define RET(x)                                                                 \
+    do {                                                                       \
+        if ((err = (x)) < 0)                                                   \
+            goto fail;                                                         \
+    } while (0)
+
+/* Useful for attaching immutable samplers to arrays */
+#define DUP_SAMPLER_ARRAY4(x) (VkSampler []){ x, x, x, x, }
+
+typedef struct SPIRVShader {
+    const char *name;                       /* Name for id/debugging purposes */
+    AVBPrint src;                           /* GLSL source being assembled */
+    int local_size[3];                      /* Compute shader workgroup sizes */
+    VkPipelineShaderStageCreateInfo shader;
+} SPIRVShader;
+
+typedef struct VulkanDescriptorSetBinding {
+    const char         *name;
+    VkDescriptorType    type;
+    const char         *mem_layout;  /* Storage images (rgba8, etc.) and buffers (std430, etc.) */
+    const char         *mem_quali;   /* readonly, writeonly, etc. */
+    const char         *buf_content; /* For buffers */
+    uint32_t            dimensions;  /* Needed for e.g. sampler%iD */
+    uint32_t            elems;       /* 0 - scalar, 1 or more - vector */
+    VkShaderStageFlags  stages;
+    const VkSampler    *samplers;    /* Immutable samplers, length - #elems */
+    void               *updater;     /* Pointer to VkDescriptor*Info */
+} VulkanDescriptorSetBinding;
+
+typedef struct FFVkBuffer {
+    VkBuffer buf;
+    VkDeviceMemory mem;
+    VkMemoryPropertyFlagBits flags;
+} FFVkBuffer;
+
+typedef struct VulkanPipeline {
+    VkPipelineBindPoint bind_point;
+
+    /* Contexts */
+    VkPipelineLayout pipeline_layout;
+    VkPipeline       pipeline;
+
+    /* Shaders */
+    SPIRVShader **shaders;
+    int shaders_num;
+
+    /* Push consts */
+    VkPushConstantRange *push_consts;
+    int push_consts_num;
+
+    /* Descriptors */
+    VkDescriptorSetLayout      *desc_layout;
+    VkDescriptorPool            desc_pool;
+    VkDescriptorSet            *desc_set;
+    VkDescriptorUpdateTemplate *desc_template;
+    int                         descriptor_sets_num;
+    int                         pool_size_desc_num;
+
+    /* Temporary, used to store data in between initialization stages */
+    VkDescriptorUpdateTemplateCreateInfo *desc_template_info;
+    VkDescriptorPoolSize *pool_size_desc;
+} VulkanPipeline;
+
+typedef struct FFVkExecContext {
+    VkCommandPool pool;
+    VkCommandBuffer buf;
+    VkQueue queue;
+    VkFence fence;
+
+    VulkanPipeline *bound_pl; /* Set by ff_vk_bind_pipeline_exec() */
+
+    VkSemaphore *sem_wait;
+    int sem_wait_alloc; /* Allocated sem_wait */
+    int sem_wait_cnt;
+
+    VkPipelineStageFlagBits *sem_wait_dst;
+    int sem_wait_dst_alloc; /* Allocated sem_wait_dst */
+
+    VkSemaphore *sem_sig;
+    int sem_sig_alloc; /* Allocated sem_sig */
+    int sem_sig_cnt;
+} FFVkExecContext;
+
+typedef struct VulkanFilterContext {
+    const AVClass         *class;
+
+    AVBufferRef           *device_ref;
+    AVBufferRef           *frames_ref; /* For in-place filtering */
+    AVHWDeviceContext     *device;
+    AVVulkanDeviceContext *hwctx;
+
+    /* Properties */
+    int                 output_width;
+    int                output_height;
+    enum AVPixelFormat output_format;
+    enum AVPixelFormat  input_format;
+
+    /* Samplers */
+    VkSampler **samplers;
+    int samplers_num;
+
+    /* Exec contexts */
+    FFVkExecContext **exec_ctx;
+    int exec_ctx_num;
+
+    /* Pipelines (each can have 1 shader of each type) */
+    VulkanPipeline **pipelines;
+    int pipelines_num;
+
+    void *scratch; /* Scratch memory used only in functions */
+    unsigned int scratch_size;
+} VulkanFilterContext;
+
+/* Identity mapping - r = r, b = b, g = g, a = a */
+extern const VkComponentMapping ff_comp_identity_map;
+
+/**
+ * General lavfi IO functions
+ */
+int  ff_vk_filter_query_formats        (AVFilterContext *avctx);
+int  ff_vk_filter_init                 (AVFilterContext *avctx);
+int  ff_vk_filter_config_input         (AVFilterLink   *inlink);
+int  ff_vk_filter_config_output        (AVFilterLink  *outlink);
+int  ff_vk_filter_config_output_inplace(AVFilterLink  *outlink);
+void ff_vk_filter_uninit               (AVFilterContext *avctx);
+
+/**
+ * Converts Vulkan return values to strings
+ */
+const char *ff_vk_ret2str(VkResult res);
+
+/**
+ * Gets the glsl format string for a pixel format
+ */
+const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt);
+
+/**
+ * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit()
+ */
+VkSampler *ff_vk_init_sampler(AVFilterContext *avctx, int unnorm_coords,
+                              VkFilter filt);
+
+/**
+ * Create an imageview.
+ */
+int ff_vk_create_imageview(AVFilterContext *avctx, VkImageView *v, VkImage img,
+                           VkFormat fmt, const VkComponentMapping map);
+
+/**
+ * Destroy an imageview. Command buffer must have completed executing, which
+ * ff_vk_submit_exec_queue() will ensure
+ */
+void ff_vk_destroy_imageview(AVFilterContext *avctx, VkImageView *v);
+
+/**
+ * Define a push constant for a given stage into a pipeline.
+ * Must be called before the pipeline layout has been initialized.
+ */
+int ff_vk_add_push_constant(AVFilterContext *avctx, VulkanPipeline *pl,
+                            int offset, int size, VkShaderStageFlagBits stage);
+
+/**
+ * Inits a pipeline. Everything in it will be auto-freed when calling
+ * ff_vk_filter_uninit().
+ */
+VulkanPipeline *ff_vk_create_pipeline(AVFilterContext *avctx);
+
+/**
+ * Inits a shader for a specific pipeline.
Will be auto-freed on uninit.
+ */
+SPIRVShader *ff_vk_init_shader(AVFilterContext *avctx, VulkanPipeline *pl,
+                               const char *name, VkShaderStageFlags stage);
+
+/**
+ * Writes the workgroup size for a shader, both into the shader struct and
+ * as a layout declaration in its GLSL source.
+ */
+void ff_vk_set_compute_shader_sizes(AVFilterContext *avctx, SPIRVShader *shd,
+                                    int local_size[3]);
+
+/**
+ * Adds a descriptor set to the shader and registers them in the pipeline.
+ * If only_print_to_shader is nonzero, nothing is registered and only the
+ * GLSL declarations are written to the shader.
+ */
+int ff_vk_add_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
+                             SPIRVShader *shd, VulkanDescriptorSetBinding *desc,
+                             int num, int only_print_to_shader);
+
+/**
+ * Compiles the shader, entrypoint must be set to "main".
+ */
+int ff_vk_compile_shader(AVFilterContext *avctx, SPIRVShader *shd,
+                         const char *entrypoint);
+
+/**
+ * Initializes the pipeline layout after all shaders and descriptor sets have
+ * been finished.
+ */
+int ff_vk_init_pipeline_layout(AVFilterContext *avctx, VulkanPipeline *pl);
+
+/**
+ * Initializes a compute pipeline. Will pick the first shader with the
+ * COMPUTE flag set.
+ */
+int ff_vk_init_compute_pipeline(AVFilterContext *avctx, VulkanPipeline *pl);
+
+/**
+ * Updates a descriptor set via the updaters defined.
+ * Can be called immediately after pipeline creation, but must be called
+ * at least once before queue submission.
+ */
+void ff_vk_update_descriptor_set(AVFilterContext *avctx, VulkanPipeline *pl,
+                                 int set_id);
+
+/**
+ * Init an execution context for command recording and queue submission.
+ * Will be auto-freed on uninit.
+ */
+int ff_vk_create_exec_ctx(AVFilterContext *avctx, FFVkExecContext **ctx, int queue);
+
+/**
+ * Begin recording to the command buffer. Previous execution must have been
+ * completed, which ff_vk_submit_exec_queue() will ensure.
+ */
+int ff_vk_start_exec_recording(AVFilterContext *avctx, FFVkExecContext *e);
+
+/**
+ * Add a command to bind the completed pipeline and its descriptor sets.
+ * Must be called after ff_vk_start_exec_recording() and before submission.
+ */
+void ff_vk_bind_pipeline_exec(AVFilterContext *avctx, FFVkExecContext *e,
+                              VulkanPipeline *pl);
+
+/**
+ * Updates push constants.
+ * Must be called after binding a pipeline if any push constants were defined.
+ */
+void ff_vk_update_push_exec(AVFilterContext *avctx, FFVkExecContext *e,
+                            VkShaderStageFlagBits stage, int offset,
+                            size_t size, void *src);
+
+/**
+ * Adds a frame as a queue dependency. This manages semaphore signalling.
+ * Must be called before submission.
+ */
+int ff_vk_add_exec_dep(AVFilterContext *avctx, FFVkExecContext *e,
+                       AVFrame *frame, VkPipelineStageFlagBits in_wait_dst_flag);
+
+/**
+ * Submits a command buffer to the queue for execution.
+ * Will block until execution has finished in order to simplify resource
+ * management.
+ */
+int ff_vk_submit_exec_queue(AVFilterContext *avctx, FFVkExecContext *e);
+
+/**
+ * Create a VkBuffer with the specified parameters.
+ */
+int ff_vk_create_buf(AVFilterContext *avctx, FFVkBuffer *buf, size_t size,
+                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags);
+
+/**
+ * Maps the buffer to userspace. Set invalidate to 1 if reading the contents
+ * is necessary.
+ */
+int ff_vk_map_buffers(AVFilterContext *avctx, FFVkBuffer *buf, uint8_t *mem[],
+                      int nb_buffers, int invalidate);
+
+/**
+ * Unmaps the buffer from userspace. Set flush to 1 to write and sync.
+ */
+int ff_vk_unmap_buffers(AVFilterContext *avctx, FFVkBuffer *buf, int nb_buffers,
+                        int flush);
+
+/**
+ * Frees a buffer.
+ */
+void ff_vk_free_buf(AVFilterContext *avctx, FFVkBuffer *buf);
+
+#endif /* AVFILTER_VULKAN_H */