/* * Direct3D 12 HW acceleration video decoder * * copyright (c) 2022-2023 Wu Jianhua * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include #include "libavutil/common.h" #include "libavutil/log.h" #include "libavutil/time.h" #include "libavutil/imgutils.h" #include "libavutil/hwcontext_d3d12va_internal.h" #include "libavutil/hwcontext_d3d12va.h" #include "avcodec.h" #include "decode.h" #include "d3d12va_decode.h" #include "dxva2_internal.h" typedef struct HelperObjects { ID3D12CommandAllocator *command_allocator; ID3D12Resource *buffer; uint64_t fence_value; } HelperObjects; int ff_d3d12va_get_suitable_max_bitstream_size(AVCodecContext *avctx) { AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); return av_image_get_buffer_size(frames_ctx->sw_format, avctx->coded_width, avctx->coded_height, 1); } unsigned ff_d3d12va_get_surface_index(const AVCodecContext *avctx, D3D12VADecodeContext *ctx, const AVFrame *frame, int curr) { AVD3D12VAFrame *f; ID3D12Resource *res; unsigned i; f = (AVD3D12VAFrame *)frame->data[0]; if (!f) goto fail; res = f->texture; if (!res) goto fail; for (i = 0; i < ctx->max_num_ref; i++) { if (ctx->ref_resources[i] && res == ctx->ref_resources[i]) { ctx->used_mask |= 1 << i; return i; } } if (curr) { for (i = 0; i < ctx->max_num_ref; i++) { if (!((ctx->used_mask >> i) & 0x1)) { ctx->ref_resources[i] = res; return i; } } } fail: av_log(avctx, AV_LOG_WARNING, "Could not get surface index. Using 0 instead.\n"); return 0; } static int d3d12va_get_valid_helper_objects(AVCodecContext *avctx, ID3D12CommandAllocator **ppAllocator, ID3D12Resource **ppBuffer) { HRESULT hr; D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); HelperObjects obj = { 0 }; D3D12_HEAP_PROPERTIES heap_props = { .Type = D3D12_HEAP_TYPE_UPLOAD }; D3D12_RESOURCE_DESC desc = { .Dimension = D3D12_RESOURCE_DIMENSION_BUFFER, .Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT, .Width = ctx->bitstream_size, .Height = 1, .DepthOrArraySize = 1, .MipLevels = 1, .Format = DXGI_FORMAT_UNKNOWN, .SampleDesc = { .Count = 1, .Quality = 0 }, .Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR, .Flags = D3D12_RESOURCE_FLAG_NONE, }; if (av_fifo_peek(ctx->objects_queue, &obj, 1, 0) >= 0) { uint64_t completion = ID3D12Fence_GetCompletedValue(ctx->sync_ctx.fence); if (completion >= obj.fence_value) { *ppAllocator = obj.command_allocator; *ppBuffer = obj.buffer; av_fifo_read(ctx->objects_queue, &obj, 1); return 0; } } hr = ID3D12Device_CreateCommandAllocator(ctx->device_ctx->device, D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, &IID_ID3D12CommandAllocator, (void **)ppAllocator); if (FAILED(hr)) { av_log(avctx, AV_LOG_ERROR, "Failed to create a new command allocator!\n"); return AVERROR(EINVAL); } hr = ID3D12Device_CreateCommittedResource(ctx->device_ctx->device, &heap_props, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_GENERIC_READ, NULL, &IID_ID3D12Resource, (void **)ppBuffer); if (FAILED(hr)) { av_log(avctx, AV_LOG_ERROR, "Failed to create a new d3d12 buffer!\n"); return AVERROR(EINVAL); } return 0; } static int d3d12va_discard_helper_objects(AVCodecContext *avctx, ID3D12CommandAllocator *pAllocator, ID3D12Resource *pBuffer, uint64_t fence_value) { D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); HelperObjects obj = { .command_allocator = pAllocator, .buffer = pBuffer, .fence_value = fence_value, }; if (av_fifo_write(ctx->objects_queue, &obj, 1) < 0) { D3D12_OBJECT_RELEASE(pAllocator); D3D12_OBJECT_RELEASE(pBuffer); return AVERROR(ENOMEM); } return 0; } static int d3d12va_fence_completion(AVD3D12VASyncContext *psync_ctx) { uint64_t completion = ID3D12Fence_GetCompletedValue(psync_ctx->fence); if (completion < psync_ctx->fence_value) { if (FAILED(ID3D12Fence_SetEventOnCompletion(psync_ctx->fence, psync_ctx->fence_value, psync_ctx->event))) return AVERROR(EINVAL); WaitForSingleObjectEx(psync_ctx->event, INFINITE, FALSE); } return 0; } static void bufref_free_interface(void *opaque, uint8_t *data) { D3D12_OBJECT_RELEASE(opaque); } static AVBufferRef *bufref_wrap_interface(IUnknown *iface) { return av_buffer_create((uint8_t*)iface, 1, bufref_free_interface, iface, 0); } static int d3d12va_sync_with_gpu(AVCodecContext *avctx) { D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, ctx->sync_ctx.fence, ++ctx->sync_ctx.fence_value)); return d3d12va_fence_completion(&ctx->sync_ctx); fail: return AVERROR(EINVAL); } static int d3d12va_create_decoder_heap(AVCodecContext *avctx) { D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); AVD3D12VADeviceContext *device_hwctx = ctx->device_ctx; AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; D3D12_VIDEO_DECODER_HEAP_DESC desc = { .NodeMask = 0, .Configuration = ctx->cfg, .DecodeWidth = frames_ctx->width, .DecodeHeight = frames_ctx->height, .Format = frames_hwctx->format, .FrameRate = { avctx->framerate.num, avctx->framerate.den }, .BitRate = avctx->bit_rate, .MaxDecodePictureBufferCount = ctx->max_num_ref, }; DX_CHECK(ID3D12VideoDevice_CreateVideoDecoderHeap(device_hwctx->video_device, &desc, &IID_ID3D12VideoDecoderHeap, (void **)&ctx->decoder_heap)); return 0; fail: if (ctx->decoder) { av_log(avctx, AV_LOG_ERROR, "D3D12 doesn't support decoding frames with an extent " "[width(%d), height(%d)], on your device!\n", frames_ctx->width, frames_ctx->height); } return AVERROR(EINVAL); } static int d3d12va_create_decoder(AVCodecContext *avctx) { D3D12_VIDEO_DECODER_DESC desc; D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); AVD3D12VADeviceContext *device_hwctx = ctx->device_ctx; AVD3D12VAFramesContext *frames_hwctx = frames_ctx->hwctx; D3D12_FEATURE_DATA_VIDEO_DECODE_SUPPORT feature = { .NodeIndex = 0, .Configuration = ctx->cfg, .Width = frames_ctx->width, .Height = frames_ctx->height, .DecodeFormat = frames_hwctx->format, .FrameRate = { avctx->framerate.num, avctx->framerate.den }, .BitRate = avctx->bit_rate, }; DX_CHECK(ID3D12VideoDevice_CheckFeatureSupport(device_hwctx->video_device, D3D12_FEATURE_VIDEO_DECODE_SUPPORT, &feature, sizeof(feature))); if (!(feature.SupportFlags & D3D12_VIDEO_DECODE_SUPPORT_FLAG_SUPPORTED)) { av_log(avctx, AV_LOG_ERROR, "D3D12 video decode is not supported on this device.\n"); return AVERROR(ENOSYS); } if (!(feature.DecodeTier >= D3D12_VIDEO_DECODE_TIER_2)) { av_log(avctx, AV_LOG_ERROR, "D3D12 video decode on this device requires tier %d support, " "but it is not implemented.\n", feature.DecodeTier); return AVERROR_PATCHWELCOME; } desc = (D3D12_VIDEO_DECODER_DESC) { .NodeMask = 0, .Configuration = ctx->cfg, }; DX_CHECK(ID3D12VideoDevice_CreateVideoDecoder(device_hwctx->video_device, &desc, &IID_ID3D12VideoDecoder, (void **)&ctx->decoder)); ctx->decoder_ref = bufref_wrap_interface((IUnknown *)ctx->decoder); if (!ctx->decoder_ref) return AVERROR(ENOMEM); return 0; fail: return AVERROR(EINVAL); } int ff_d3d12va_common_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx) { AVHWFramesContext *frames_ctx = (AVHWFramesContext *)hw_frames_ctx->data; frames_ctx->format = AV_PIX_FMT_D3D12; frames_ctx->sw_format = avctx->sw_pix_fmt == AV_PIX_FMT_YUV420P10 ? AV_PIX_FMT_P010 : AV_PIX_FMT_NV12; frames_ctx->width = avctx->width; frames_ctx->height = avctx->height; return 0; } int ff_d3d12va_decode_init(AVCodecContext *avctx) { int ret; AVHWFramesContext *frames_ctx; D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); ID3D12Resource *buffer = NULL; ID3D12CommandAllocator *command_allocator = NULL; D3D12_COMMAND_QUEUE_DESC queue_desc = { .Type = D3D12_COMMAND_LIST_TYPE_VIDEO_DECODE, .Priority = 0, .Flags = D3D12_COMMAND_QUEUE_FLAG_NONE, .NodeMask = 0, }; ctx->pix_fmt = avctx->hwaccel->pix_fmt; ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_D3D12VA); if (ret < 0) return ret; frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx); ctx->device_ctx = (AVD3D12VADeviceContext *)frames_ctx->device_ctx->hwctx; if (frames_ctx->format != ctx->pix_fmt) { av_log(avctx, AV_LOG_ERROR, "Invalid pixfmt for hwaccel!\n"); goto fail; } ret = d3d12va_create_decoder(avctx); if (ret < 0) goto fail; ret = d3d12va_create_decoder_heap(avctx); if (ret < 0) goto fail; ctx->bitstream_size = ff_d3d12va_get_suitable_max_bitstream_size(avctx); ctx->ref_resources = av_calloc(ctx->max_num_ref, sizeof(*ctx->ref_resources)); if (!ctx->ref_resources) return AVERROR(ENOMEM); ctx->ref_subresources = av_calloc(ctx->max_num_ref, sizeof(*ctx->ref_subresources)); if (!ctx->ref_subresources) return AVERROR(ENOMEM); ctx->objects_queue = av_fifo_alloc2(D3D12VA_VIDEO_DEC_ASYNC_DEPTH, sizeof(HelperObjects), AV_FIFO_FLAG_AUTO_GROW); if (!ctx->objects_queue) return AVERROR(ENOMEM); DX_CHECK(ID3D12Device_CreateFence(ctx->device_ctx->device, 0, D3D12_FENCE_FLAG_NONE, &IID_ID3D12Fence, (void **)&ctx->sync_ctx.fence)); ctx->sync_ctx.event = CreateEvent(NULL, FALSE, FALSE, NULL); if (!ctx->sync_ctx.event) goto fail; ret = d3d12va_get_valid_helper_objects(avctx, &command_allocator, &buffer); if (ret < 0) goto fail; DX_CHECK(ID3D12Device_CreateCommandQueue(ctx->device_ctx->device, &queue_desc, &IID_ID3D12CommandQueue, (void **)&ctx->command_queue)); DX_CHECK(ID3D12Device_CreateCommandList(ctx->device_ctx->device, 0, queue_desc.Type, command_allocator, NULL, &IID_ID3D12CommandList, (void **)&ctx->command_list)); DX_CHECK(ID3D12VideoDecodeCommandList_Close(ctx->command_list)); ID3D12CommandQueue_ExecuteCommandLists(ctx->command_queue, 1, (ID3D12CommandList **)&ctx->command_list); ret = d3d12va_sync_with_gpu(avctx); if (ret < 0) goto fail; d3d12va_discard_helper_objects(avctx, command_allocator, buffer, ctx->sync_ctx.fence_value); if (ret < 0) goto fail; return 0; fail: D3D12_OBJECT_RELEASE(command_allocator); D3D12_OBJECT_RELEASE(buffer); ff_d3d12va_decode_uninit(avctx); return AVERROR(EINVAL); } int ff_d3d12va_decode_uninit(AVCodecContext *avctx) { int num_allocator = 0; D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); HelperObjects obj; if (ctx->sync_ctx.fence) d3d12va_sync_with_gpu(avctx); av_freep(&ctx->ref_resources); av_freep(&ctx->ref_subresources); D3D12_OBJECT_RELEASE(ctx->command_list); D3D12_OBJECT_RELEASE(ctx->command_queue); if (ctx->objects_queue) { while (av_fifo_read(ctx->objects_queue, &obj, 1) >= 0) { num_allocator++; D3D12_OBJECT_RELEASE(obj.buffer); D3D12_OBJECT_RELEASE(obj.command_allocator); } av_log(avctx, AV_LOG_VERBOSE, "Total number of command allocators reused: %d\n", num_allocator); } av_fifo_freep2(&ctx->objects_queue); D3D12_OBJECT_RELEASE(ctx->sync_ctx.fence); if (ctx->sync_ctx.event) CloseHandle(ctx->sync_ctx.event); D3D12_OBJECT_RELEASE(ctx->decoder_heap); av_buffer_unref(&ctx->decoder_ref); return 0; } static inline int d3d12va_update_reference_frames_state(AVCodecContext *avctx, D3D12_RESOURCE_BARRIER *barriers, ID3D12Resource *current_resource, int state_before, int state_end) { D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); int num_barrier = 0; for (int i = 0; i < ctx->max_num_ref; i++) { if (((ctx->used_mask >> i) & 0x1) && ctx->ref_resources[i] && ctx->ref_resources[i] != current_resource) { barriers[num_barrier].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; barriers[num_barrier].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; barriers[num_barrier].Transition = (D3D12_RESOURCE_TRANSITION_BARRIER){ .pResource = ctx->ref_resources[i], .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, .StateBefore = state_before, .StateAfter = state_end, }; num_barrier++; } } return num_barrier; } int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame, const void *pp, unsigned pp_size, const void *qm, unsigned qm_size, int(*update_input_arguments)(AVCodecContext *, D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS *, ID3D12Resource *)) { int ret; D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx); ID3D12Resource *buffer = NULL; ID3D12CommandAllocator *command_allocator = NULL; AVD3D12VAFrame *f = (AVD3D12VAFrame *)frame->data[0]; ID3D12Resource *resource = (ID3D12Resource *)f->texture; ID3D12VideoDecodeCommandList *cmd_list = ctx->command_list; D3D12_RESOURCE_BARRIER barriers[32] = { 0 }; D3D12_VIDEO_DECODE_INPUT_STREAM_ARGUMENTS input_args = { .NumFrameArguments = 2, .FrameArguments = { [0] = { .Type = D3D12_VIDEO_DECODE_ARGUMENT_TYPE_PICTURE_PARAMETERS, .Size = pp_size, .pData = (void *)pp, }, [1] = { .Type = D3D12_VIDEO_DECODE_ARGUMENT_TYPE_INVERSE_QUANTIZATION_MATRIX, .Size = qm_size, .pData = (void *)qm, }, }, .pHeap = ctx->decoder_heap, }; D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS output_args = { .ConversionArguments = { 0 }, .OutputSubresource = 0, .pOutputTexture2D = resource, }; UINT num_barrier = 1; barriers[0] = (D3D12_RESOURCE_BARRIER) { .Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE, .Transition = { .pResource = resource, .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, .StateBefore = D3D12_RESOURCE_STATE_COMMON, .StateAfter = D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE, }, }; memset(ctx->ref_subresources, 0, sizeof(UINT) * ctx->max_num_ref); input_args.ReferenceFrames.NumTexture2Ds = ctx->max_num_ref; input_args.ReferenceFrames.ppTexture2Ds = ctx->ref_resources; input_args.ReferenceFrames.pSubresources = ctx->ref_subresources; ret = d3d12va_fence_completion(&f->sync_ctx); if (ret < 0) goto fail; if (!qm) input_args.NumFrameArguments = 1; ret = d3d12va_get_valid_helper_objects(avctx, &command_allocator, &buffer); if (ret < 0) goto fail; ret = update_input_arguments(avctx, &input_args, buffer); if (ret < 0) goto fail; DX_CHECK(ID3D12CommandAllocator_Reset(command_allocator)); DX_CHECK(ID3D12VideoDecodeCommandList_Reset(cmd_list, command_allocator)); num_barrier += d3d12va_update_reference_frames_state(avctx, &barriers[1], resource, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_DECODE_READ); ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier, barriers); ID3D12VideoDecodeCommandList_DecodeFrame(cmd_list, ctx->decoder, &output_args, &input_args); for (int i = 0; i < num_barrier; i++) FFSWAP(D3D12_RESOURCE_STATES, barriers[i].Transition.StateBefore, barriers[i].Transition.StateAfter); ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier, barriers); DX_CHECK(ID3D12VideoDecodeCommandList_Close(cmd_list)); ID3D12CommandQueue_ExecuteCommandLists(ctx->command_queue, 1, (ID3D12CommandList **)&ctx->command_list); DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, f->sync_ctx.fence, ++f->sync_ctx.fence_value)); DX_CHECK(ID3D12CommandQueue_Signal(ctx->command_queue, ctx->sync_ctx.fence, ++ctx->sync_ctx.fence_value)); ret = d3d12va_discard_helper_objects(avctx, command_allocator, buffer, ctx->sync_ctx.fence_value); if (ret < 0) return ret; return 0; fail: if (command_allocator) d3d12va_discard_helper_objects(avctx, command_allocator, buffer, ctx->sync_ctx.fence_value); return AVERROR(EINVAL); }