From ad884d100259e55cb51a4239cd8a4fd5154c2073 Mon Sep 17 00:00:00 2001
From: Anton Khirnov
Date: Wed, 13 Jan 2016 14:25:58 +0100
Subject: [PATCH] hwcontext: add a CUDA implementation

---
 doc/APIchanges                 |   2 +
 libavutil/Makefile             |   2 +
 libavutil/hwcontext.c          |   3 +
 libavutil/hwcontext.h          |   1 +
 libavutil/hwcontext_cuda.c     | 331 +++++++++++++++++++++++++++++++++
 libavutil/hwcontext_cuda.h     |  46 ++++++
 libavutil/hwcontext_internal.h |   1 +
 7 files changed, 386 insertions(+)
 create mode 100644 libavutil/hwcontext_cuda.c
 create mode 100644 libavutil/hwcontext_cuda.h

diff --git a/doc/APIchanges b/doc/APIchanges
index d815d9fee3..d42868e22e 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -17,6 +17,8 @@ API changes, most recent first:
   xxxxxxx buffer.h - Add av_buffer_pool_init2().
   xxxxxxx hwcontext.h - Add a new installed header hwcontext.h with a new API
                         for handling hwaccel frames.
+  xxxxxxx hwcontext_cuda.h - Add a new installed header hwcontext_cuda.h with
+                             CUDA-specific hwcontext definitions.
   xxxxxxx hwcontext_vdpau.h - Add a new installed header hwcontext_vdpau.h with
                               VDPAU-specific hwcontext definitions.
   xxxxxxx pixfmt.h - Add AV_PIX_FMT_CUDA.
diff --git a/libavutil/Makefile b/libavutil/Makefile
index 180c37eb49..bc859256a7 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -24,6 +24,7 @@ HEADERS = adler32.h \
           frame.h                                                       \
           hmac.h                                                        \
           hwcontext.h                                                   \
+          hwcontext_cuda.h                                              \
           hwcontext_vdpau.h                                             \
           imgutils.h                                                    \
           intfloat.h                                                    \
@@ -106,6 +107,7 @@ OBJS = adler32.o \
        xtea.o                                                           \
 
 OBJS-$(CONFIG_LZO)                      += lzo.o
+OBJS-$(CONFIG_CUDA)                     += hwcontext_cuda.o
 OBJS-$(CONFIG_VDPAU)                    += hwcontext_vdpau.o
 
 OBJS += $(COMPAT_OBJS:%=../compat/%)
diff --git a/libavutil/hwcontext.c b/libavutil/hwcontext.c
index 2aa712e963..b6d05181c4 100644
--- a/libavutil/hwcontext.c
+++ b/libavutil/hwcontext.c
@@ -29,6 +29,9 @@
 #include "pixfmt.h"
 
 static const HWContextType *hw_table[] = {
+#if CONFIG_CUDA
+    &ff_hwcontext_type_cuda,
+#endif
 #if CONFIG_VDPAU
     &ff_hwcontext_type_vdpau,
 #endif
diff --git a/libavutil/hwcontext.h b/libavutil/hwcontext.h
index b30a20a42c..81ae817eb3 100644
--- a/libavutil/hwcontext.h
+++ b/libavutil/hwcontext.h
@@ -26,6 +26,7 @@
 
 enum AVHWDeviceType {
     AV_HWDEVICE_TYPE_VDPAU,
+    AV_HWDEVICE_TYPE_CUDA,
 };
 
 typedef struct AVHWDeviceInternal AVHWDeviceInternal;
diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
new file mode 100644
index 0000000000..6b87b61175
--- /dev/null
+++ b/libavutil/hwcontext_cuda.c
@@ -0,0 +1,331 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "buffer.h"
+#include "common.h"
+#include "hwcontext.h"
+#include "hwcontext_internal.h"
+#include "hwcontext_cuda.h"
+#include "mem.h"
+#include "pixdesc.h"
+#include "pixfmt.h"
+
+/* Per-frames-context private data: chroma subsampling shifts of sw_format. */
+typedef struct CUDAFramesContext {
+    int shift_width, shift_height;
+} CUDAFramesContext;
+
+/* Software pixel formats accepted as sw_format by cuda_frames_init(). */
+static const enum AVPixelFormat supported_formats[] = {
+    AV_PIX_FMT_NV12,
+    AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_YUV444P,
+};
+
+/**
+ * AVBuffer free callback: release device memory obtained in cuda_pool_alloc().
+ *
+ * @param opaque the owning AVHWFramesContext
+ * @param data   the CUdeviceptr, carried as a byte pointer
+ */
+static void cuda_buffer_free(void *opaque, uint8_t *data)
+{
+    AVHWFramesContext *ctx = opaque;
+    AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
+
+    CUcontext dummy;
+
+    /* Best effort: this callback returns void, so errors cannot be reported. */
+    cuCtxPushCurrent(hwctx->cuda_ctx);
+
+    cuMemFree((CUdeviceptr)data);
+
+    cuCtxPopCurrent(&dummy);
+}
+
+/**
+ * Buffer pool allocator: allocate size bytes of device memory with the
+ * device's CUDA context made current and wrap them in an AVBufferRef.
+ *
+ * @param opaque the owning AVHWFramesContext
+ * @param size   number of bytes to allocate
+ * @return a new buffer reference, or NULL on failure
+ */
+static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
+{
+    AVHWFramesContext *ctx = opaque;
+    AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
+
+    AVBufferRef *ret = NULL;
+    CUcontext dummy = NULL;
+    CUdeviceptr data;
+    CUresult err;
+
+    err = cuCtxPushCurrent(hwctx->cuda_ctx);
+    if (err != CUDA_SUCCESS) {
+        av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
+        return NULL;
+    }
+
+    err = cuMemAlloc(&data, size);
+    if (err != CUDA_SUCCESS)
+        goto fail;
+
+    ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
+    if (!ret) {
+        cuMemFree(data);
+        goto fail;
+    }
+
+fail:
+    /* Reached on success as well: the pushed context is always popped. */
+    cuCtxPopCurrent(&dummy);
+    return ret;
+}
+
+/**
+ * Validate sw_format, cache its chroma subsampling shifts and, unless the
+ * caller supplied its own pool, create an internal pool of device buffers.
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int cuda_frames_init(AVHWFramesContext *ctx)
+{
+    CUDAFramesContext *priv = ctx->internal->priv;
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
+        if (ctx->sw_format == supported_formats[i])
+            break;
+    }
+    if (i == FF_ARRAY_ELEMS(supported_formats)) {
+        av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
+               av_get_pix_fmt_name(ctx->sw_format));
+        return AVERROR(ENOSYS);
+    }
+
+    av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);
+
+    if (!ctx->pool) {
+        int size;
+
+        switch (ctx->sw_format) {
+        case AV_PIX_FMT_NV12:
+        case AV_PIX_FMT_YUV420P:
+            size = ctx->width * ctx->height * 3 / 2;
+            break;
+        case AV_PIX_FMT_YUV444P:
+            size = ctx->width * ctx->height * 3;
+            break;
+        default:
+            /* Unreachable while this switch mirrors supported_formats[];
+             * guards against using size uninitialized if they diverge. */
+            return AVERROR_BUG;
+        }
+
+        ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
+        if (!ctx->internal->pool_internal)
+            return AVERROR(ENOMEM);
+    }
+
+    return 0;
+}
+
+/**
+ * Take one buffer from the pool and set up the plane pointers and linesizes
+ * of frame for sw_format. Planes are laid out back to back in the single
+ * buffer, with linesize equal to the plane width (no row padding).
+ *
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
+{
+    frame->buf[0] = av_buffer_pool_get(ctx->pool);
+    if (!frame->buf[0])
+        return AVERROR(ENOMEM);
+
+    switch (ctx->sw_format) {
+    case AV_PIX_FMT_NV12:
+        frame->data[0]     = frame->buf[0]->data;
+        frame->data[1]     = frame->data[0] + ctx->width * ctx->height;
+        frame->linesize[0] = ctx->width;
+        frame->linesize[1] = ctx->width;
+        break;
+    case AV_PIX_FMT_YUV420P:
+        /* Note: plane 2 is placed before plane 1 in the buffer. */
+        frame->data[0]     = frame->buf[0]->data;
+        frame->data[2]     = frame->data[0] + ctx->width * ctx->height;
+        frame->data[1]     = frame->data[2] + ctx->width * ctx->height / 4;
+        frame->linesize[0] = ctx->width;
+        frame->linesize[1] = ctx->width / 2;
+        frame->linesize[2] = ctx->width / 2;
+        break;
+    case AV_PIX_FMT_YUV444P:
+        frame->data[0]     = frame->buf[0]->data;
+        frame->data[1]     = frame->data[0] + ctx->width * ctx->height;
+        frame->data[2]     = frame->data[1] + ctx->width * ctx->height;
+        frame->linesize[0] = ctx->width;
+        frame->linesize[1] = ctx->width;
+        frame->linesize[2] = ctx->width;
+        break;
+    default:
+        av_frame_unref(frame);
+        return AVERROR_BUG;
+    }
+
+    frame->format = AV_PIX_FMT_CUDA;
+    frame->width  = ctx->width;
+    frame->height = ctx->height;
+
+    return 0;
+}
+
+/**
+ * Report the formats usable for transfers: only sw_format, whatever the
+ * direction. The list is allocated here, terminated by AV_PIX_FMT_NONE
+ * and returned through *formats.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) on allocation failure
+ */
+static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
+                                     enum AVHWFrameTransferDirection dir,
+                                     enum AVPixelFormat **formats)
+{
+    enum AVPixelFormat *fmts;
+
+    fmts = av_malloc_array(2, sizeof(*fmts));
+    if (!fmts)
+        return AVERROR(ENOMEM);
+
+    fmts[0] = ctx->sw_format;
+    fmts[1] = AV_PIX_FMT_NONE;
+
+    *formats = fmts;
+
+    return 0;
+}
+
+/**
+ * Download the planes of a CUDA frame into a frame in host memory.
+ *
+ * @param dst destination frame (host memory)
+ * @param src source frame (device memory)
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
+                                   const AVFrame *src)
+{
+    CUDAFramesContext *priv = ctx->internal->priv;
+    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
+
+    CUcontext dummy;
+    CUresult err;
+    int i, ret = 0;
+
+    err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
+    if (err != CUDA_SUCCESS)
+        return AVERROR_UNKNOWN;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
+        CUDA_MEMCPY2D cpy = {
+            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
+            .dstMemoryType = CU_MEMORYTYPE_HOST,
+            .srcDevice     = (CUdeviceptr)src->data[i],
+            .dstHost       = dst->data[i],
+            .srcPitch      = src->linesize[i],
+            .dstPitch      = dst->linesize[i],
+            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
+            .Height        = src->height >> (i ? priv->shift_height : 0),
+        };
+
+        err = cuMemcpy2D(&cpy);
+        if (err != CUDA_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
+            /* Do not return here: the context pushed above must be popped. */
+            ret = AVERROR_UNKNOWN;
+            break;
+        }
+    }
+
+    cuCtxPopCurrent(&dummy);
+
+    return ret;
+}
+
+/**
+ * Upload the planes of a frame in host memory into a CUDA frame.
+ *
+ * @param dst destination frame (device memory)
+ * @param src source frame (host memory)
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
+                                 const AVFrame *src)
+{
+    CUDAFramesContext *priv = ctx->internal->priv;
+    AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
+
+    CUcontext dummy;
+    CUresult err;
+    int i, ret = 0;
+
+    err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
+    if (err != CUDA_SUCCESS)
+        return AVERROR_UNKNOWN;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
+        CUDA_MEMCPY2D cpy = {
+            .srcMemoryType = CU_MEMORYTYPE_HOST,
+            .dstMemoryType = CU_MEMORYTYPE_DEVICE,
+            .srcHost       = src->data[i],
+            .dstDevice     = (CUdeviceptr)dst->data[i],
+            .srcPitch      = src->linesize[i],
+            .dstPitch      = dst->linesize[i],
+            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
+            .Height        = src->height >> (i ? priv->shift_height : 0),
+        };
+
+        err = cuMemcpy2D(&cpy);
+        if (err != CUDA_SUCCESS) {
+            av_log(ctx, AV_LOG_ERROR, "Error transferring the data to the CUDA frame\n");
+            /* Do not return here: the context pushed above must be popped. */
+            ret = AVERROR_UNKNOWN;
+            break;
+        }
+    }
+
+    cuCtxPopCurrent(&dummy);
+
+    return ret;
+}
+
+/* Backend descriptor; referenced from hw_table[] in hwcontext.c. */
+const HWContextType ff_hwcontext_type_cuda = {
+    .type                 = AV_HWDEVICE_TYPE_CUDA,
+    .name                 = "CUDA",
+
+    .device_hwctx_size    = sizeof(AVCUDADeviceContext),
+    .frames_priv_size     = sizeof(CUDAFramesContext),
+
+    .frames_init          = cuda_frames_init,
+    .frames_get_buffer    = cuda_get_buffer,
+    .transfer_get_formats = cuda_transfer_get_formats,
+    .transfer_data_to     = cuda_transfer_data_to,
+    .transfer_data_from   = cuda_transfer_data_from,
+
+    .pix_fmts             = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
+};
diff --git a/libavutil/hwcontext_cuda.h b/libavutil/hwcontext_cuda.h
new file mode 100644
index 0000000000..7f067c7572
--- /dev/null
+++ b/libavutil/hwcontext_cuda.h
@@ -0,0 +1,46 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+
+#ifndef AVUTIL_HWCONTEXT_CUDA_H
+#define AVUTIL_HWCONTEXT_CUDA_H
+
+#include <cuda.h>
+
+#include "pixfmt.h"
+
+/**
+ * @file
+ * An API-specific header for AV_HWDEVICE_TYPE_CUDA.
+ *
+ * This API supports dynamic frame pools. AVHWFramesContext.pool must return
+ * AVBufferRefs whose data pointer is a CUdeviceptr.
+ */
+
+/**
+ * This struct is allocated as AVHWDeviceContext.hwctx
+ */
+typedef struct AVCUDADeviceContext {
+    CUcontext cuda_ctx;
+} AVCUDADeviceContext;
+
+/**
+ * AVHWFramesContext.hwctx is currently not used
+ */
+
+#endif /* AVUTIL_HWCONTEXT_CUDA_H */
diff --git a/libavutil/hwcontext_internal.h b/libavutil/hwcontext_internal.h
index 54f8d1050e..641232f140 100644
--- a/libavutil/hwcontext_internal.h
+++ b/libavutil/hwcontext_internal.h
@@ -86,6 +86,7 @@ struct AVHWFramesInternal {
     AVBufferPool *pool_internal;
 };
 
+extern const HWContextType ff_hwcontext_type_cuda;
 extern const HWContextType ff_hwcontext_type_vdpau;
 
 #endif /* AVUTIL_HWCONTEXT_INTERNAL_H */