@ -20,10 +20,8 @@
* DEALINGS IN THE SOFTWARE .
*/
# include <cuda.h>
# include "libavutil/hwcontext.h"
# include "libavutil/hwcontext_cuda.h"
# include "libavutil/hwcontext_cuda_internal.h"
# include "libavutil/cuda_check.h"
# include "libavutil/opt.h"
# include "libavutil/pixdesc.h"
@ -31,7 +29,7 @@
# include "avfilter.h"
# include "internal.h"
# define CHECK_CU(x) FF_CUDA_CHECK(ctx, x)
# define CHECK_CU(x) FF_CUDA_CHECK_DL (ctx, s->hwctx->internal->cuda_dl , x)
# define HIST_SIZE (3*256)
# define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )
@ -60,6 +58,7 @@ typedef struct ThumbnailCudaContext {
AVRational tb ; ///< copy of the input timebase to ease access
AVBufferRef * hw_frames_ctx ;
AVCUDADeviceContext * hwctx ;
CUmodule cu_module ;
@ -67,12 +66,10 @@ typedef struct ThumbnailCudaContext {
CUfunction cu_func_uchar2 ;
CUfunction cu_func_ushort ;
CUfunction cu_func_ushort2 ;
CUtexref cu_tex_uchar ;
CUtexref cu_tex_uchar2 ;
CUtexref cu_tex_ushort ;
CUtexref cu_tex_ushort2 ;
CUstream cu_stream ;
CUdeviceptr data ;
} ThumbnailCudaContext ;
# define OFFSET(x) offsetof(ThumbnailCudaContext, x)
@ -157,29 +154,44 @@ static AVFrame *get_best_frame(AVFilterContext *ctx)
return picref ;
}
static int thumbnail_kernel ( ThumbnailCuda Context * ctx , CUfunction func , CUtexref tex , int channels ,
static int thumbnail_kernel ( AVFilter Context * ctx , CUfunction func , int channels ,
int * histogram , uint8_t * src_dptr , int src_width , int src_height , int src_pitch , int pixel_size )
{
CUdeviceptr src_devptr = ( CUdeviceptr ) src_dptr ;
void * args [ ] = { & histogram , & src_width , & src_height } ;
CUDA_ARRAY_DESCRIPTOR desc ;
desc . Width = src_width ;
desc . Height = src_height ;
desc . NumChannels = channels ;
if ( pixel_size = = 1 ) {
desc . Format = CU_AD_FORMAT_UNSIGNED_INT8 ;
}
else {
desc . Format = CU_AD_FORMAT_UNSIGNED_INT16 ;
}
int ret ;
ThumbnailCudaContext * s = ctx - > priv ;
CudaFunctions * cu = s - > hwctx - > internal - > cuda_dl ;
CUtexObject tex = 0 ;
void * args [ ] = { & tex , & histogram , & src_width , & src_height } ;
CHECK_CU ( cuTexRefSetAddress2D_v3 ( tex , & desc , src_devptr , src_pitch ) ) ;
CHECK_CU ( cuLaunchKernel ( func ,
DIV_UP ( src_width , BLOCKX ) , DIV_UP ( src_height , BLOCKY ) , 1 ,
BLOCKX , BLOCKY , 1 , 0 , 0 , args , NULL ) ) ;
CUDA_TEXTURE_DESC tex_desc = {
. filterMode = CU_TR_FILTER_MODE_LINEAR ,
. flags = CU_TRSF_READ_AS_INTEGER ,
} ;
return 0 ;
CUDA_RESOURCE_DESC res_desc = {
. resType = CU_RESOURCE_TYPE_PITCH2D ,
. res . pitch2D . format = pixel_size = = 1 ?
CU_AD_FORMAT_UNSIGNED_INT8 :
CU_AD_FORMAT_UNSIGNED_INT16 ,
. res . pitch2D . numChannels = channels ,
. res . pitch2D . width = src_width ,
. res . pitch2D . height = src_height ,
. res . pitch2D . pitchInBytes = src_pitch ,
. res . pitch2D . devPtr = ( CUdeviceptr ) src_dptr ,
} ;
ret = CHECK_CU ( cu - > cuTexObjectCreate ( & tex , & res_desc , & tex_desc , NULL ) ) ;
if ( ret < 0 )
goto exit ;
ret = CHECK_CU ( cu - > cuLaunchKernel ( func ,
DIV_UP ( src_width , BLOCKX ) , DIV_UP ( src_height , BLOCKY ) , 1 ,
BLOCKX , BLOCKY , 1 , 0 , s - > cu_stream , args , NULL ) ) ;
exit :
if ( tex )
CHECK_CU ( cu - > cuTexObjectDestroy ( tex ) ) ;
return ret ;
}
static int thumbnail ( AVFilterContext * ctx , int * histogram , AVFrame * in )
@ -189,40 +201,40 @@ static int thumbnail(AVFilterContext *ctx, int *histogram, AVFrame *in)
switch ( in_frames_ctx - > sw_format ) {
case AV_PIX_FMT_NV12 :
thumbnail_kernel ( s , s - > cu_func_uchar , s - > cu_tex _uchar , 1 ,
thumbnail_kernel ( ctx , s - > cu_func_uchar , 1 ,
histogram , in - > data [ 0 ] , in - > width , in - > height , in - > linesize [ 0 ] , 1 ) ;
thumbnail_kernel ( s , s - > cu_func_uchar2 , s - > cu_tex _uchar2 , 2 ,
thumbnail_kernel ( ctx , s - > cu_func_uchar2 , 2 ,
histogram + 256 , in - > data [ 1 ] , in - > width / 2 , in - > height / 2 , in - > linesize [ 1 ] , 1 ) ;
break ;
case AV_PIX_FMT_YUV420P :
thumbnail_kernel ( s , s - > cu_func_uchar , s - > cu_tex _uchar , 1 ,
thumbnail_kernel ( ctx , s - > cu_func_uchar , 1 ,
histogram , in - > data [ 0 ] , in - > width , in - > height , in - > linesize [ 0 ] , 1 ) ;
thumbnail_kernel ( s , s - > cu_func_uchar , s - > cu_tex _uchar , 1 ,
thumbnail_kernel ( ctx , s - > cu_func_uchar , 1 ,
histogram + 256 , in - > data [ 1 ] , in - > width / 2 , in - > height / 2 , in - > linesize [ 1 ] , 1 ) ;
thumbnail_kernel ( s , s - > cu_func_uchar , s - > cu_tex _uchar , 1 ,
thumbnail_kernel ( ctx , s - > cu_func_uchar , 1 ,
histogram + 512 , in - > data [ 2 ] , in - > width / 2 , in - > height / 2 , in - > linesize [ 2 ] , 1 ) ;
break ;
case AV_PIX_FMT_YUV444P :
thumbnail_kernel ( s , s - > cu_func_uchar , s - > cu_tex _uchar , 1 ,
thumbnail_kernel ( ctx , s - > cu_func_uchar , 1 ,
histogram , in - > data [ 0 ] , in - > width , in - > height , in - > linesize [ 0 ] , 1 ) ;
thumbnail_kernel ( s , s - > cu_func_uchar , s - > cu_tex _uchar , 1 ,
thumbnail_kernel ( ctx , s - > cu_func_uchar , 1 ,
histogram + 256 , in - > data [ 1 ] , in - > width , in - > height , in - > linesize [ 1 ] , 1 ) ;
thumbnail_kernel ( s , s - > cu_func_uchar , s - > cu_tex _uchar , 1 ,
thumbnail_kernel ( ctx , s - > cu_func_uchar , 1 ,
histogram + 512 , in - > data [ 2 ] , in - > width , in - > height , in - > linesize [ 2 ] , 1 ) ;
break ;
case AV_PIX_FMT_P010LE :
case AV_PIX_FMT_P016LE :
thumbnail_kernel ( s , s - > cu_func_ushort , s - > cu_tex _ushort , 1 ,
thumbnail_kernel ( ctx , s - > cu_func_ushort , 1 ,
histogram , in - > data [ 0 ] , in - > width , in - > height , in - > linesize [ 0 ] , 2 ) ;
thumbnail_kernel ( s , s - > cu_func_ushort2 , s - > cu_tex _ushort2 , 2 ,
thumbnail_kernel ( ctx , s - > cu_func_ushort2 , 2 ,
histogram + 256 , in - > data [ 1 ] , in - > width / 2 , in - > height / 2 , in - > linesize [ 1 ] , 2 ) ;
break ;
case AV_PIX_FMT_YUV444P16 :
thumbnail_kernel ( s , s - > cu_func_ushort2 , s - > cu_tex_uchar , 1 ,
thumbnail_kernel ( ctx , s - > cu_func_ushort2 , 1 ,
histogram , in - > data [ 0 ] , in - > width , in - > height , in - > linesize [ 0 ] , 2 ) ;
thumbnail_kernel ( s , s - > cu_func_ushort2 , s - > cu_tex_uchar , 1 ,
thumbnail_kernel ( ctx , s - > cu_func_ushort2 , 1 ,
histogram + 256 , in - > data [ 1 ] , in - > width , in - > height , in - > linesize [ 1 ] , 2 ) ;
thumbnail_kernel ( s , s - > cu_func_ushort2 , s - > cu_tex_uchar , 1 ,
thumbnail_kernel ( ctx , s - > cu_func_ushort2 , 1 ,
histogram + 512 , in - > data [ 2 ] , in - > width , in - > height , in - > linesize [ 2 ] , 2 ) ;
break ;
default :
@ -236,10 +248,10 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
{
AVFilterContext * ctx = inlink - > dst ;
ThumbnailCudaContext * s = ctx - > priv ;
CudaFunctions * cu = s - > hwctx - > internal - > cuda_dl ;
AVFilterLink * outlink = ctx - > outputs [ 0 ] ;
int * hist = s - > frames [ s - > n ] . histogram ;
AVHWFramesContext * hw_frames_ctx = ( AVHWFramesContext * ) s - > hw_frames_ctx - > data ;
AVCUDADeviceContext * device_hwctx = hw_frames_ctx - > device_ctx - > hwctx ;
CUcontext dummy ;
CUDA_MEMCPY2D cpy = { 0 } ;
int ret = 0 ;
@ -247,11 +259,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
// keep a reference of each frame
s - > frames [ s - > n ] . buf = frame ;
ret = CHECK_CU ( cuCtxPushCurrent ( device_ hwctx- > cuda_ctx ) ) ;
ret = CHECK_CU ( cu - > cu CtxPushCurrent( s - > hwctx - > cuda_ctx ) ) ;
if ( ret < 0 )
return ret ;
CHECK_CU ( cuMemsetD8 ( s - > data , 0 , HIST_SIZE * sizeof ( int ) ) ) ;
CHECK_CU ( cu - > cu MemsetD8Async ( s - > data , 0 , HIST_SIZE * sizeof ( int ) , s - > cu_stream ) ) ;
thumbnail ( ctx , ( int * ) s - > data , frame ) ;
@ -264,7 +276,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
cpy . WidthInBytes = HIST_SIZE * sizeof ( int ) ;
cpy . Height = 1 ;
ret = CHECK_CU ( cuMemcpy2D ( & cpy ) ) ;
ret = CHECK_CU ( cu - > cu Memcpy2DAsync ( & cpy , s - > cu_stream ) ) ;
if ( ret < 0 )
return ret ;
@ -276,7 +288,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame)
hist [ i ] = 4 * hist [ i ] ;
}
CHECK_CU ( cuCtxPopCurrent ( & dummy ) ) ;
CHECK_CU ( cu - > cu CtxPopCurrent( & dummy ) ) ;
if ( ret < 0 )
return ret ;
@ -292,14 +304,15 @@ static av_cold void uninit(AVFilterContext *ctx)
{
int i ;
ThumbnailCudaContext * s = ctx - > priv ;
CudaFunctions * cu = s - > hwctx - > internal - > cuda_dl ;
if ( s - > data ) {
CHECK_CU ( cuMemFree ( s - > data ) ) ;
CHECK_CU ( cu - > cu MemFree( s - > data ) ) ;
s - > data = 0 ;
}
if ( s - > cu_module ) {
CHECK_CU ( cuModuleUnload ( s - > cu_module ) ) ;
CHECK_CU ( cu - > cu ModuleUnload( s - > cu_module ) ) ;
s - > cu_module = NULL ;
}
@ -342,43 +355,43 @@ static int config_props(AVFilterLink *inlink)
AVHWFramesContext * hw_frames_ctx = ( AVHWFramesContext * ) inlink - > hw_frames_ctx - > data ;
AVCUDADeviceContext * device_hwctx = hw_frames_ctx - > device_ctx - > hwctx ;
CUcontext dummy , cuda_ctx = device_hwctx - > cuda_ctx ;
CudaFunctions * cu = device_hwctx - > internal - > cuda_dl ;
int ret ;
extern char vf_thumbnail_cuda_ptx [ ] ;
ret = CHECK_CU ( cuCtxPushCurrent ( cuda_ctx ) ) ;
s - > hwctx = device_hwctx ;
s - > cu_stream = s - > hwctx - > stream ;
ret = CHECK_CU ( cu - > cuCtxPushCurrent ( cuda_ctx ) ) ;
if ( ret < 0 )
return ret ;
ret = CHECK_CU ( cuModuleLoadData ( & s - > cu_module , vf_thumbnail_cuda_ptx ) ) ;
ret = CHECK_CU ( cu - > cu ModuleLoadData( & s - > cu_module , vf_thumbnail_cuda_ptx ) ) ;
if ( ret < 0 )
return ret ;
CHECK_CU ( cuModuleGetFunction ( & s - > cu_func_uchar , s - > cu_module , " Thumbnail_uchar " ) ) ;
CHECK_CU ( cuModuleGetFunction ( & s - > cu_func_uchar2 , s - > cu_module , " Thumbnail_uchar2 " ) ) ;
CHECK_CU ( cuModuleGetFunction ( & s - > cu_func_ushort , s - > cu_module , " Thumbnail_ushort " ) ) ;
CHECK_CU ( cuModuleGetFunction ( & s - > cu_func_ushort2 , s - > cu_module , " Thumbnail_ushort2 " ) ) ;
ret = CHECK_CU ( cu - > cuModuleGetFunction ( & s - > cu_func_uchar , s - > cu_module , " Thumbnail_uchar " ) ) ;
if ( ret < 0 )
return ret ;
CHECK_CU ( cuModuleGetTexRef ( & s - > cu_tex_uchar , s - > cu_module , " uchar_tex " ) ) ;
CHECK_CU ( cuModuleGetTexRef ( & s - > cu_tex_uchar2 , s - > cu_module , " uchar2_tex " ) ) ;
CHECK_CU ( cuModuleGetTexRef ( & s - > cu_tex_ushort , s - > cu_module , " ushort_tex " ) ) ;
CHECK_CU ( cuModuleGetTexRef ( & s - > cu_tex_ushort2 , s - > cu_module , " ushort2_tex " ) ) ;
ret = CHECK_CU ( cu - > cuModuleGetFunction ( & s - > cu_func_uchar2 , s - > cu_module , " Thumbnail_uchar2 " ) ) ;
if ( ret < 0 )
return ret ;
CHECK_CU ( cuTexRefSetFlags ( s - > cu_tex_uchar , CU_TRSF_READ_AS_INTEGER ) ) ;
CHECK_CU ( cuTexRefSetFlags ( s - > cu_tex_uchar2 , CU_TRSF_READ_AS_INTEGER ) ) ;
CHECK_CU ( cuTexRefSetFlags ( s - > cu_tex_ushort , CU_TRSF_READ_AS_INTEGER ) ) ;
CHECK_CU ( cuTexRefSetFlags ( s - > cu_tex_ushort2 , CU_TRSF_READ_AS_INTEGER ) ) ;
ret = CHECK_CU ( cu - > cuModuleGetFunction ( & s - > cu_func_ushort , s - > cu_module , " Thumbnail_ushort " ) ) ;
if ( ret < 0 )
return ret ;
CHECK_CU ( cuTexRefSetFilterMode ( s - > cu_tex_uchar , CU_TR_FILTER_MODE_LINEAR ) ) ;
CHECK_CU ( cuTexRefSetFilterMode ( s - > cu_tex_uchar2 , CU_TR_FILTER_MODE_LINEAR ) ) ;
CHECK_CU ( cuTexRefSetFilterMode ( s - > cu_tex_ushort , CU_TR_FILTER_MODE_LINEAR ) ) ;
CHECK_CU ( cuTexRefSetFilterMode ( s - > cu_tex_ushort2 , CU_TR_FILTER_MODE_LINEAR ) ) ;
ret = CHECK_CU ( cu - > cuModuleGetFunction ( & s - > cu_func_ushort2 , s - > cu_module , " Thumbnail_ushort2 " ) ) ;
if ( ret < 0 )
return ret ;
ret = CHECK_CU ( cuMemAlloc ( & s - > data , HIST_SIZE * sizeof ( int ) ) ) ;
ret = CHECK_CU ( cu - > cu MemAlloc( & s - > data , HIST_SIZE * sizeof ( int ) ) ) ;
if ( ret < 0 )
return ret ;
CHECK_CU ( cuCtxPopCurrent ( & dummy ) ) ;
CHECK_CU ( cu - > cu CtxPopCurrent( & dummy ) ) ;
s - > hw_frames_ctx = ctx - > inputs [ 0 ] - > hw_frames_ctx ;