avfilter/scale_cuda: frame crop support

The crop filter has no effect when it is chained in front of scale_cuda on hardware frames, for example:

-vf crop=100:100,scale_cuda=100x100

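For hardware frames (AV_PIX_FMT_FLAG_HWACCEL), cropping is expected to be expressed through the frame's crop_* properties,
as seen in the implementation in vf_crop.c, so scale_cuda has to take those properties into account when sampling its input.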

Signed-off-by: Timo Rothenpieler <timo@rothenpieler.org>
master
Koushik Dutta 4 months ago committed by Timo Rothenpieler
parent 1864025458
commit 0cdcbab9e9
  1. 2
      libavfilter/version.h
  2. 15
      libavfilter/vf_scale_cuda.c
  3. 22
      libavfilter/vf_scale_cuda.cu

@ -32,7 +32,7 @@
#include "version_major.h" #include "version_major.h"
#define LIBAVFILTER_VERSION_MINOR 6 #define LIBAVFILTER_VERSION_MINOR 6
#define LIBAVFILTER_VERSION_MICRO 100 #define LIBAVFILTER_VERSION_MICRO 101
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \

@ -407,7 +407,7 @@ fail:
} }
static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, static int call_resize_kernel(AVFilterContext *ctx, CUfunction func,
CUtexObject src_tex[4], int src_width, int src_height, CUtexObject src_tex[4], int src_left, int src_top, int src_width, int src_height,
AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch) AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch)
{ {
CUDAScaleContext *s = ctx->priv; CUDAScaleContext *s = ctx->priv;
@ -422,7 +422,7 @@ static int call_resize_kernel(AVFilterContext *ctx, CUfunction func,
&src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3], &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3],
&dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3], &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
&dst_width, &dst_height, &dst_pitch, &dst_width, &dst_height, &dst_pitch,
&src_width, &src_height, &s->param &src_left, &src_top, &src_width, &src_height, &s->param
}; };
return CHECK_CU(cu->cuLaunchKernel(func, return CHECK_CU(cu->cuLaunchKernel(func,
@ -440,6 +440,9 @@ static int scalecuda_resize(AVFilterContext *ctx,
CUtexObject tex[4] = { 0, 0, 0, 0 }; CUtexObject tex[4] = { 0, 0, 0, 0 };
int crop_width = (in->width - in->crop_right) - in->crop_left;
int crop_height = (in->height - in->crop_bottom) - in->crop_top;
ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
if (ret < 0) if (ret < 0)
return ret; return ret;
@ -477,7 +480,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
// scale primary plane(s). Usually Y (and A), or single plane of RGB frames. // scale primary plane(s). Usually Y (and A), or single plane of RGB frames.
ret = call_resize_kernel(ctx, s->cu_func, ret = call_resize_kernel(ctx, s->cu_func,
tex, in->width, in->height, tex, in->crop_left, in->crop_top, crop_width, crop_height,
out, out->width, out->height, out->linesize[0]); out, out->width, out->height, out->linesize[0]);
if (ret < 0) if (ret < 0)
goto exit; goto exit;
@ -485,8 +488,10 @@ static int scalecuda_resize(AVFilterContext *ctx,
if (s->out_planes > 1) { if (s->out_planes > 1) {
// scale UV plane. Scale function sets both U and V plane, or singular interleaved plane. // scale UV plane. Scale function sets both U and V plane, or singular interleaved plane.
ret = call_resize_kernel(ctx, s->cu_func_uv, tex, ret = call_resize_kernel(ctx, s->cu_func_uv, tex,
AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w), AV_CEIL_RSHIFT(in->crop_left, s->in_desc->log2_chroma_w),
AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h), AV_CEIL_RSHIFT(in->crop_top, s->in_desc->log2_chroma_h),
AV_CEIL_RSHIFT(crop_width, s->in_desc->log2_chroma_w),
AV_CEIL_RSHIFT(crop_height, s->in_desc->log2_chroma_h),
out, out,
AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w), AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w),
AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h), AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h),

@ -26,6 +26,7 @@
template<typename T> template<typename T>
using subsample_function_t = T (*)(cudaTextureObject_t tex, int xo, int yo, using subsample_function_t = T (*)(cudaTextureObject_t tex, int xo, int yo,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_left, int src_top,
int src_width, int src_height, int src_width, int src_height,
int bit_depth, float param); int bit_depth, float param);
@ -64,11 +65,12 @@ static inline __device__ ushort conv_16to10(ushort in)
subsample_function_t<in_T_uv> subsample_func_uv> \ subsample_function_t<in_T_uv> subsample_func_uv> \
__device__ static inline void N(cudaTextureObject_t src_tex[4], T *dst[4], int xo, int yo, \ __device__ static inline void N(cudaTextureObject_t src_tex[4], T *dst[4], int xo, int yo, \
int dst_width, int dst_height, int dst_pitch, \ int dst_width, int dst_height, int dst_pitch, \
int src_width, int src_height, float param) int src_left, int src_top, int src_width, int src_height, float param)
#define SUB_F(m, plane) \ #define SUB_F(m, plane) \
subsample_func_##m(src_tex[plane], xo, yo, \ subsample_func_##m(src_tex[plane], xo, yo, \
dst_width, dst_height, \ dst_width, dst_height, \
src_left, src_top, \
src_width, src_height, \ src_width, src_height, \
in_bit_depth, param) in_bit_depth, param)
@ -1063,13 +1065,14 @@ template<typename T>
__device__ static inline T Subsample_Nearest(cudaTextureObject_t tex, __device__ static inline T Subsample_Nearest(cudaTextureObject_t tex,
int xo, int yo, int xo, int yo,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_left, int src_top,
int src_width, int src_height, int src_width, int src_height,
int bit_depth, float param) int bit_depth, float param)
{ {
float hscale = (float)src_width / (float)dst_width; float hscale = (float)src_width / (float)dst_width;
float vscale = (float)src_height / (float)dst_height; float vscale = (float)src_height / (float)dst_height;
float xi = (xo + 0.5f) * hscale; float xi = (xo + 0.5f) * hscale + src_left;
float yi = (yo + 0.5f) * vscale; float yi = (yo + 0.5f) * vscale + src_top;
return tex2D<T>(tex, xi, yi); return tex2D<T>(tex, xi, yi);
} }
@ -1078,13 +1081,14 @@ template<typename T>
__device__ static inline T Subsample_Bilinear(cudaTextureObject_t tex, __device__ static inline T Subsample_Bilinear(cudaTextureObject_t tex,
int xo, int yo, int xo, int yo,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_left, int src_top,
int src_width, int src_height, int src_width, int src_height,
int bit_depth, float param) int bit_depth, float param)
{ {
float hscale = (float)src_width / (float)dst_width; float hscale = (float)src_width / (float)dst_width;
float vscale = (float)src_height / (float)dst_height; float vscale = (float)src_height / (float)dst_height;
float xi = (xo + 0.5f) * hscale; float xi = (xo + 0.5f) * hscale + src_left;
float yi = (yo + 0.5f) * vscale; float yi = (yo + 0.5f) * vscale + src_top;
// 3-tap filter weights are {wh,1.0,wh} and {wv,1.0,wv} // 3-tap filter weights are {wh,1.0,wh} and {wv,1.0,wv}
float wh = min(max(0.5f * (hscale - 1.0f), 0.0f), 1.0f); float wh = min(max(0.5f * (hscale - 1.0f), 0.0f), 1.0f);
float wv = min(max(0.5f * (vscale - 1.0f), 0.0f), 1.0f); float wv = min(max(0.5f * (vscale - 1.0f), 0.0f), 1.0f);
@ -1109,13 +1113,14 @@ template<typename T, coeffs_function_t coeffs_function>
__device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex, __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex,
int xo, int yo, int xo, int yo,
int dst_width, int dst_height, int dst_width, int dst_height,
int src_left, int src_top,
int src_width, int src_height, int src_width, int src_height,
int bit_depth, float param) int bit_depth, float param)
{ {
float hscale = (float)src_width / (float)dst_width; float hscale = (float)src_width / (float)dst_width;
float vscale = (float)src_height / (float)dst_height; float vscale = (float)src_height / (float)dst_height;
float xi = (xo + 0.5f) * hscale - 0.5f; float xi = (xo + 0.5f) * hscale - 0.5f + src_left;
float yi = (yo + 0.5f) * vscale - 0.5f; float yi = (yo + 0.5f) * vscale - 0.5f + src_top;
float px = floor(xi); float px = floor(xi);
float py = floor(yi); float py = floor(yi);
float fx = xi - px; float fx = xi - px;
@ -1147,7 +1152,7 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex,
cudaTextureObject_t src_tex_2, cudaTextureObject_t src_tex_3, \ cudaTextureObject_t src_tex_2, cudaTextureObject_t src_tex_3, \
T *dst_0, T *dst_1, T *dst_2, T *dst_3, \ T *dst_0, T *dst_1, T *dst_2, T *dst_3, \
int dst_width, int dst_height, int dst_pitch, \ int dst_width, int dst_height, int dst_pitch, \
int src_width, int src_height, float param int src_left, int src_top, int src_width, int src_height, float param
#define SUBSAMPLE(Convert, T) \ #define SUBSAMPLE(Convert, T) \
cudaTextureObject_t src_tex[4] = \ cudaTextureObject_t src_tex[4] = \
@ -1159,6 +1164,7 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex,
Convert( \ Convert( \
src_tex, dst, xo, yo, \ src_tex, dst, xo, yo, \
dst_width, dst_height, dst_pitch, \ dst_width, dst_height, dst_pitch, \
src_left, src_top, \
src_width, src_height, param); src_width, src_height, param);
extern "C" { extern "C" {

Loading…
Cancel
Save