@ -47,15 +47,22 @@
# include "libavutil/pixdesc.h"
# include "unsharp.h"
static void apply_unsharp ( uint8_t * dst , int dst_stride ,
const uint8_t * src , int src_stride ,
int width , int height , UnsharpFilterParam * fp )
/**
 * Description of one plane's worth of work, passed to unsharp_slice()
 * through the opaque `arg` pointer of the slice-threading callback.
 * The caller fills it once per plane; every job reads the same instance.
 */
typedef struct ThreadData {
    UnsharpFilterParam *fp;   ///< filter parameters (and scratch buffers) for this plane
    uint8_t *dst;             ///< first row of the destination plane
    const uint8_t *src;       ///< first row of the source plane
    int dst_stride;           ///< byte offset between consecutive rows of dst
    int src_stride;           ///< byte offset between consecutive rows of src
    int width;                ///< plane width in pixels
    int height;               ///< plane height in rows; split across jobs
} ThreadData;
static int unsharp_slice ( AVFilterContext * ctx , void * arg , int jobnr , int nb_jobs )
{
ThreadData * td = arg ;
UnsharpFilterParam * fp = td - > fp ;
uint32_t * * sc = fp - > sc ;
uint32_t sr [ MAX_MATRIX_SIZE - 1 ] , tmp1 , tmp2 ;
int32_t res ;
int x , y , z ;
uint32_t * sr = fp - > sr ;
const uint8_t * src2 = NULL ; //silence a warning
const int amount = fp - > amount ;
const int steps_x = fp - > steps_x ;
@ -63,30 +70,54 @@ static void apply_unsharp( uint8_t *dst, int dst_stride,
const int scalebits = fp - > scalebits ;
const int32_t halfscale = fp - > halfscale ;
uint8_t * dst = td - > dst ;
const uint8_t * src = td - > src ;
const int dst_stride = td - > dst_stride ;
const int src_stride = td - > src_stride ;
const int width = td - > width ;
const int height = td - > height ;
const int sc_offset = jobnr * 2 * steps_y ;
const int sr_offset = jobnr * ( MAX_MATRIX_SIZE - 1 ) ;
const int slice_start = ( height * jobnr ) / nb_jobs ;
const int slice_end = ( height * ( jobnr + 1 ) ) / nb_jobs ;
int32_t res ;
int x , y , z ;
uint32_t tmp1 , tmp2 ;
if ( ! amount ) {
av_image_copy_plane ( dst , dst_stride , src , src_stride , width , height ) ;
return ;
av_image_copy_plane ( dst + slice_start * dst_stride , dst_stride ,
src + slice_start * src_stride , src_stride ,
width , slice_end - slice_start ) ;
return 0 ;
}
for ( y = 0 ; y < 2 * steps_y ; y + + )
memset ( sc [ y ] , 0 , sizeof ( sc [ y ] [ 0 ] ) * ( width + 2 * steps_x ) ) ;
memset ( sc [ sc_offset + y ] , 0 , sizeof ( sc [ y ] [ 0 ] ) * ( width + 2 * steps_x ) ) ;
for ( y = - steps_y ; y < height + steps_y ; y + + ) {
// if this is not the first slice, we start from row (slice_start - steps_y),
// so that we get a smooth result at the slice boundary
if ( slice_start > steps_y ) {
src + = ( slice_start - steps_y ) * src_stride ;
dst + = ( slice_start - steps_y ) * dst_stride ;
}
for ( y = - steps_y + slice_start ; y < steps_y + slice_end ; y + + ) {
if ( y < height )
src2 = src ;
memset ( sr , 0 , sizeof ( sr [ 0 ] ) * ( 2 * steps_x - 1 ) ) ;
memset ( sr + sr_offset , 0 , sizeof ( sr [ 0 ] ) * ( 2 * steps_x - 1 ) ) ;
for ( x = - steps_x ; x < width + steps_x ; x + + ) {
tmp1 = x < = 0 ? src2 [ 0 ] : x > = width ? src2 [ width - 1 ] : src2 [ x ] ;
for ( z = 0 ; z < steps_x * 2 ; z + = 2 ) {
tmp2 = sr [ z + 0 ] + tmp1 ; sr [ z + 0 ] = tmp1 ;
tmp1 = sr [ z + 1 ] + tmp2 ; sr [ z + 1 ] = tmp2 ;
tmp2 = sr [ sr_offset + z + 0 ] + tmp1 ; sr [ sr_offset + z + 0 ] = tmp1 ;
tmp1 = sr [ sr_offset + z + 1 ] + tmp2 ; sr [ sr_offset + z + 1 ] = tmp2 ;
}
for ( z = 0 ; z < steps_y * 2 ; z + = 2 ) {
tmp2 = sc [ z + 0 ] [ x + steps_x ] + tmp1 ; sc [ z + 0 ] [ x + steps_x ] = tmp1 ;
tmp1 = sc [ z + 1 ] [ x + steps_x ] + tmp2 ; sc [ z + 1 ] [ x + steps_x ] = tmp2 ;
tmp2 = sc [ sc_offset + z + 0 ] [ x + steps_x ] + tmp1 ; sc [ sc_offset + z + 0 ] [ x + steps_x ] = tmp1 ;
tmp1 = sc [ sc_offset + z + 1 ] [ x + steps_x ] + tmp2 ; sc [ sc_offset + z + 1 ] [ x + steps_x ] = tmp2 ;
}
if ( x > = steps_x & & y > = steps_y ) {
if ( x > = steps_x & & y > = ( steps_y + slice_start ) ) {
const uint8_t * srx = src - steps_y * src_stride + x - steps_x ;
uint8_t * dsx = dst - steps_y * dst_stride + x - steps_x ;
@ -99,6 +130,7 @@ static void apply_unsharp( uint8_t *dst, int dst_stride,
src + = src_stride ;
}
}
return 0 ;
}
static int apply_unsharp_c ( AVFilterContext * ctx , AVFrame * in , AVFrame * out )
@ -107,6 +139,8 @@ static int apply_unsharp_c(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
UnsharpContext * s = ctx - > priv ;
int i , plane_w [ 3 ] , plane_h [ 3 ] ;
UnsharpFilterParam * fp [ 3 ] ;
ThreadData td ;
plane_w [ 0 ] = inlink - > w ;
plane_w [ 1 ] = plane_w [ 2 ] = AV_CEIL_RSHIFT ( inlink - > w , s - > hsub ) ;
plane_h [ 0 ] = inlink - > h ;
@ -114,7 +148,14 @@ static int apply_unsharp_c(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
fp [ 0 ] = & s - > luma ;
fp [ 1 ] = fp [ 2 ] = & s - > chroma ;
for ( i = 0 ; i < 3 ; i + + ) {
apply_unsharp ( out - > data [ i ] , out - > linesize [ i ] , in - > data [ i ] , in - > linesize [ i ] , plane_w [ i ] , plane_h [ i ] , fp [ i ] ) ;
td . fp = fp [ i ] ;
td . dst = out - > data [ i ] ;
td . src = in - > data [ i ] ;
td . width = plane_w [ i ] ;
td . height = plane_h [ i ] ;
td . dst_stride = out - > linesize [ i ] ;
td . src_stride = in - > linesize [ i ] ;
ctx - > internal - > execute ( ctx , unsharp_slice , & td , NULL , FFMIN ( plane_h [ i ] , s - > nb_threads ) ) ;
}
return 0 ;
}
@ -163,6 +204,7 @@ static int query_formats(AVFilterContext *ctx)
static int init_filter_param ( AVFilterContext * ctx , UnsharpFilterParam * fp , const char * effect_type , int width )
{
int z ;
UnsharpContext * s = ctx - > priv ;
const char * effect = fp - > amount = = 0 ? " none " : fp - > amount < 0 ? " blur " : " sharpen " ;
if ( ! ( fp - > msize_x & fp - > msize_y & 1 ) ) {
@ -175,7 +217,12 @@ static int init_filter_param(AVFilterContext *ctx, UnsharpFilterParam *fp, const
av_log ( ctx , AV_LOG_VERBOSE , " effect:%s type:%s msize_x:%d msize_y:%d amount:%0.2f \n " ,
effect , effect_type , fp - > msize_x , fp - > msize_y , fp - > amount / 65535.0 ) ;
for ( z = 0 ; z < 2 * fp - > steps_y ; z + + )
fp - > sr = av_malloc_array ( ( MAX_MATRIX_SIZE - 1 ) * s - > nb_threads , sizeof ( uint32_t ) ) ;
fp - > sc = av_malloc_array ( 2 * fp - > steps_y * s - > nb_threads , sizeof ( uint32_t * * ) ) ;
if ( ! fp - > sr | | ! fp - > sc )
return AVERROR ( ENOMEM ) ;
for ( z = 0 ; z < 2 * fp - > steps_y * s - > nb_threads ; z + + )
if ( ! ( fp - > sc [ z ] = av_malloc_array ( width + 2 * fp - > steps_x ,
sizeof ( * ( fp - > sc [ z ] ) ) ) ) )
return AVERROR ( ENOMEM ) ;
@ -192,6 +239,11 @@ static int config_props(AVFilterLink *link)
s - > hsub = desc - > log2_chroma_w ;
s - > vsub = desc - > log2_chroma_h ;
// ensure (height / nb_threads) >= 4 * steps_y,
// so that there is not too much overlap between two threads
s - > nb_threads = FFMIN ( ff_filter_get_nb_threads ( link - > dst ) ,
link - > h / ( 4 * s - > luma . steps_y ) ) ;
ret = init_filter_param ( link - > dst , & s - > luma , " luma " , link - > w ) ;
if ( ret < 0 )
return ret ;
@ -202,20 +254,22 @@ static int config_props(AVFilterLink *link)
return 0 ;
}
/**
 * Release the scratch buffers owned by one UnsharpFilterParam.
 *
 * @param fp         parameter set whose `sc` rows, `sc` array and `sr`
 *                   buffer are freed; the pointers are reset by av_freep()
 * @param nb_threads thread count the buffers were sized for; `sc` holds
 *                   2 * steps_y rows per thread
 */
static void free_filter_param(UnsharpFilterParam *fp, int nb_threads)
{
    int z;

    /* fp->sc is NULL if its allocation failed (or never happened); indexing
     * it in that state would dereference a NULL pointer.
     * NOTE(review): freeing every row assumes all 2 * steps_y * nb_threads
     * entries are either valid or NULL - confirm against init_filter_param(). */
    if (fp->sc) {
        for (z = 0; z < 2 * fp->steps_y * nb_threads; z++)
            av_freep(&fp->sc[z]);
        av_freep(&fp->sc);
    }
    av_freep(&fp->sr);
}
/**
 * Filter teardown: free the luma and chroma scratch buffers, using the
 * thread count stored in the filter's private context.
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    UnsharpContext *unsharp = ctx->priv;

    free_filter_param(&unsharp->luma,   unsharp->nb_threads);
    free_filter_param(&unsharp->chroma, unsharp->nb_threads);
}
static int filter_frame ( AVFilterLink * link , AVFrame * in )
@ -294,5 +348,5 @@ AVFilter ff_vf_unsharp = {
. query_formats = query_formats ,
. inputs = avfilter_vf_unsharp_inputs ,
. outputs = avfilter_vf_unsharp_outputs ,
. flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC ,
. flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS ,
} ;