@ -49,31 +49,33 @@ fpel_func(avg, 32, avx2);
fpel_func ( avg , 64 , avx2 ) ;
# undef fpel_func
# define mc_func(avg, sz, dir, opt) \
# define mc_func(avg, sz, dir, opt, type, f_sz ) \
void ff_vp9_ # # avg # # _8tap_1d_ # # dir # # _ # # sz # # _ # # opt ( uint8_t * dst , ptrdiff_t dst_stride , \
const uint8_t * src , ptrdiff_t src_stride , \
int h , const int8_t ( * filter ) [ 32 ] )
# define mc_funcs(sz, opt) \
mc_func ( put , sz , h , opt ) ; \
mc_func ( avg , sz , h , opt ) ; \
mc_func ( put , sz , v , opt ) ; \
mc_func ( avg , sz , v , opt )
mc_funcs ( 4 , ssse3 ) ;
mc_funcs ( 8 , ssse3 ) ;
int h , const type ( * filter ) [ f_sz ] )
# define mc_funcs(sz, opt, type, fsz) \
mc_func ( put , sz , h , opt , type , fsz ) ; \
mc_func ( avg , sz , h , opt , type , fsz ) ; \
mc_func ( put , sz , v , opt , type , fsz ) ; \
mc_func ( avg , sz , v , opt , type , fsz )
mc_funcs ( 4 , mmxext , int16_t , 8 ) ;
mc_funcs ( 8 , sse2 , int16_t , 8 ) ;
mc_funcs ( 4 , ssse3 , int8_t , 32 ) ;
mc_funcs ( 8 , ssse3 , int8_t , 32 ) ;
# if ARCH_X86_64
mc_funcs ( 16 , ssse3 ) ;
mc_funcs ( 32 , avx2 ) ;
mc_funcs ( 16 , ssse3 , int8_t , 32 ) ;
mc_funcs ( 32 , avx2 , int8_t , 32 ) ;
# endif
# undef mc_funcs
# undef mc_func
# define mc_rep_func(avg, sz, hsz, dir, opt) \
# define mc_rep_func(avg, sz, hsz, dir, opt, type, f_sz ) \
static av_always_inline void \
ff_vp9_ # # avg # # _8tap_1d_ # # dir # # _ # # sz # # _ # # opt ( uint8_t * dst , ptrdiff_t dst_stride , \
const uint8_t * src , ptrdiff_t src_stride , \
int h , const int8_t ( * filter ) [ 32 ] ) \
int h , const type ( * filter ) [ f_sz ] ) \
{ \
ff_vp9_ # # avg # # _8tap_1d_ # # dir # # _ # # hsz # # _ # # opt ( dst , dst_stride , src , \
src_stride , h , filter ) ; \
@ -81,95 +83,103 @@ ff_vp9_##avg##_8tap_1d_##dir##_##sz##_##opt(uint8_t *dst, ptrdiff_t dst_stride,
src_stride , h , filter ) ; \
}
# define mc_rep_funcs(sz, hsz, opt) \
mc_rep_func ( put , sz , hsz , h , opt ) ; \
mc_rep_func ( avg , sz , hsz , h , opt ) ; \
mc_rep_func ( put , sz , hsz , v , opt ) ; \
mc_rep_func ( avg , sz , hsz , v , opt )
# define mc_rep_funcs(sz, hsz, opt, type, fsz ) \
mc_rep_func ( put , sz , hsz , h , opt , type , fsz ) ; \
mc_rep_func ( avg , sz , hsz , h , opt , type , fsz ) ; \
mc_rep_func ( put , sz , hsz , v , opt , type , fsz ) ; \
mc_rep_func ( avg , sz , hsz , v , opt , type , fsz )
mc_rep_funcs ( 16 , 8 , sse2 , int16_t , 8 ) ;
# if ARCH_X86_32
mc_rep_funcs ( 16 , 8 , ssse3 ) ;
mc_rep_funcs ( 16 , 8 , ssse3 , int8_t , 32 ) ;
# endif
mc_rep_funcs ( 32 , 16 , ssse3 ) ;
mc_rep_funcs ( 64 , 32 , ssse3 ) ;
mc_rep_funcs ( 32 , 16 , sse2 , int16_t , 8 ) ;
mc_rep_funcs ( 32 , 16 , ssse3 , int8_t , 32 ) ;
mc_rep_funcs ( 64 , 32 , sse2 , int16_t , 8 ) ;
mc_rep_funcs ( 64 , 32 , ssse3 , int8_t , 32 ) ;
# if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
mc_rep_funcs ( 64 , 32 , avx2 ) ;
mc_rep_funcs ( 64 , 32 , avx2 , int8_t , 32 ) ;
# endif
# undef mc_rep_funcs
# undef mc_rep_func
extern const int8_t ff_filters_ssse3 [ 3 ] [ 15 ] [ 4 ] [ 32 ] ;
extern const int16_t ff_filters_sse2 [ 3 ] [ 15 ] [ 8 ] [ 8 ] ;
# define filter_8tap_2d_fn(op, sz, f, fname, align, opt) \
# define filter_8tap_2d_fn(op, sz, f, f_opt, f name, align, opt) \
static void op # # _8tap_ # # fname # # _ # # sz # # hv_ # # opt ( uint8_t * dst , ptrdiff_t dst_stride , \
const uint8_t * src , ptrdiff_t src_stride , \
int h , int mx , int my ) \
{ \
LOCAL_ALIGNED_ # # align ( uint8_t , temp , [ 71 * 64 ] ) ; \
ff_vp9_put_8tap_1d_h_ # # sz # # _ # # opt ( temp , 64 , src - 3 * src_stride , src_stride , \
h + 7 , ff_filters_ssse3 [ f ] [ mx - 1 ] ) ; \
h + 7 , ff_filters_ # # f_opt [ f ] [ mx - 1 ] ) ; \
ff_vp9_ # # op # # _8tap_1d_v_ # # sz # # _ # # opt ( dst , dst_stride , temp + 3 * 64 , 64 , \
h , ff_filters_ssse3 [ f ] [ my - 1 ] ) ; \
h , ff_filters_ # # f_opt [ f ] [ my - 1 ] ) ; \
}
# define filters_8tap_2d_fn(op, sz, align, opt) \
filter_8tap_2d_fn ( op , sz , FILTER_8TAP_REGULAR , regular , align , opt ) \
filter_8tap_2d_fn ( op , sz , FILTER_8TAP_SHARP , sharp , align , opt ) \
filter_8tap_2d_fn ( op , sz , FILTER_8TAP_SMOOTH , smooth , align , opt )
# define filters_8tap_2d_fn2(op, align, opt) \
filters_8tap_2d_fn ( op , 64 , align , opt ) \
filters_8tap_2d_fn ( op , 32 , align , opt ) \
filters_8tap_2d_fn ( op , 16 , align , opt ) \
filters_8tap_2d_fn ( op , 8 , align , opt ) \
filters_8tap_2d_fn ( op , 4 , align , opt )
filters_8tap_2d_fn2 ( put , 16 , ssse3 )
filters_8tap_2d_fn2 ( avg , 16 , ssse3 )
# define filters_8tap_2d_fn(op, sz, align, opt, f_opt) \
filter_8tap_2d_fn ( op , sz , FILTER_8TAP_REGULAR , f_opt , regular , align , opt ) \
filter_8tap_2d_fn ( op , sz , FILTER_8TAP_SHARP , f_opt , sharp , align , opt ) \
filter_8tap_2d_fn ( op , sz , FILTER_8TAP_SMOOTH , f_opt , smooth , align , opt )
# define filters_8tap_2d_fn2(op, align, opt4, opt8, f_opt) \
filters_8tap_2d_fn ( op , 64 , align , opt8 , f_opt ) \
filters_8tap_2d_fn ( op , 32 , align , opt8 , f_opt ) \
filters_8tap_2d_fn ( op , 16 , align , opt8 , f_opt ) \
filters_8tap_2d_fn ( op , 8 , align , opt8 , f_opt ) \
filters_8tap_2d_fn ( op , 4 , align , opt4 , f_opt )
filters_8tap_2d_fn2 ( put , 16 , mmxext , sse2 , sse2 )
filters_8tap_2d_fn2 ( avg , 16 , mmxext , sse2 , sse2 )
filters_8tap_2d_fn2 ( put , 16 , ssse3 , ssse3 , ssse3 )
filters_8tap_2d_fn2 ( avg , 16 , ssse3 , ssse3 , ssse3 )
# if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
filters_8tap_2d_fn ( put , 64 , 32 , avx2 )
filters_8tap_2d_fn ( put , 32 , 32 , avx2 )
filters_8tap_2d_fn ( avg , 64 , 32 , avx2 )
filters_8tap_2d_fn ( avg , 32 , 32 , avx2 )
filters_8tap_2d_fn ( put , 64 , 32 , avx2 , ssse3 )
filters_8tap_2d_fn ( put , 32 , 32 , avx2 , ssse3 )
filters_8tap_2d_fn ( avg , 64 , 32 , avx2 , ssse3 )
filters_8tap_2d_fn ( avg , 32 , 32 , avx2 , ssse3 )
# endif
# undef filters_8tap_2d_fn2
# undef filters_8tap_2d_fn
# undef filter_8tap_2d_fn
# define filter_8tap_1d_fn(op, sz, f, fname, dir, dvar, opt) \
# define filter_8tap_1d_fn(op, sz, f, f_opt, f name, dir, dvar, opt) \
static void op # # _8tap_ # # fname # # _ # # sz # # dir # # _ # # opt ( uint8_t * dst , ptrdiff_t dst_stride , \
const uint8_t * src , ptrdiff_t src_stride , \
int h , int mx , int my ) \
{ \
ff_vp9_ # # op # # _8tap_1d_ # # dir # # _ # # sz # # _ # # opt ( dst , dst_stride , src , src_stride , \
h , ff_filters_ssse3 [ f ] [ dvar - 1 ] ) ; \
h , ff_filters_ # # f_opt [ f ] [ dvar - 1 ] ) ; \
}
# define filters_8tap_1d_fn(op, sz, dir, dvar, opt) \
filter_8tap_1d_fn ( op , sz , FILTER_8TAP_REGULAR , regular , dir , dvar , opt ) \
filter_8tap_1d_fn ( op , sz , FILTER_8TAP_SHARP , sharp , dir , dvar , opt ) \
filter_8tap_1d_fn ( op , sz , FILTER_8TAP_SMOOTH , smooth , dir , dvar , opt )
# define filters_8tap_1d_fn2(op, sz, opt) \
filters_8tap_1d_fn ( op , sz , h , mx , opt ) \
filters_8tap_1d_fn ( op , sz , v , my , opt )
# define filters_8tap_1d_fn3(op, opt) \
filters_8tap_1d_fn2 ( op , 64 , opt ) \
filters_8tap_1d_fn2 ( op , 32 , opt ) \
filters_8tap_1d_fn2 ( op , 16 , opt ) \
filters_8tap_1d_fn2 ( op , 8 , opt ) \
filters_8tap_1d_fn2 ( op , 4 , opt )
filters_8tap_1d_fn3 ( put , ssse3 )
filters_8tap_1d_fn3 ( avg , ssse3 )
# define filters_8tap_1d_fn(op, sz, dir, dvar, opt, f_opt) \
filter_8tap_1d_fn ( op , sz , FILTER_8TAP_REGULAR , f_opt , regular , dir , dvar , opt ) \
filter_8tap_1d_fn ( op , sz , FILTER_8TAP_SHARP , f_opt , sharp , dir , dvar , opt ) \
filter_8tap_1d_fn ( op , sz , FILTER_8TAP_SMOOTH , f_opt , smooth , dir , dvar , opt )
# define filters_8tap_1d_fn2(op, sz, opt, f_opt) \
filters_8tap_1d_fn ( op , sz , h , mx , opt , f_opt ) \
filters_8tap_1d_fn ( op , sz , v , my , opt , f_opt )
# define filters_8tap_1d_fn3(op, opt4, opt8, f_opt) \
filters_8tap_1d_fn2 ( op , 64 , opt8 , f_opt ) \
filters_8tap_1d_fn2 ( op , 32 , opt8 , f_opt ) \
filters_8tap_1d_fn2 ( op , 16 , opt8 , f_opt ) \
filters_8tap_1d_fn2 ( op , 8 , opt8 , f_opt ) \
filters_8tap_1d_fn2 ( op , 4 , opt4 , f_opt )
filters_8tap_1d_fn3 ( put , mmxext , sse2 , sse2 )
filters_8tap_1d_fn3 ( avg , mmxext , sse2 , sse2 )
filters_8tap_1d_fn3 ( put , ssse3 , ssse3 , ssse3 )
filters_8tap_1d_fn3 ( avg , ssse3 , ssse3 , ssse3 )
# if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
filters_8tap_1d_fn2 ( put , 64 , avx2 )
filters_8tap_1d_fn2 ( put , 32 , avx2 )
filters_8tap_1d_fn2 ( avg , 64 , avx2 )
filters_8tap_1d_fn2 ( avg , 32 , avx2 )
filters_8tap_1d_fn2 ( put , 64 , avx2 , ssse3 )
filters_8tap_1d_fn2 ( put , 32 , avx2 , ssse3 )
filters_8tap_1d_fn2 ( avg , 64 , avx2 , ssse3 )
filters_8tap_1d_fn2 ( avg , 32 , avx2 , ssse3 )
# endif
# undef filters_8tap_1d_fn
@ -297,20 +307,23 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
dsp - > mc [ idx1 ] [ FILTER_8TAP_REGULAR ] [ idx2 ] [ idxh ] [ idxv ] = type # # _8tap_regular_ # # sz # # dir # # _ # # opt ; \
dsp - > mc [ idx1 ] [ FILTER_8TAP_SHARP ] [ idx2 ] [ idxh ] [ idxv ] = type # # _8tap_sharp_ # # sz # # dir # # _ # # opt
# define init_subpel2_32_64(idx, idxh, idxv, dir, type, opt) \
init_subpel1 ( 0 , idx , idxh , idxv , 64 , dir , type , opt ) ; \
init_subpel1 ( 1 , idx , idxh , idxv , 32 , dir , type , opt )
# define init_subpel2(idx1, idx2, sz, type, opt) \
init_subpel1 ( idx1 , idx2 , 1 , 1 , sz , hv , type , opt ) ; \
init_subpel1 ( idx1 , idx2 , 0 , 1 , sz , v , type , opt ) ; \
init_subpel1 ( idx1 , idx2 , 1 , 0 , sz , h , type , opt )
# define init_subpel3_32_64(idx, type, opt) \
init_subpel2 ( 0 , idx , 64 , type , opt ) ; \
init_subpel2 ( 1 , idx , 32 , type , opt )
# define init_subpel2(idx, idxh, idxv, dir, type, opt) \
init_subpel2_32_64 ( idx , idxh , idxv , dir , type , opt ) ; \
init_subpel1 ( 2 , idx , idxh , idxv , 16 , dir , type , opt ) ; \
init_subpel1 ( 3 , idx , idxh , idxv , 8 , dir , type , opt ) ; \
init_subpel1 ( 4 , idx , idxh , idxv , 4 , dir , type , opt )
# define init_subpel3_8to64(idx, type, opt) \
init_subpel3_32_64 ( idx , type , opt ) ; \
init_subpel2 ( 2 , idx , 16 , type , opt ) ; \
init_subpel2 ( 3 , idx , 8 , type , opt )
# define init_subpel3(idx, type, opt) \
init_subpel2 ( idx , 1 , 1 , hv , type , opt ) ; \
init_subpel2 ( idx , 0 , 1 , v , type , opt ) ; \
init_subpel2 ( idx , 1 , 0 , h , type , opt )
init_subpel3_8to64 ( idx , type , opt ) ; \
init_subpel2 ( 4 , idx , 4 , type , opt )
# define init_lpf(opt) do { \
if ( ARCH_X86_64 ) { \
@ -357,6 +370,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
}
if ( EXTERNAL_MMXEXT ( cpu_flags ) ) {
init_subpel2 ( 4 , 0 , 4 , put , mmxext ) ;
init_subpel2 ( 4 , 1 , 4 , avg , mmxext ) ;
init_fpel ( 4 , 1 , 4 , avg , mmxext ) ;
init_fpel ( 3 , 1 , 8 , avg , mmxext ) ;
dsp - > itxfm_add [ TX_4X4 ] [ DCT_DCT ] = ff_vp9_idct_idct_4x4_add_mmxext ;
@ -369,6 +384,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
}
if ( EXTERNAL_SSE2 ( cpu_flags ) ) {
init_subpel3_8to64 ( 0 , put , sse2 ) ;
init_subpel3_8to64 ( 1 , avg , sse2 ) ;
init_fpel ( 2 , 1 , 16 , avg , sse2 ) ;
init_fpel ( 1 , 1 , 32 , avg , sse2 ) ;
init_fpel ( 0 , 1 , 64 , avg , sse2 ) ;
@ -444,12 +461,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
init_fpel ( 0 , 1 , 64 , avg , avx2 ) ;
if ( ARCH_X86_64 ) {
# if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
init_subpel2_32_64 ( 0 , 1 , 1 , hv , put , avx2 ) ;
init_subpel2_32_64 ( 0 , 0 , 1 , v , put , avx2 ) ;
init_subpel2_32_64 ( 0 , 1 , 0 , h , put , avx2 ) ;
init_subpel2_32_64 ( 1 , 1 , 1 , hv , avg , avx2 ) ;
init_subpel2_32_64 ( 1 , 0 , 1 , v , avg , avx2 ) ;
init_subpel2_32_64 ( 1 , 1 , 0 , h , avg , avx2 ) ;
init_subpel3_32_64 ( 0 , put , avx2 ) ;
init_subpel3_32_64 ( 1 , avg , avx2 ) ;
# endif
}
dsp - > intra_pred [ TX_32X32 ] [ DC_PRED ] = ff_vp9_ipred_dc_32x32_avx2 ;