diff --git a/libavcodec/vp9.h b/libavcodec/vp9.h index 25bb2d159b..84bed6daaf 100644 --- a/libavcodec/vp9.h +++ b/libavcodec/vp9.h @@ -127,9 +127,8 @@ typedef struct ProbContext { uint8_t partition[4][4][3]; } ProbContext; -typedef void (*vp9_mc_func)(uint8_t *dst, const uint8_t *ref, - ptrdiff_t dst_stride, - ptrdiff_t ref_stride, +typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride, + const uint8_t *ref, ptrdiff_t ref_stride, int h, int mx, int my); typedef struct VP9DSPContext { diff --git a/libavcodec/vp9block.c b/libavcodec/vp9block.c index cd40c38989..5a3b35649f 100644 --- a/libavcodec/vp9block.c +++ b/libavcodec/vp9block.c @@ -1187,7 +1187,7 @@ static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func(*mc)[2], ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3; ref_stride = 80; } - mc[!!mx][!!my](dst, ref, dst_stride, ref_stride, bh, mx << 1, my << 1); + mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1); } static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2], @@ -1227,7 +1227,7 @@ static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2], bw + !!mx * 7, bh + !!my * 7, x - !!mx * 3, y - !!my * 3, w, h); ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3; - mc[!!mx][!!my](dst_u, ref_u, dst_stride, 80, bh, mx, my); + mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my); s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ref_v - !!my * 3 * src_stride_v - !!mx * 3, @@ -1236,10 +1236,10 @@ static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2], bw + !!mx * 7, bh + !!my * 7, x - !!mx * 3, y - !!my * 3, w, h); ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3; - mc[!!mx][!!my](dst_v, ref_v, dst_stride, 80, bh, mx, my); + mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my); } else { - mc[!!mx][!!my](dst_u, ref_u, dst_stride, src_stride_u, bh, mx, my); - mc[!!mx][!!my](dst_v, ref_v, dst_stride, src_stride_v, bh, mx, my); + mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my); + mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my); } } @@ -1668,8 +1668,8 @@ int ff_vp9_decode_block(AVCodecContext *avctx, int row, int col, av_assert2(n <= 4); if (w & bw) { s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, - s->tmp_y + o, f->linesize[0], + s->tmp_y + o, 64, h, 0, 0); o += bw; } @@ -1686,12 +1686,12 @@ int ff_vp9_decode_block(AVCodecContext *avctx, int row, int col, av_assert2(n <= 4); if (w & bw) { s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, - s->tmp_uv[0] + o, f->linesize[1], + s->tmp_uv[0] + o, 32, h, 0, 0); s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, - s->tmp_uv[1] + o, f->linesize[2], + s->tmp_uv[1] + o, 32, h, 0, 0); o += bw; } diff --git a/libavcodec/vp9dsp.c b/libavcodec/vp9dsp.c index c83defeda3..af93c0d4ea 100644 --- a/libavcodec/vp9dsp.c +++ b/libavcodec/vp9dsp.c @@ -1738,9 +1738,8 @@ static av_cold void vp9dsp_loopfilter_init(VP9DSPContext *dsp) dsp->loop_filter_mix2[1][1][1] = loop_filter_v_88_16_c; } -static av_always_inline void copy_c(uint8_t *dst, const uint8_t *src, - ptrdiff_t dst_stride, - ptrdiff_t src_stride, +static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride, + const uint8_t *src, ptrdiff_t src_stride, int w, int h) { do { @@ -1751,9 +1750,8 @@ static av_always_inline void copy_c(uint8_t *dst, const uint8_t *src, } while (--h); } -static av_always_inline void avg_c(uint8_t *dst, const uint8_t *src, - ptrdiff_t dst_stride, - ptrdiff_t src_stride, +static av_always_inline void avg_c(uint8_t *dst, ptrdiff_t dst_stride, + const uint8_t *src, ptrdiff_t src_stride, int w, int h) { do { @@ -1767,13 +1765,12 @@ static av_always_inline void avg_c(uint8_t *dst, const uint8_t *src, } while (--h); } -#define fpel_fn(type, sz) \ -static void type ## sz ## _c(uint8_t *dst, const uint8_t *src, \ - ptrdiff_t dst_stride, \ - ptrdiff_t src_stride, \ - int h, int mx, int my) \ -{ \ - type ## _c(dst, src, dst_stride, src_stride, sz, h); \ +#define fpel_fn(type, sz) \ +static void type ## sz ## _c(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ + int h, int mx, int my) \ +{ \ + type ## _c(dst, dst_stride, src, src_stride, sz, h); \ } #define copy_avg_fn(sz) \ @@ -1851,9 +1848,8 @@ static const int8_t vp9_subpel_filters[3][15][8] = { F[6] * src[x + +3 * stride] + \ F[7] * src[x + +4 * stride] + 64) >> 7) -static av_always_inline void do_8tap_1d_c(uint8_t *dst, const uint8_t *src, - ptrdiff_t dst_stride, - ptrdiff_t src_stride, +static av_always_inline void do_8tap_1d_c(uint8_t *dst, ptrdiff_t dst_stride, + const uint8_t *src, ptrdiff_t src_stride, int w, int h, ptrdiff_t ds, const int8_t *filter, int avg) { @@ -1873,13 +1869,13 @@ static av_always_inline void do_8tap_1d_c(uint8_t *dst, const uint8_t *src, #define filter_8tap_1d_fn(opn, opa, dir, ds) \ static av_noinline void opn ## _8tap_1d_ ## dir ## _c(uint8_t *dst, \ - const uint8_t *src, \ ptrdiff_t dst_stride, \ + const uint8_t *src, \ ptrdiff_t src_stride, \ int w, int h, \ const int8_t *filter) \ { \ - do_8tap_1d_c(dst, src, dst_stride, src_stride, w, h, ds, filter, opa); \ + do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \ } filter_8tap_1d_fn(put, 0, v, src_stride) @@ -1889,9 +1885,8 @@ filter_8tap_1d_fn(avg, 1, h, 1) #undef filter_8tap_1d_fn -static av_always_inline void do_8tap_2d_c(uint8_t *dst, const uint8_t *src, - ptrdiff_t dst_stride, - ptrdiff_t src_stride, +static av_always_inline void do_8tap_2d_c(uint8_t *dst, ptrdiff_t dst_stride, + const uint8_t *src, ptrdiff_t src_stride, int w, int h, const int8_t *filterx, const int8_t *filtery, int avg) { @@ -1926,14 +1921,14 @@ static av_always_inline void do_8tap_2d_c(uint8_t *dst, const uint8_t *src, #define filter_8tap_2d_fn(opn, opa) \ static av_noinline void opn ## _8tap_2d_hv_c(uint8_t *dst, \ - const uint8_t *src, \ ptrdiff_t dst_stride, \ + const uint8_t *src, \ ptrdiff_t src_stride, \ int w, int h, \ const int8_t *filterx, \ const int8_t *filtery) \ { \ - do_8tap_2d_c(dst, src, dst_stride, src_stride, \ + do_8tap_2d_c(dst, dst_stride, src, src_stride, \ w, h, filterx, filtery, opa); \ } @@ -1947,23 +1942,23 @@ filter_8tap_2d_fn(avg, 1) #define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \ static void \ avg ## _8tap_ ## type ## _ ## sz ## dir ## _c(uint8_t *dst, \ - const uint8_t *src, \ ptrdiff_t dst_stride, \ + const uint8_t *src, \ ptrdiff_t src_stride, \ int h, int mx, int my) \ { \ - avg ## _8tap_1d_ ## dir ## _c(dst, src, dst_stride, src_stride, sz, h, \ + avg ## _8tap_1d_ ## dir ## _c(dst, dst_stride, src, src_stride, sz, h, \ vp9_subpel_filters[type_idx][dir_m - 1]); \ } #define filter_fn_2d(sz, type, type_idx, avg) \ static void avg ## _8tap_ ## type ## _ ## sz ## hv_c(uint8_t *dst, \ - const uint8_t *src, \ ptrdiff_t dst_stride, \ + const uint8_t *src, \ ptrdiff_t src_stride, \ int h, int mx, int my) \ { \ - avg ## _8tap_2d_hv_c(dst, src, dst_stride, src_stride, sz, h, \ + avg ## _8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \ vp9_subpel_filters[type_idx][mx - 1], \ vp9_subpel_filters[type_idx][my - 1]); \ } @@ -1972,8 +1967,8 @@ static void avg ## _8tap_ ## type ## _ ## sz ## hv_c(uint8_t *dst, \ (src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4)) static av_always_inline void do_bilin_1d_c(uint8_t *dst, - const uint8_t *src, ptrdiff_t dst_stride, + const uint8_t *src, ptrdiff_t src_stride, int w, int h, ptrdiff_t ds, int mxy, int avg) @@ -1994,12 +1989,12 @@ static av_always_inline void do_bilin_1d_c(uint8_t *dst, #define bilin_1d_fn(opn, opa, dir, ds) \ static av_noinline void opn ## _bilin_1d_ ## dir ## _c(uint8_t *dst, \ - const uint8_t *src, \ ptrdiff_t dst_stride, \ + const uint8_t *src, \ ptrdiff_t src_stride, \ int w, int h, int mxy) \ { \ - do_bilin_1d_c(dst, src, dst_stride, src_stride, w, h, ds, mxy, opa); \ + do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \ } bilin_1d_fn(put, 0, v, src_stride) @@ -2010,8 +2005,8 @@ bilin_1d_fn(avg, 1, h, 1) #undef bilin_1d_fn static av_always_inline void do_bilin_2d_c(uint8_t *dst, - const uint8_t *src, ptrdiff_t dst_stride, + const uint8_t *src, ptrdiff_t src_stride, int w, int h, int mx, int my, int avg) @@ -2046,13 +2041,13 @@ static av_always_inline void do_bilin_2d_c(uint8_t *dst, #define bilin_2d_fn(opn, opa) \ static av_noinline void opn ## _bilin_2d_hv_c(uint8_t *dst, \ - const uint8_t *src, \ ptrdiff_t dst_stride, \ + const uint8_t *src, \ ptrdiff_t src_stride, \ int w, int h, \ int mx, int my) \ { \ - do_bilin_2d_c(dst, src, dst_stride, src_stride, w, h, mx, my, opa); \ + do_bilin_2d_c(dst, dst_stride, src, src_stride, w, h, mx, my, opa); \ } bilin_2d_fn(put, 0) @@ -2064,23 +2059,23 @@ bilin_2d_fn(avg, 1) #define bilinf_fn_1d(sz, dir, dir_m, avg) \ static void avg ## _bilin_ ## sz ## dir ## _c(uint8_t *dst, \ - const uint8_t *src, \ ptrdiff_t dst_stride, \ + const uint8_t *src, \ ptrdiff_t src_stride, \ int h, int mx, int my) \ { \ - avg ## _bilin_1d_ ## dir ## _c(dst, src, dst_stride, src_stride, \ + avg ## _bilin_1d_ ## dir ## _c(dst, dst_stride, src, src_stride, \ sz, h, dir_m); \ } #define bilinf_fn_2d(sz, avg) \ static void avg ## _bilin_ ## sz ## hv_c(uint8_t *dst, \ - const uint8_t *src, \ ptrdiff_t dst_stride, \ + const uint8_t *src, \ ptrdiff_t src_stride, \ int h, int mx, int my) \ { \ - avg ## _bilin_2d_hv_c(dst, src, dst_stride, src_stride, \ + avg ## _bilin_2d_hv_c(dst, dst_stride, src, src_stride, \ sz, h, mx, my); \ } diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c index 3b9e1bb0ca..58aedcbd2d 100644 --- a/libavcodec/x86/vp9dsp_init.c +++ b/libavcodec/x86/vp9dsp_init.c @@ -29,10 +29,9 @@ #if HAVE_YASM -#define fpel_func(avg, sz, opt) \ -void ff_vp9_ ## avg ## sz ## _ ## opt(uint8_t *dst, const uint8_t *src, \ - ptrdiff_t dst_stride, \ - ptrdiff_t src_stride, \ +#define fpel_func(avg, sz, opt) \ +void ff_vp9_ ## avg ## sz ## _ ## opt(uint8_t *dst, ptrdiff_t dst_stride, \ + const uint8_t *src, ptrdiff_t src_stride, \ int h, int mx, int my) fpel_func(put, 4, mmx); @@ -54,8 +53,8 @@ fpel_func(avg, 64, avx2); #define mc_func(avg, sz, dir, opt, type, f_sz) \ void \ ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t *dst, \ - const uint8_t *src, \ ptrdiff_t dst_stride, \ + const uint8_t *src, \ ptrdiff_t src_stride, \ int h, \ const type (*filter)[f_sz]) @@ -81,20 +80,21 @@ mc_funcs(32, avx2, int8_t, 32); #define mc_rep_func(avg, sz, hsz, dir, opt, type, f_sz) \ static av_always_inline void \ ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t *dst, \ - const uint8_t *src, \ ptrdiff_t dst_stride, \ + const uint8_t *src, \ ptrdiff_t src_stride, \ int h, \ const type (*filter)[f_sz]) \ { \ - ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst, src, \ + ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst, \ dst_stride, \ + src, \ src_stride, \ h, \ filter); \ ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst + hsz, \ - src + hsz, \ dst_stride, \ + src + hsz, \ src_stride, \ h, filter); \ } @@ -126,19 +126,18 @@ extern const int16_t ff_filters_sse2[3][15][8][8]; #define filter_8tap_2d_fn(op, sz, f, f_opt, fname, align, opt) \ static void \ op ## _8tap_ ## fname ## _ ## sz ## hv_ ## opt(uint8_t *dst, \ - const uint8_t *src, \ ptrdiff_t dst_stride, \ + const uint8_t *src, \ ptrdiff_t src_stride, \ int h, int mx, int my) \ { \ LOCAL_ALIGNED_ ## align(uint8_t, temp, [71 * 64]); \ - ff_vp9_put_8tap_1d_h_ ## sz ## _ ## opt(temp, src - 3 * src_stride, \ - 64, src_stride, \ - h + 7, \ + ff_vp9_put_8tap_1d_h_ ## sz ## _ ## opt(temp, 64, \ + src - 3 * src_stride, \ + src_stride, h + 7, \ ff_filters_ ## f_opt[f][mx - 1]); \ - ff_vp9_ ## op ## _8tap_1d_v_ ## sz ## _ ## opt(dst, temp + 3 * 64, \ - dst_stride, 64, \ - h, \ + ff_vp9_ ## op ## _8tap_1d_v_ ## sz ## _ ## opt(dst, dst_stride, \ + temp + 3 * 64, 64, h, \ ff_filters_ ## f_opt[f][my - 1]); \ } @@ -173,14 +172,15 @@ filters_8tap_2d_fn(avg, 32, 32, avx2, ssse3) #define filter_8tap_1d_fn(op, sz, f, f_opt, fname, dir, dvar, opt) \ static void \ op ## _8tap_ ## fname ## _ ## sz ## dir ## _ ## opt(uint8_t *dst, \ - const uint8_t *src, \ ptrdiff_t dst_stride, \ + const uint8_t *src, \ ptrdiff_t src_stride, \ int h, int mx, \ int my) \ { \ - ff_vp9_ ## op ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(dst, src, \ + ff_vp9_ ## op ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(dst, \ dst_stride, \ + src, \ src_stride, h,\ ff_filters_ ## f_opt[f][dvar - 1]); \ } diff --git a/libavcodec/x86/vp9mc.asm b/libavcodec/x86/vp9mc.asm index 15e93ea6cb..c9701aea18 100644 --- a/libavcodec/x86/vp9mc.asm +++ b/libavcodec/x86/vp9mc.asm @@ -107,7 +107,7 @@ SECTION .text %macro filter_sse2_h_fn 1 %assign %%px mmsize/2 -cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 15, dst, src, dstride, sstride, h, filtery +cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 15, dst, dstride, src, sstride, h, filtery pxor m5, m5 mova m6, [pw_64] mova m7, [filteryq+ 0] @@ -192,7 +192,7 @@ filter_sse2_h_fn avg %macro filter_h_fn 1 %assign %%px mmsize/2 -cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 11, dst, src, dstride, sstride, h, filtery +cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 11, dst, dstride, src, sstride, h, filtery mova m6, [pw_256] mova m7, [filteryq+ 0] %if ARCH_X86_64 && mmsize > 8 @@ -253,7 +253,7 @@ filter_h_fn avg %if ARCH_X86_64 %macro filter_hx2_fn 1 %assign %%px mmsize -cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 14, dst, src, dstride, sstride, h, filtery +cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 14, dst, dstride, src, sstride, h, filtery mova m13, [pw_256] mova m8, [filteryq+ 0] mova m9, [filteryq+32] @@ -315,9 +315,9 @@ filter_hx2_fn avg %macro filter_sse2_v_fn 1 %assign %%px mmsize/2 %if ARCH_X86_64 -cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 15, dst, src, dstride, sstride, h, filtery, src4, sstride3 +cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 15, dst, dstride, src, sstride, h, filtery, src4, sstride3 %else -cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 15, dst, src, dstride, sstride, filtery, src4, sstride3 +cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 15, dst, dstride, src, sstride, filtery, src4, sstride3 mov filteryq, r5mp %define hd r4mp %endif @@ -413,9 +413,9 @@ filter_sse2_v_fn avg %macro filter_v_fn 1 %assign %%px mmsize/2 %if ARCH_X86_64 -cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 11, dst, src, dstride, sstride, h, filtery, src4, sstride3 +cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 11, dst, dstride, src, sstride, h, filtery, src4, sstride3 %else -cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 11, dst, src, dstride, sstride, filtery, src4, sstride3 +cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 11, dst, dstride, src, sstride, filtery, src4, sstride3 mov filteryq, r5mp %define hd r4mp %endif @@ -486,7 +486,7 @@ filter_v_fn avg %macro filter_vx2_fn 1 %assign %%px mmsize -cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 14, dst, src, dstride, sstride, h, filtery, src4, sstride3 +cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 14, dst, dstride, src, sstride, h, filtery, src4, sstride3 mova m13, [pw_256] lea sstride3q, [sstrideq*3] lea src4q, [srcq+sstrideq] @@ -562,11 +562,11 @@ filter_vx2_fn avg %endif %if %2 <= mmsize -cglobal vp9_%1%2, 5, 7, 4, dst, src, dstride, sstride, h, dstride3, sstride3 +cglobal vp9_%1%2, 5, 7, 4, dst, dstride, src, sstride, h, dstride3, sstride3 lea sstride3q, [sstrideq*3] lea dstride3q, [dstrideq*3] %else -cglobal vp9_%1%2, 5, 5, 4, dst, src, dstride, sstride, h +cglobal vp9_%1%2, 5, 5, 4, dst, dstride, src, sstride, h %endif .loop: %%srcfn m0, [srcq] diff --git a/tests/checkasm/vp9dsp.c b/tests/checkasm/vp9dsp.c index f0d93725eb..f0cc2a7e45 100644 --- a/tests/checkasm/vp9dsp.c +++ b/tests/checkasm/vp9dsp.c @@ -228,8 +228,8 @@ static void check_mc(void) int op, hsize, filter, dx, dy; declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT, - void, uint8_t *dst, const uint8_t *ref, - ptrdiff_t dst_stride, ptrdiff_t ref_stride, + void, uint8_t *dst, ptrdiff_t dst_stride, + const uint8_t *ref, ptrdiff_t ref_stride, int h, int mx, int my); for (op = 0; op < 2; op++) { @@ -252,13 +252,11 @@ static void check_mc(void) int mx = dx ? 1 + (rnd() % 14) : 0; int my = dy ? 1 + (rnd() % 14) : 0; randomize_buffers(); - call_ref(dst0, src, - size * SIZEOF_PIXEL, - SRC_BUF_STRIDE * SIZEOF_PIXEL, + call_ref(dst0, size * SIZEOF_PIXEL, + src, SRC_BUF_STRIDE * SIZEOF_PIXEL, size, mx, my); - call_new(dst1, src, - size * SIZEOF_PIXEL, - SRC_BUF_STRIDE * SIZEOF_PIXEL, + call_new(dst1, size * SIZEOF_PIXEL, + src, SRC_BUF_STRIDE * SIZEOF_PIXEL, size, mx, my); if (memcmp(dst0, dst1, DST_BUF_SIZE)) fail(); @@ -267,8 +265,8 @@ static void check_mc(void) // functions are identical if (filter >= 1 && filter <= 2) continue; - bench_new(dst1, src, size * SIZEOF_PIXEL, - SRC_BUF_STRIDE * SIZEOF_PIXEL, + bench_new(dst1, size * SIZEOF_PIXEL, + src, SRC_BUF_STRIDE * SIZEOF_PIXEL, size, mx, my); } }