vp9: Flip the order of arguments in MC functions

This makes the argument order match the pattern already used for the VP8 MC functions.

This also makes the signature match FFmpeg's version of these
functions, which eases porting of code in both directions.

Signed-off-by: Martin Storsjö <martin@martin.st>
pull/258/head
Martin Storsjö 8 years ago
parent e3fb74f7f9
commit 2e55e26b40
  1. 5
      libavcodec/vp9.h
  2. 16
      libavcodec/vp9block.c
  3. 69
      libavcodec/vp9dsp.c
  4. 34
      libavcodec/x86/vp9dsp_init.c
  5. 20
      libavcodec/x86/vp9mc.asm
  6. 18
      tests/checkasm/vp9dsp.c

@ -127,9 +127,8 @@ typedef struct ProbContext {
uint8_t partition[4][4][3];
} ProbContext;
typedef void (*vp9_mc_func)(uint8_t *dst, const uint8_t *ref,
ptrdiff_t dst_stride,
ptrdiff_t ref_stride,
typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
int h, int mx, int my);
typedef struct VP9DSPContext {

@ -1187,7 +1187,7 @@ static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func(*mc)[2],
ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
ref_stride = 80;
}
mc[!!mx][!!my](dst, ref, dst_stride, ref_stride, bh, mx << 1, my << 1);
mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
}
static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2],
@ -1227,7 +1227,7 @@ static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2],
bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h);
ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
mc[!!mx][!!my](dst_u, ref_u, dst_stride, 80, bh, mx, my);
mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref_v - !!my * 3 * src_stride_v - !!mx * 3,
@ -1236,10 +1236,10 @@ static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func(*mc)[2],
bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h);
ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
mc[!!mx][!!my](dst_v, ref_v, dst_stride, 80, bh, mx, my);
mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
} else {
mc[!!mx][!!my](dst_u, ref_u, dst_stride, src_stride_u, bh, mx, my);
mc[!!mx][!!my](dst_v, ref_v, dst_stride, src_stride_v, bh, mx, my);
mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
}
}
@ -1668,8 +1668,8 @@ int ff_vp9_decode_block(AVCodecContext *avctx, int row, int col,
av_assert2(n <= 4);
if (w & bw) {
s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o,
s->tmp_y + o,
f->linesize[0],
s->tmp_y + o,
64, h, 0, 0);
o += bw;
}
@ -1686,12 +1686,12 @@ int ff_vp9_decode_block(AVCodecContext *avctx, int row, int col,
av_assert2(n <= 4);
if (w & bw) {
s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o,
s->tmp_uv[0] + o,
f->linesize[1],
s->tmp_uv[0] + o,
32, h, 0, 0);
s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o,
s->tmp_uv[1] + o,
f->linesize[2],
s->tmp_uv[1] + o,
32, h, 0, 0);
o += bw;
}

@ -1738,9 +1738,8 @@ static av_cold void vp9dsp_loopfilter_init(VP9DSPContext *dsp)
dsp->loop_filter_mix2[1][1][1] = loop_filter_v_88_16_c;
}
static av_always_inline void copy_c(uint8_t *dst, const uint8_t *src,
ptrdiff_t dst_stride,
ptrdiff_t src_stride,
static av_always_inline void copy_c(uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride,
int w, int h)
{
do {
@ -1751,9 +1750,8 @@ static av_always_inline void copy_c(uint8_t *dst, const uint8_t *src,
} while (--h);
}
static av_always_inline void avg_c(uint8_t *dst, const uint8_t *src,
ptrdiff_t dst_stride,
ptrdiff_t src_stride,
static av_always_inline void avg_c(uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride,
int w, int h)
{
do {
@ -1767,13 +1765,12 @@ static av_always_inline void avg_c(uint8_t *dst, const uint8_t *src,
} while (--h);
}
#define fpel_fn(type, sz) \
static void type ## sz ## _c(uint8_t *dst, const uint8_t *src, \
ptrdiff_t dst_stride, \
ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
type ## _c(dst, src, dst_stride, src_stride, sz, h); \
#define fpel_fn(type, sz) \
static void type ## sz ## _c(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
type ## _c(dst, dst_stride, src, src_stride, sz, h); \
}
#define copy_avg_fn(sz) \
@ -1851,9 +1848,8 @@ static const int8_t vp9_subpel_filters[3][15][8] = {
F[6] * src[x + +3 * stride] + \
F[7] * src[x + +4 * stride] + 64) >> 7)
static av_always_inline void do_8tap_1d_c(uint8_t *dst, const uint8_t *src,
ptrdiff_t dst_stride,
ptrdiff_t src_stride,
static av_always_inline void do_8tap_1d_c(uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride,
int w, int h, ptrdiff_t ds,
const int8_t *filter, int avg)
{
@ -1873,13 +1869,13 @@ static av_always_inline void do_8tap_1d_c(uint8_t *dst, const uint8_t *src,
#define filter_8tap_1d_fn(opn, opa, dir, ds) \
static av_noinline void opn ## _8tap_1d_ ## dir ## _c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \
int w, int h, \
const int8_t *filter) \
{ \
do_8tap_1d_c(dst, src, dst_stride, src_stride, w, h, ds, filter, opa); \
do_8tap_1d_c(dst, dst_stride, src, src_stride, w, h, ds, filter, opa); \
}
filter_8tap_1d_fn(put, 0, v, src_stride)
@ -1889,9 +1885,8 @@ filter_8tap_1d_fn(avg, 1, h, 1)
#undef filter_8tap_1d_fn
static av_always_inline void do_8tap_2d_c(uint8_t *dst, const uint8_t *src,
ptrdiff_t dst_stride,
ptrdiff_t src_stride,
static av_always_inline void do_8tap_2d_c(uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *src, ptrdiff_t src_stride,
int w, int h, const int8_t *filterx,
const int8_t *filtery, int avg)
{
@ -1926,14 +1921,14 @@ static av_always_inline void do_8tap_2d_c(uint8_t *dst, const uint8_t *src,
#define filter_8tap_2d_fn(opn, opa) \
static av_noinline void opn ## _8tap_2d_hv_c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \
int w, int h, \
const int8_t *filterx, \
const int8_t *filtery) \
{ \
do_8tap_2d_c(dst, src, dst_stride, src_stride, \
do_8tap_2d_c(dst, dst_stride, src, src_stride, \
w, h, filterx, filtery, opa); \
}
@ -1947,23 +1942,23 @@ filter_8tap_2d_fn(avg, 1)
#define filter_fn_1d(sz, dir, dir_m, type, type_idx, avg) \
static void \
avg ## _8tap_ ## type ## _ ## sz ## dir ## _c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
avg ## _8tap_1d_ ## dir ## _c(dst, src, dst_stride, src_stride, sz, h, \
avg ## _8tap_1d_ ## dir ## _c(dst, dst_stride, src, src_stride, sz, h, \
vp9_subpel_filters[type_idx][dir_m - 1]); \
}
#define filter_fn_2d(sz, type, type_idx, avg) \
static void avg ## _8tap_ ## type ## _ ## sz ## hv_c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
avg ## _8tap_2d_hv_c(dst, src, dst_stride, src_stride, sz, h, \
avg ## _8tap_2d_hv_c(dst, dst_stride, src, src_stride, sz, h, \
vp9_subpel_filters[type_idx][mx - 1], \
vp9_subpel_filters[type_idx][my - 1]); \
}
@ -1972,8 +1967,8 @@ static void avg ## _8tap_ ## type ## _ ## sz ## hv_c(uint8_t *dst, \
(src[x] + ((mxy * (src[x + stride] - src[x]) + 8) >> 4))
static av_always_inline void do_bilin_1d_c(uint8_t *dst,
const uint8_t *src,
ptrdiff_t dst_stride,
const uint8_t *src,
ptrdiff_t src_stride,
int w, int h, ptrdiff_t ds,
int mxy, int avg)
@ -1994,12 +1989,12 @@ static av_always_inline void do_bilin_1d_c(uint8_t *dst,
#define bilin_1d_fn(opn, opa, dir, ds) \
static av_noinline void opn ## _bilin_1d_ ## dir ## _c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \
int w, int h, int mxy) \
{ \
do_bilin_1d_c(dst, src, dst_stride, src_stride, w, h, ds, mxy, opa); \
do_bilin_1d_c(dst, dst_stride, src, src_stride, w, h, ds, mxy, opa); \
}
bilin_1d_fn(put, 0, v, src_stride)
@ -2010,8 +2005,8 @@ bilin_1d_fn(avg, 1, h, 1)
#undef bilin_1d_fn
static av_always_inline void do_bilin_2d_c(uint8_t *dst,
const uint8_t *src,
ptrdiff_t dst_stride,
const uint8_t *src,
ptrdiff_t src_stride,
int w, int h, int mx, int my,
int avg)
@ -2046,13 +2041,13 @@ static av_always_inline void do_bilin_2d_c(uint8_t *dst,
#define bilin_2d_fn(opn, opa) \
static av_noinline void opn ## _bilin_2d_hv_c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \
int w, int h, \
int mx, int my) \
{ \
do_bilin_2d_c(dst, src, dst_stride, src_stride, w, h, mx, my, opa); \
do_bilin_2d_c(dst, dst_stride, src, src_stride, w, h, mx, my, opa); \
}
bilin_2d_fn(put, 0)
@ -2064,23 +2059,23 @@ bilin_2d_fn(avg, 1)
#define bilinf_fn_1d(sz, dir, dir_m, avg) \
static void avg ## _bilin_ ## sz ## dir ## _c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
avg ## _bilin_1d_ ## dir ## _c(dst, src, dst_stride, src_stride, \
avg ## _bilin_1d_ ## dir ## _c(dst, dst_stride, src, src_stride, \
sz, h, dir_m); \
}
#define bilinf_fn_2d(sz, avg) \
static void avg ## _bilin_ ## sz ## hv_c(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
avg ## _bilin_2d_hv_c(dst, src, dst_stride, src_stride, \
avg ## _bilin_2d_hv_c(dst, dst_stride, src, src_stride, \
sz, h, mx, my); \
}

@ -29,10 +29,9 @@
#if HAVE_YASM
#define fpel_func(avg, sz, opt) \
void ff_vp9_ ## avg ## sz ## _ ## opt(uint8_t *dst, const uint8_t *src, \
ptrdiff_t dst_stride, \
ptrdiff_t src_stride, \
#define fpel_func(avg, sz, opt) \
void ff_vp9_ ## avg ## sz ## _ ## opt(uint8_t *dst, ptrdiff_t dst_stride, \
const uint8_t *src, ptrdiff_t src_stride, \
int h, int mx, int my)
fpel_func(put, 4, mmx);
@ -54,8 +53,8 @@ fpel_func(avg, 64, avx2);
#define mc_func(avg, sz, dir, opt, type, f_sz) \
void \
ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \
int h, \
const type (*filter)[f_sz])
@ -81,20 +80,21 @@ mc_funcs(32, avx2, int8_t, 32);
#define mc_rep_func(avg, sz, hsz, dir, opt, type, f_sz) \
static av_always_inline void \
ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \
int h, \
const type (*filter)[f_sz]) \
{ \
ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst, src, \
ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst, \
dst_stride, \
src, \
src_stride, \
h, \
filter); \
ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst + hsz, \
src + hsz, \
dst_stride, \
src + hsz, \
src_stride, \
h, filter); \
}
@ -126,19 +126,18 @@ extern const int16_t ff_filters_sse2[3][15][8][8];
#define filter_8tap_2d_fn(op, sz, f, f_opt, fname, align, opt) \
static void \
op ## _8tap_ ## fname ## _ ## sz ## hv_ ## opt(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \
int h, int mx, int my) \
{ \
LOCAL_ALIGNED_ ## align(uint8_t, temp, [71 * 64]); \
ff_vp9_put_8tap_1d_h_ ## sz ## _ ## opt(temp, src - 3 * src_stride, \
64, src_stride, \
h + 7, \
ff_vp9_put_8tap_1d_h_ ## sz ## _ ## opt(temp, 64, \
src - 3 * src_stride, \
src_stride, h + 7, \
ff_filters_ ## f_opt[f][mx - 1]); \
ff_vp9_ ## op ## _8tap_1d_v_ ## sz ## _ ## opt(dst, temp + 3 * 64, \
dst_stride, 64, \
h, \
ff_vp9_ ## op ## _8tap_1d_v_ ## sz ## _ ## opt(dst, dst_stride, \
temp + 3 * 64, 64, h, \
ff_filters_ ## f_opt[f][my - 1]); \
}
@ -173,14 +172,15 @@ filters_8tap_2d_fn(avg, 32, 32, avx2, ssse3)
#define filter_8tap_1d_fn(op, sz, f, f_opt, fname, dir, dvar, opt) \
static void \
op ## _8tap_ ## fname ## _ ## sz ## dir ## _ ## opt(uint8_t *dst, \
const uint8_t *src, \
ptrdiff_t dst_stride, \
const uint8_t *src, \
ptrdiff_t src_stride, \
int h, int mx, \
int my) \
{ \
ff_vp9_ ## op ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(dst, src, \
ff_vp9_ ## op ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(dst, \
dst_stride, \
src, \
src_stride, h,\
ff_filters_ ## f_opt[f][dvar - 1]); \
}

@ -107,7 +107,7 @@ SECTION .text
%macro filter_sse2_h_fn 1
%assign %%px mmsize/2
cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 15, dst, src, dstride, sstride, h, filtery
cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 15, dst, dstride, src, sstride, h, filtery
pxor m5, m5
mova m6, [pw_64]
mova m7, [filteryq+ 0]
@ -192,7 +192,7 @@ filter_sse2_h_fn avg
%macro filter_h_fn 1
%assign %%px mmsize/2
cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 11, dst, src, dstride, sstride, h, filtery
cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 11, dst, dstride, src, sstride, h, filtery
mova m6, [pw_256]
mova m7, [filteryq+ 0]
%if ARCH_X86_64 && mmsize > 8
@ -253,7 +253,7 @@ filter_h_fn avg
%if ARCH_X86_64
%macro filter_hx2_fn 1
%assign %%px mmsize
cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 14, dst, src, dstride, sstride, h, filtery
cglobal vp9_%1_8tap_1d_h_ %+ %%px, 6, 6, 14, dst, dstride, src, sstride, h, filtery
mova m13, [pw_256]
mova m8, [filteryq+ 0]
mova m9, [filteryq+32]
@ -315,9 +315,9 @@ filter_hx2_fn avg
%macro filter_sse2_v_fn 1
%assign %%px mmsize/2
%if ARCH_X86_64
cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 15, dst, src, dstride, sstride, h, filtery, src4, sstride3
cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 15, dst, dstride, src, sstride, h, filtery, src4, sstride3
%else
cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 15, dst, src, dstride, sstride, filtery, src4, sstride3
cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 15, dst, dstride, src, sstride, filtery, src4, sstride3
mov filteryq, r5mp
%define hd r4mp
%endif
@ -413,9 +413,9 @@ filter_sse2_v_fn avg
%macro filter_v_fn 1
%assign %%px mmsize/2
%if ARCH_X86_64
cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 11, dst, src, dstride, sstride, h, filtery, src4, sstride3
cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 11, dst, dstride, src, sstride, h, filtery, src4, sstride3
%else
cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 11, dst, src, dstride, sstride, filtery, src4, sstride3
cglobal vp9_%1_8tap_1d_v_ %+ %%px, 4, 7, 11, dst, dstride, src, sstride, filtery, src4, sstride3
mov filteryq, r5mp
%define hd r4mp
%endif
@ -486,7 +486,7 @@ filter_v_fn avg
%macro filter_vx2_fn 1
%assign %%px mmsize
cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 14, dst, src, dstride, sstride, h, filtery, src4, sstride3
cglobal vp9_%1_8tap_1d_v_ %+ %%px, 6, 8, 14, dst, dstride, src, sstride, h, filtery, src4, sstride3
mova m13, [pw_256]
lea sstride3q, [sstrideq*3]
lea src4q, [srcq+sstrideq]
@ -562,11 +562,11 @@ filter_vx2_fn avg
%endif
%if %2 <= mmsize
cglobal vp9_%1%2, 5, 7, 4, dst, src, dstride, sstride, h, dstride3, sstride3
cglobal vp9_%1%2, 5, 7, 4, dst, dstride, src, sstride, h, dstride3, sstride3
lea sstride3q, [sstrideq*3]
lea dstride3q, [dstrideq*3]
%else
cglobal vp9_%1%2, 5, 5, 4, dst, src, dstride, sstride, h
cglobal vp9_%1%2, 5, 5, 4, dst, dstride, src, sstride, h
%endif
.loop:
%%srcfn m0, [srcq]

@ -228,8 +228,8 @@ static void check_mc(void)
int op, hsize, filter, dx, dy;
declare_func_emms(AV_CPU_FLAG_MMX | AV_CPU_FLAG_MMXEXT,
void, uint8_t *dst, const uint8_t *ref,
ptrdiff_t dst_stride, ptrdiff_t ref_stride,
void, uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
int h, int mx, int my);
for (op = 0; op < 2; op++) {
@ -252,13 +252,11 @@ static void check_mc(void)
int mx = dx ? 1 + (rnd() % 14) : 0;
int my = dy ? 1 + (rnd() % 14) : 0;
randomize_buffers();
call_ref(dst0, src,
size * SIZEOF_PIXEL,
SRC_BUF_STRIDE * SIZEOF_PIXEL,
call_ref(dst0, size * SIZEOF_PIXEL,
src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
size, mx, my);
call_new(dst1, src,
size * SIZEOF_PIXEL,
SRC_BUF_STRIDE * SIZEOF_PIXEL,
call_new(dst1, size * SIZEOF_PIXEL,
src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
size, mx, my);
if (memcmp(dst0, dst1, DST_BUF_SIZE))
fail();
@ -267,8 +265,8 @@ static void check_mc(void)
// functions are identical
if (filter >= 1 && filter <= 2) continue;
bench_new(dst1, src, size * SIZEOF_PIXEL,
SRC_BUF_STRIDE * SIZEOF_PIXEL,
bench_new(dst1, size * SIZEOF_PIXEL,
src, SRC_BUF_STRIDE * SIZEOF_PIXEL,
size, mx, my);
}
}

Loading…
Cancel
Save