vp9: do unscaled MC in scaled path if size of this reference matches.

This can happen if we do bidirectional MC, where one reference has the
same size as the current frame, but the other one doesn't.
pull/148/head
Ronald S. Bultje 10 years ago
parent 873dbc6758
commit 9cdeb105a6
  1. 219
      libavcodec/vp9.c

@ -2766,7 +2766,108 @@ static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv
intra_recon(ctx, y_off, uv_off, 2);
}
static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
ThreadFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h, int bytesperpixel)
{
int mx = mv->x, my = mv->y, th;
y += my >> 3;
x += mx >> 3;
ref += y * ref_stride + x * bytesperpixel;
mx &= 7;
my &= 7;
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th = (y + bh + 4 * !!my + 7) >> 6;
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
if (x < !!mx * 3 || y < !!my * 3 ||
x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
160, ref_stride,
bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h);
ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
ref_stride = 160;
}
mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
}
static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
uint8_t *dst_u, uint8_t *dst_v,
ptrdiff_t dst_stride,
const uint8_t *ref_u, ptrdiff_t src_stride_u,
const uint8_t *ref_v, ptrdiff_t src_stride_v,
ThreadFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h, int bytesperpixel)
{
int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
y += my >> 4;
x += mx >> 4;
ref_u += y * src_stride_u + x * bytesperpixel;
ref_v += y * src_stride_v + x * bytesperpixel;
mx &= 15;
my &= 15;
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
if (x < !!mx * 3 || y < !!my * 3 ||
x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
160, src_stride_u,
bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h);
ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
160, src_stride_v,
bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h);
ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
} else {
mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
}
}
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
px, py, pw, ph, bw, bh, w, h, i) \
mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
mv, bw, bh, w, h, bytesperpixel)
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, bw, bh, w, h, bytesperpixel)
#define SCALED 0
#define FN(x) x##_8bpp
#define BYTES_PER_PIXEL 1
#include "vp9_mc_template.c"
#undef FN
#undef BYTES_PER_PIXEL
#define FN(x) x##_16bpp
#define BYTES_PER_PIXEL 2
#include "vp9_mc_template.c"
#undef mc_luma_dir
#undef mc_chroma_dir
#undef FN
#undef BYTES_PER_PIXEL
#undef SCALED
static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
vp9_mc_func (*mc)[2],
uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
ThreadFrame *ref_frame,
@ -2775,6 +2876,11 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm
int bw, int bh, int w, int h, int bytesperpixel,
const uint16_t *scale, const uint8_t *step)
{
if (s->frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
s->frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
mc_luma_unscaled(s, mc, dst, dst_stride, ref, ref_stride, ref_frame,
y, x, in_mv, bw, bh, w, h, bytesperpixel);
} else {
#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
int mx, my;
int refbw_m1, refbh_m1;
@ -2811,9 +2917,11 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm
ref_stride = 288;
}
smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
}
}
static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
vp9_mc_func (*mc)[2],
uint8_t *dst_u, uint8_t *dst_v,
ptrdiff_t dst_stride,
const uint8_t *ref_u, ptrdiff_t src_stride_u,
@ -2824,6 +2932,12 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
int bw, int bh, int w, int h, int bytesperpixel,
const uint16_t *scale, const uint8_t *step)
{
if (s->frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
s->frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
mc_chroma_unscaled(s, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
ref_v, src_stride_v, ref_frame,
y, x, in_mv, bw, bh, w, h, bytesperpixel);
} else {
int mx, my;
int refbw_m1, refbh_m1;
int th;
@ -2879,16 +2993,17 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
}
}
}
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
px, py, pw, ph, bw, bh, w, h, i) \
mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
mc_luma_scaled(s, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
mc_chroma_scaled(s, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define SCALED 1
@ -2906,106 +3021,6 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
#undef BYTES_PER_PIXEL
#undef SCALED
static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
ThreadFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h, int bytesperpixel)
{
int mx = mv->x, my = mv->y, th;
y += my >> 3;
x += mx >> 3;
ref += y * ref_stride + x * bytesperpixel;
mx &= 7;
my &= 7;
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th = (y + bh + 4 * !!my + 7) >> 6;
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
if (x < !!mx * 3 || y < !!my * 3 ||
x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
160, ref_stride,
bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h);
ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
ref_stride = 160;
}
mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
}
static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
uint8_t *dst_u, uint8_t *dst_v,
ptrdiff_t dst_stride,
const uint8_t *ref_u, ptrdiff_t src_stride_u,
const uint8_t *ref_v, ptrdiff_t src_stride_v,
ThreadFrame *ref_frame,
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h, int bytesperpixel)
{
int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
y += my >> 4;
x += mx >> 4;
ref_u += y * src_stride_u + x * bytesperpixel;
ref_v += y * src_stride_v + x * bytesperpixel;
mx &= 15;
my &= 15;
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
if (x < !!mx * 3 || y < !!my * 3 ||
x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
160, src_stride_u,
bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h);
ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
160, src_stride_v,
bw + !!mx * 7, bh + !!my * 7,
x - !!mx * 3, y - !!my * 3, w, h);
ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
} else {
mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
}
}
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
px, py, pw, ph, bw, bh, w, h, i) \
mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
mv, bw, bh, w, h, bytesperpixel)
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, bw, bh, w, h, bytesperpixel)
#define SCALED 0
#define FN(x) x##_8bpp
#define BYTES_PER_PIXEL 1
#include "vp9_mc_template.c"
#undef FN
#undef BYTES_PER_PIXEL
#define FN(x) x##_16bpp
#define BYTES_PER_PIXEL 2
#include "vp9_mc_template.c"
#undef mc_luma_dir_dir
#undef mc_chroma_dir_dir
#undef FN
#undef BYTES_PER_PIXEL
#undef SCALED
static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
{
VP9Context *s = ctx->priv_data;

Loading…
Cancel
Save