diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 2732e89c87..28539e02e5 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -73,8 +73,8 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += mpegaudiodsp.o \ mpegaudiodsp_data.o \ mpegaudiodsp_fixed.o \ mpegaudiodsp_float.o -OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideo_motion.o \ - mpegutils.o +OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideodsp.o \ + mpegvideo_motion.o mpegutils.o OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \ motion_est.o ratecontrol.o OBJS-$(CONFIG_QPELDSP) += qpeldsp.o diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 89363aef77..ddab510868 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -3,8 +3,6 @@ * Copyright (c) 2000, 2001 Fabrice Bellard * Copyright (c) 2002-2004 Michael Niedermayer * - * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer - * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -415,92 +413,6 @@ static int sum_abs_dctelem_c(int16_t *block) #define avg2(a, b) ((a + b + 1) >> 1) #define avg4(a, b, c, d) ((a + b + c + d + 2) >> 2) -static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, - int x16, int y16, int rounder) -{ - const int A = (16 - x16) * (16 - y16); - const int B = (x16) * (16 - y16); - const int C = (16 - x16) * (y16); - const int D = (x16) * (y16); - int i; - - for (i = 0; i < h; i++) { - dst[0] = (A * src[0] + B * src[1] + C * src[stride + 0] + D * src[stride + 1] + rounder) >> 8; - dst[1] = (A * src[1] + B * src[2] + C * src[stride + 1] + D * src[stride + 2] + rounder) >> 8; - dst[2] = (A * src[2] + B * src[3] + C * src[stride + 2] + D * src[stride + 3] + rounder) >> 8; - dst[3] = (A * src[3] + B * src[4] + C * src[stride + 3] + D * src[stride + 4] + rounder) >> 8; - dst[4] = (A * src[4] + B * src[5] + C * src[stride + 4] + D * src[stride + 5] + rounder) >> 8; - dst[5] = (A * src[5] + B * src[6] + C * src[stride + 5] + D * src[stride + 6] + rounder) >> 8; 
- dst[6] = (A * src[6] + B * src[7] + C * src[stride + 6] + D * src[stride + 7] + rounder) >> 8; - dst[7] = (A * src[7] + B * src[8] + C * src[stride + 7] + D * src[stride + 8] + rounder) >> 8; - dst += stride; - src += stride; - } -} - -void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, - int dxx, int dxy, int dyx, int dyy, int shift, int r, - int width, int height) -{ - int y, vx, vy; - const int s = 1 << shift; - - width--; - height--; - - for (y = 0; y < h; y++) { - int x; - - vx = ox; - vy = oy; - for (x = 0; x < 8; x++) { // FIXME: optimize - int index; - int src_x = vx >> 16; - int src_y = vy >> 16; - int frac_x = src_x & (s - 1); - int frac_y = src_y & (s - 1); - - src_x >>= shift; - src_y >>= shift; - - if ((unsigned) src_x < width) { - if ((unsigned) src_y < height) { - index = src_x + src_y * stride; - dst[y * stride + x] = - ((src[index] * (s - frac_x) + - src[index + 1] * frac_x) * (s - frac_y) + - (src[index + stride] * (s - frac_x) + - src[index + stride + 1] * frac_x) * frac_y + - r) >> (shift * 2); - } else { - index = src_x + av_clip(src_y, 0, height) * stride; - dst[y * stride + x] = - ((src[index] * (s - frac_x) + - src[index + 1] * frac_x) * s + - r) >> (shift * 2); - } - } else { - if ((unsigned) src_y < height) { - index = av_clip(src_x, 0, width) + src_y * stride; - dst[y * stride + x] = - ((src[index] * (s - frac_y) + - src[index + stride] * frac_y) * s + - r) >> (shift * 2); - } else { - index = av_clip(src_x, 0, width) + - av_clip(src_y, 0, height) * stride; - dst[y * stride + x] = src[index]; - } - } - - vx += dxx; - vy += dyx; - } - ox += dxy; - oy += dyy; - } -} - static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { @@ -1500,9 +1412,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) c->sum_abs_dctelem = sum_abs_dctelem_c; - c->gmc1 = gmc1_c; - c->gmc = ff_gmc_c; - c->pix_sum = pix_sum_c; c->pix_norm1 = pix_norm1_c; diff --git 
a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 4808a6bf37..aa182d33e4 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -34,9 +34,6 @@ extern uint32_t ff_square_tab[512]; -void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, - int dxx, int dxy, int dyx, int dyy, int shift, int r, - int width, int height); /* minimum alignment rules ;) * If you notice errors in the align stuff, need more alignment for some ASM code @@ -97,18 +94,6 @@ typedef struct DSPContext { uint8_t *pixels /* align 8 */, int line_size); int (*sum_abs_dctelem)(int16_t *block /* align 16 */); - /** - * translational global motion compensation. - */ - void (*gmc1)(uint8_t *dst /* align 8 */, uint8_t *src /* align 1 */, - int srcStride, int h, int x16, int y16, int rounder); - /** - * global motion compensation. - */ - void (*gmc)(uint8_t *dst /* align 8 */, uint8_t *src /* align 1 */, - int stride, int h, int ox, int oy, - int dxx, int dxy, int dyx, int dyy, - int shift, int r, int width, int height); int (*pix_sum)(uint8_t *pix, int line_size); int (*pix_norm1)(uint8_t *pix, int line_size); diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index ffea7b0d04..4d1a70f33c 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -383,6 +383,7 @@ av_cold int ff_dct_common_init(MpegEncContext *s) ff_dsputil_init(&s->dsp, s->avctx); ff_h264chroma_init(&s->h264chroma, 8); //for lowres ff_hpeldsp_init(&s->hdsp, s->avctx->flags); + ff_mpegvideodsp_init(&s->mdsp); ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample); if (s->avctx->debug & FF_DEBUG_NOMC) { diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index 18eab28209..8ec35457c6 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -36,6 +36,7 @@ #include "h264chroma.h" #include "h263dsp.h" #include "hpeldsp.h" +#include "mpegvideodsp.h" #include "put_bits.h" #include "ratecontrol.h" #include "parser.h" @@ -361,6 +362,7 @@ typedef struct MpegEncContext { 
DSPContext dsp; ///< pointers for accelerated dsp functions H264ChromaContext h264chroma; HpelDSPContext hdsp; + MpegVideoDSPContext mdsp; QpelDSPContext qdsp; VideoDSPContext vdsp; H263DSPContext h263dsp; diff --git a/libavcodec/mpegvideo_motion.c b/libavcodec/mpegvideo_motion.c index b006b99002..0d3ba08dee 100644 --- a/libavcodec/mpegvideo_motion.c +++ b/libavcodec/mpegvideo_motion.c @@ -26,7 +26,6 @@ #include "libavutil/avassert.h" #include "libavutil/internal.h" #include "avcodec.h" -#include "dsputil.h" #include "h261.h" #include "mpegutils.h" #include "mpegvideo.h" @@ -73,10 +72,10 @@ static void gmc1_motion(MpegEncContext *s, } if ((motion_x | motion_y) & 7) { - s->dsp.gmc1(dest_y, ptr, linesize, 16, - motion_x & 15, motion_y & 15, 128 - s->no_rounding); - s->dsp.gmc1(dest_y + 8, ptr + 8, linesize, 16, - motion_x & 15, motion_y & 15, 128 - s->no_rounding); + s->mdsp.gmc1(dest_y, ptr, linesize, 16, + motion_x & 15, motion_y & 15, 128 - s->no_rounding); + s->mdsp.gmc1(dest_y + 8, ptr + 8, linesize, 16, + motion_x & 15, motion_y & 15, 128 - s->no_rounding); } else { int dxy; @@ -116,8 +115,8 @@ static void gmc1_motion(MpegEncContext *s, ptr = s->edge_emu_buffer; emu = 1; } - s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, - motion_x & 15, motion_y & 15, 128 - s->no_rounding); + s->mdsp.gmc1(dest_cb, ptr, uvlinesize, 8, + motion_x & 15, motion_y & 15, 128 - s->no_rounding); ptr = ref_picture[2] + offset; if (emu) { @@ -128,8 +127,8 @@ static void gmc1_motion(MpegEncContext *s, s->h_edge_pos >> 1, s->v_edge_pos >> 1); ptr = s->edge_emu_buffer; } - s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, - motion_x & 15, motion_y & 15, 128 - s->no_rounding); + s->mdsp.gmc1(dest_cr, ptr, uvlinesize, 8, + motion_x & 15, motion_y & 15, 128 - s->no_rounding); } static void gmc_motion(MpegEncContext *s, @@ -151,19 +150,19 @@ static void gmc_motion(MpegEncContext *s, oy = s->sprite_offset[0][1] + s->sprite_delta[1][0] * s->mb_x * 16 + s->sprite_delta[1][1] * s->mb_y * 16; - s->dsp.gmc(dest_y, 
ptr, linesize, 16, - ox, oy, - s->sprite_delta[0][0], s->sprite_delta[0][1], - s->sprite_delta[1][0], s->sprite_delta[1][1], - a + 1, (1 << (2 * a + 1)) - s->no_rounding, - s->h_edge_pos, s->v_edge_pos); - s->dsp.gmc(dest_y + 8, ptr, linesize, 16, - ox + s->sprite_delta[0][0] * 8, - oy + s->sprite_delta[1][0] * 8, - s->sprite_delta[0][0], s->sprite_delta[0][1], - s->sprite_delta[1][0], s->sprite_delta[1][1], - a + 1, (1 << (2 * a + 1)) - s->no_rounding, - s->h_edge_pos, s->v_edge_pos); + s->mdsp.gmc(dest_y, ptr, linesize, 16, + ox, oy, + s->sprite_delta[0][0], s->sprite_delta[0][1], + s->sprite_delta[1][0], s->sprite_delta[1][1], + a + 1, (1 << (2 * a + 1)) - s->no_rounding, + s->h_edge_pos, s->v_edge_pos); + s->mdsp.gmc(dest_y + 8, ptr, linesize, 16, + ox + s->sprite_delta[0][0] * 8, + oy + s->sprite_delta[1][0] * 8, + s->sprite_delta[0][0], s->sprite_delta[0][1], + s->sprite_delta[1][0], s->sprite_delta[1][1], + a + 1, (1 << (2 * a + 1)) - s->no_rounding, + s->h_edge_pos, s->v_edge_pos); if (CONFIG_GRAY && s->flags & CODEC_FLAG_GRAY) return; @@ -174,20 +173,20 @@ static void gmc_motion(MpegEncContext *s, s->sprite_delta[1][1] * s->mb_y * 8; ptr = ref_picture[1]; - s->dsp.gmc(dest_cb, ptr, uvlinesize, 8, - ox, oy, - s->sprite_delta[0][0], s->sprite_delta[0][1], - s->sprite_delta[1][0], s->sprite_delta[1][1], - a + 1, (1 << (2 * a + 1)) - s->no_rounding, - s->h_edge_pos >> 1, s->v_edge_pos >> 1); + s->mdsp.gmc(dest_cb, ptr, uvlinesize, 8, + ox, oy, + s->sprite_delta[0][0], s->sprite_delta[0][1], + s->sprite_delta[1][0], s->sprite_delta[1][1], + a + 1, (1 << (2 * a + 1)) - s->no_rounding, + s->h_edge_pos >> 1, s->v_edge_pos >> 1); ptr = ref_picture[2]; - s->dsp.gmc(dest_cr, ptr, uvlinesize, 8, - ox, oy, - s->sprite_delta[0][0], s->sprite_delta[0][1], - s->sprite_delta[1][0], s->sprite_delta[1][1], - a + 1, (1 << (2 * a + 1)) - s->no_rounding, - s->h_edge_pos >> 1, s->v_edge_pos >> 1); + s->mdsp.gmc(dest_cr, ptr, uvlinesize, 8, + ox, oy, + s->sprite_delta[0][0], 
s->sprite_delta[0][1], + s->sprite_delta[1][0], s->sprite_delta[1][1], + a + 1, (1 << (2 * a + 1)) - s->no_rounding, + s->h_edge_pos >> 1, s->v_edge_pos >> 1); } static inline int hpel_motion(MpegEncContext *s, diff --git a/libavcodec/mpegvideodsp.c b/libavcodec/mpegvideodsp.c new file mode 100644 index 0000000000..a58e45ad43 --- /dev/null +++ b/libavcodec/mpegvideodsp.c @@ -0,0 +1,119 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/common.h" +#include "mpegvideodsp.h" + +static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, + int x16, int y16, int rounder) +{ + const int A = (16 - x16) * (16 - y16); + const int B = (x16) * (16 - y16); + const int C = (16 - x16) * (y16); + const int D = (x16) * (y16); + int i; + + for (i = 0; i < h; i++) { + dst[0] = (A * src[0] + B * src[1] + C * src[stride + 0] + D * src[stride + 1] + rounder) >> 8; + dst[1] = (A * src[1] + B * src[2] + C * src[stride + 1] + D * src[stride + 2] + rounder) >> 8; + dst[2] = (A * src[2] + B * src[3] + C * src[stride + 2] + D * src[stride + 3] + rounder) >> 8; + dst[3] = (A * src[3] + B * src[4] + C * src[stride + 3] + D * src[stride + 4] + rounder) >> 8; + dst[4] = (A * src[4] + B * src[5] 
+ C * src[stride + 4] + D * src[stride + 5] + rounder) >> 8; + dst[5] = (A * src[5] + B * src[6] + C * src[stride + 5] + D * src[stride + 6] + rounder) >> 8; + dst[6] = (A * src[6] + B * src[7] + C * src[stride + 6] + D * src[stride + 7] + rounder) >> 8; + dst[7] = (A * src[7] + B * src[8] + C * src[stride + 7] + D * src[stride + 8] + rounder) >> 8; + dst += stride; + src += stride; + } +} + +void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, int shift, int r, + int width, int height) +{ + int y, vx, vy; + const int s = 1 << shift; + + width--; + height--; + + for (y = 0; y < h; y++) { + int x; + + vx = ox; + vy = oy; + for (x = 0; x < 8; x++) { // FIXME: optimize + int index; + int src_x = vx >> 16; + int src_y = vy >> 16; + int frac_x = src_x & (s - 1); + int frac_y = src_y & (s - 1); + + src_x >>= shift; + src_y >>= shift; + + if ((unsigned) src_x < width) { + if ((unsigned) src_y < height) { + index = src_x + src_y * stride; + dst[y * stride + x] = + ((src[index] * (s - frac_x) + + src[index + 1] * frac_x) * (s - frac_y) + + (src[index + stride] * (s - frac_x) + + src[index + stride + 1] * frac_x) * frac_y + + r) >> (shift * 2); + } else { + index = src_x + av_clip(src_y, 0, height) * stride; + dst[y * stride + x] = + ((src[index] * (s - frac_x) + + src[index + 1] * frac_x) * s + + r) >> (shift * 2); + } + } else { + if ((unsigned) src_y < height) { + index = av_clip(src_x, 0, width) + src_y * stride; + dst[y * stride + x] = + ((src[index] * (s - frac_y) + + src[index + stride] * frac_y) * s + + r) >> (shift * 2); + } else { + index = av_clip(src_x, 0, width) + + av_clip(src_y, 0, height) * stride; + dst[y * stride + x] = src[index]; + } + } + + vx += dxx; + vy += dyx; + } + ox += dxy; + oy += dyy; + } +} + +av_cold void ff_mpegvideodsp_init(MpegVideoDSPContext *c) +{ + c->gmc1 = gmc1_c; + c->gmc = ff_gmc_c; + + if (ARCH_PPC) + ff_mpegvideodsp_init_ppc(c); + if (ARCH_X86) + 
ff_mpegvideodsp_init_x86(c); +} diff --git a/libavcodec/mpegvideodsp.h b/libavcodec/mpegvideodsp.h new file mode 100644 index 0000000000..293e2548d3 --- /dev/null +++ b/libavcodec/mpegvideodsp.h @@ -0,0 +1,47 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_MPEGVIDEODSP_H +#define AVCODEC_MPEGVIDEODSP_H + +#include <stdint.h> + +void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, int shift, int r, + int width, int height); + +typedef struct MpegVideoDSPContext { + /** + * translational global motion compensation. + */ + void (*gmc1)(uint8_t *dst /* align 8 */, uint8_t *src /* align 1 */, + int srcStride, int h, int x16, int y16, int rounder); + /** + * global motion compensation. 
+ */ + void (*gmc)(uint8_t *dst /* align 8 */, uint8_t *src /* align 1 */, + int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, + int shift, int r, int width, int height); +} MpegVideoDSPContext; + +void ff_mpegvideodsp_init(MpegVideoDSPContext *c); +void ff_mpegvideodsp_init_ppc(MpegVideoDSPContext *c); +void ff_mpegvideodsp_init_x86(MpegVideoDSPContext *c); + +#endif /* AVCODEC_MPEGVIDEODSP_H */ diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile index ef3685ac0f..aa5bd5d011 100644 --- a/libavcodec/ppc/Makefile +++ b/libavcodec/ppc/Makefile @@ -10,7 +10,8 @@ OBJS-$(CONFIG_H264QPEL) += ppc/h264qpel.o OBJS-$(CONFIG_HPELDSP) += ppc/hpeldsp_altivec.o OBJS-$(CONFIG_HUFFYUVDSP) += ppc/huffyuvdsp_altivec.o OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o -OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o +OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o \ + ppc/mpegvideodsp.o OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o @@ -23,7 +24,6 @@ OBJS-$(CONFIG_VP8_DECODER) += ppc/vp8dsp_altivec.o ALTIVEC-OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_altivec.o \ ppc/fdct_altivec.o \ - ppc/gmc_altivec.o \ ppc/idct_altivec.o \ FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o diff --git a/libavcodec/ppc/dsputil_altivec.h b/libavcodec/ppc/dsputil_altivec.h index a835024169..e4f8770663 100644 --- a/libavcodec/ppc/dsputil_altivec.h +++ b/libavcodec/ppc/dsputil_altivec.h @@ -28,8 +28,7 @@ #include "libavcodec/dsputil.h" void ff_fdct_altivec(int16_t *block); -void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, - int x16, int y16, int rounder); + void ff_idct_altivec(int16_t *block); void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c index ebdf0a4b48..48b3b4a6bb 100644 --- a/libavcodec/ppc/dsputil_ppc.c +++ 
b/libavcodec/ppc/dsputil_ppc.c @@ -36,8 +36,6 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx, if (PPC_ALTIVEC(mm_flags)) { ff_dsputil_init_altivec(c, avctx, high_bit_depth); - c->gmc1 = ff_gmc1_altivec; - if (!high_bit_depth) { #if CONFIG_ENCODERS if (avctx->dct_algo == FF_DCT_AUTO || diff --git a/libavcodec/ppc/gmc_altivec.c b/libavcodec/ppc/mpegvideodsp.c similarity index 92% rename from libavcodec/ppc/gmc_altivec.c rename to libavcodec/ppc/mpegvideodsp.c index 683a056b63..7696954335 100644 --- a/libavcodec/ppc/gmc_altivec.c +++ b/libavcodec/ppc/mpegvideodsp.c @@ -23,12 +23,13 @@ #include "libavutil/mem.h" #include "libavutil/ppc/types_altivec.h" #include "libavutil/ppc/util_altivec.h" -#include "dsputil_altivec.h" +#include "libavcodec/mpegvideodsp.h" +#if HAVE_ALTIVEC /* AltiVec-enhanced gmc1. ATM this code assumes stride is a multiple of 8 * to preserve proper dst alignment. */ -void ff_gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, - int stride, int h, int x16, int y16, int rounder) +static void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, + int stride, int h, int x16, int y16, int rounder) { int i; const DECLARE_ALIGNED(16, unsigned short, rounder_a) = rounder; @@ -122,3 +123,11 @@ void ff_gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, src += stride; } } +#endif /* HAVE_ALTIVEC */ + +av_cold void ff_mpegvideodsp_init_ppc(MpegVideoDSPContext *c) +{ +#if HAVE_ALTIVEC + c->gmc1 = gmc1_altivec; +#endif /* HAVE_ALTIVEC */ +} diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 61d73ffcb4..efb371eb59 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -26,7 +26,8 @@ OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_init.o OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp_mmx.o OBJS-$(CONFIG_LPC) += x86/lpc.o OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o -OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o +OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \ 
+ x86/mpegvideodsp.o OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c index 63eea89a32..f2e312eb53 100644 --- a/libavcodec/x86/dsputil_init.c +++ b/libavcodec/x86/dsputil_init.c @@ -58,9 +58,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, } } -#if CONFIG_VIDEODSP - c->gmc = ff_gmc_mmx; -#endif #endif /* HAVE_MMX_INLINE */ #if HAVE_MMX_EXTERNAL diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 310039957f..19c03d8586 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -247,127 +247,4 @@ void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, } } -#if CONFIG_VIDEODSP -void ff_gmc_mmx(uint8_t *dst, uint8_t *src, - int stride, int h, int ox, int oy, - int dxx, int dxy, int dyx, int dyy, - int shift, int r, int width, int height) -{ - const int w = 8; - const int ix = ox >> (16 + shift); - const int iy = oy >> (16 + shift); - const int oxs = ox >> 4; - const int oys = oy >> 4; - const int dxxs = dxx >> 4; - const int dxys = dxy >> 4; - const int dyxs = dyx >> 4; - const int dyys = dyy >> 4; - const uint16_t r4[4] = { r, r, r, r }; - const uint16_t dxy4[4] = { dxys, dxys, dxys, dxys }; - const uint16_t dyy4[4] = { dyys, dyys, dyys, dyys }; - const uint64_t shift2 = 2 * shift; -#define MAX_STRIDE 4096U -#define MAX_H 8U - uint8_t edge_buf[(MAX_H + 1) * MAX_STRIDE]; - int x, y; - - const int dxw = (dxx - (1 << (16 + shift))) * (w - 1); - const int dyh = (dyy - (1 << (16 + shift))) * (h - 1); - const int dxh = dxy * (h - 1); - const int dyw = dyx * (w - 1); - int need_emu = (unsigned) ix >= width - w || - (unsigned) iy >= height - h; - - if ( // non-constant fullpel offset (3% of blocks) - ((ox ^ (ox + dxw)) | (ox ^ (ox + dxh)) | (ox ^ (ox + dxw + dxh)) | - (oy ^ (oy + dyw)) | (oy ^ (oy + dyh)) | (oy ^ (oy + 
dyw + dyh))) >> (16 + shift) || - // uses more than 16 bits of subpel mv (only at huge resolution) - (dxx | dxy | dyx | dyy) & 15 || - (need_emu && (h > MAX_H || stride > MAX_STRIDE))) { - // FIXME could still use mmx for some of the rows - ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, - shift, r, width, height); - return; - } - - src += ix + iy * stride; - if (need_emu) { - ff_emulated_edge_mc_8(edge_buf, src, stride, stride, w + 1, h + 1, ix, iy, width, height); - src = edge_buf; - } - - __asm__ volatile ( - "movd %0, %%mm6 \n\t" - "pxor %%mm7, %%mm7 \n\t" - "punpcklwd %%mm6, %%mm6 \n\t" - "punpcklwd %%mm6, %%mm6 \n\t" - :: "r" (1 << shift)); - - for (x = 0; x < w; x += 4) { - uint16_t dx4[4] = { oxs - dxys + dxxs * (x + 0), - oxs - dxys + dxxs * (x + 1), - oxs - dxys + dxxs * (x + 2), - oxs - dxys + dxxs * (x + 3) }; - uint16_t dy4[4] = { oys - dyys + dyxs * (x + 0), - oys - dyys + dyxs * (x + 1), - oys - dyys + dyxs * (x + 2), - oys - dyys + dyxs * (x + 3) }; - - for (y = 0; y < h; y++) { - __asm__ volatile ( - "movq %0, %%mm4 \n\t" - "movq %1, %%mm5 \n\t" - "paddw %2, %%mm4 \n\t" - "paddw %3, %%mm5 \n\t" - "movq %%mm4, %0 \n\t" - "movq %%mm5, %1 \n\t" - "psrlw $12, %%mm4 \n\t" - "psrlw $12, %%mm5 \n\t" - : "+m" (*dx4), "+m" (*dy4) - : "m" (*dxy4), "m" (*dyy4)); - - __asm__ volatile ( - "movq %%mm6, %%mm2 \n\t" - "movq %%mm6, %%mm1 \n\t" - "psubw %%mm4, %%mm2 \n\t" - "psubw %%mm5, %%mm1 \n\t" - "movq %%mm2, %%mm0 \n\t" - "movq %%mm4, %%mm3 \n\t" - "pmullw %%mm1, %%mm0 \n\t" // (s - dx) * (s - dy) - "pmullw %%mm5, %%mm3 \n\t" // dx * dy - "pmullw %%mm5, %%mm2 \n\t" // (s - dx) * dy - "pmullw %%mm4, %%mm1 \n\t" // dx * (s - dy) - - "movd %4, %%mm5 \n\t" - "movd %3, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm5 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "pmullw %%mm5, %%mm3 \n\t" // src[1, 1] * dx * dy - "pmullw %%mm4, %%mm2 \n\t" // src[0, 1] * (s - dx) * dy - - "movd %2, %%mm5 \n\t" - "movd %1, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm5 \n\t" - "punpcklbw %%mm7, 
%%mm4 \n\t" - "pmullw %%mm5, %%mm1 \n\t" // src[1, 0] * dx * (s - dy) - "pmullw %%mm4, %%mm0 \n\t" // src[0, 0] * (s - dx) * (s - dy) - "paddw %5, %%mm1 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm1, %%mm0 \n\t" - "paddw %%mm2, %%mm0 \n\t" - - "psrlw %6, %%mm0 \n\t" - "packuswb %%mm0, %%mm0 \n\t" - "movd %%mm0, %0 \n\t" - - : "=m" (dst[x + y * stride]) - : "m" (src[0]), "m" (src[1]), - "m" (src[stride]), "m" (src[stride + 1]), - "m" (*r4), "m" (shift2)); - src += stride; - } - src += 4 - h * stride; - } -} -#endif #endif /* HAVE_INLINE_ASM */ diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h index 14b7482cb9..ee5a9d4bef 100644 --- a/libavcodec/x86/dsputil_x86.h +++ b/libavcodec/x86/dsputil_x86.h @@ -43,12 +43,7 @@ void ff_put_signed_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels, void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w, int h, int sides); -void ff_gmc_mmx(uint8_t *dst, uint8_t *src, - int stride, int h, int ox, int oy, - int dxx, int dxy, int dyx, int dyy, - int shift, int r, int width, int height); void ff_mmx_idct(int16_t *block); void ff_mmxext_idct(int16_t *block); - #endif /* AVCODEC_X86_DSPUTIL_X86_H */ diff --git a/libavcodec/x86/mpegvideodsp.c b/libavcodec/x86/mpegvideodsp.c new file mode 100644 index 0000000000..941a8e2e4c --- /dev/null +++ b/libavcodec/x86/mpegvideodsp.c @@ -0,0 +1,161 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/asm.h" +#include "libavutil/x86/cpu.h" +#include "libavcodec/mpegvideodsp.h" +#include "libavcodec/videodsp.h" + +#if HAVE_INLINE_ASM + +static void gmc_mmx(uint8_t *dst, uint8_t *src, + int stride, int h, int ox, int oy, + int dxx, int dxy, int dyx, int dyy, + int shift, int r, int width, int height) +{ + const int w = 8; + const int ix = ox >> (16 + shift); + const int iy = oy >> (16 + shift); + const int oxs = ox >> 4; + const int oys = oy >> 4; + const int dxxs = dxx >> 4; + const int dxys = dxy >> 4; + const int dyxs = dyx >> 4; + const int dyys = dyy >> 4; + const uint16_t r4[4] = { r, r, r, r }; + const uint16_t dxy4[4] = { dxys, dxys, dxys, dxys }; + const uint16_t dyy4[4] = { dyys, dyys, dyys, dyys }; + const uint64_t shift2 = 2 * shift; +#define MAX_STRIDE 4096U +#define MAX_H 8U + uint8_t edge_buf[(MAX_H + 1) * MAX_STRIDE]; + int x, y; + + const int dxw = (dxx - (1 << (16 + shift))) * (w - 1); + const int dyh = (dyy - (1 << (16 + shift))) * (h - 1); + const int dxh = dxy * (h - 1); + const int dyw = dyx * (w - 1); + int need_emu = (unsigned) ix >= width - w || + (unsigned) iy >= height - h; + + if ( // non-constant fullpel offset (3% of blocks) + ((ox ^ (ox + dxw)) | (ox ^ (ox + dxh)) | (ox ^ (ox + dxw + dxh)) | + (oy ^ (oy + dyw)) | (oy ^ (oy + dyh)) | (oy ^ (oy + dyw + dyh))) >> (16 + shift) || + // uses more than 16 bits of subpel mv (only at huge resolution) + (dxx | dxy | dyx | dyy) & 15 || + (need_emu && (h > MAX_H || stride > MAX_STRIDE))) { + // FIXME could still use mmx for some of the rows + ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, + shift, r, width, height); + return; + } + + src += ix + iy * 
stride; + if (need_emu) { + ff_emulated_edge_mc_8(edge_buf, src, stride, stride, w + 1, h + 1, ix, iy, width, height); + src = edge_buf; + } + + __asm__ volatile ( + "movd %0, %%mm6 \n\t" + "pxor %%mm7, %%mm7 \n\t" + "punpcklwd %%mm6, %%mm6 \n\t" + "punpcklwd %%mm6, %%mm6 \n\t" + :: "r" (1 << shift)); + + for (x = 0; x < w; x += 4) { + uint16_t dx4[4] = { oxs - dxys + dxxs * (x + 0), + oxs - dxys + dxxs * (x + 1), + oxs - dxys + dxxs * (x + 2), + oxs - dxys + dxxs * (x + 3) }; + uint16_t dy4[4] = { oys - dyys + dyxs * (x + 0), + oys - dyys + dyxs * (x + 1), + oys - dyys + dyxs * (x + 2), + oys - dyys + dyxs * (x + 3) }; + + for (y = 0; y < h; y++) { + __asm__ volatile ( + "movq %0, %%mm4 \n\t" + "movq %1, %%mm5 \n\t" + "paddw %2, %%mm4 \n\t" + "paddw %3, %%mm5 \n\t" + "movq %%mm4, %0 \n\t" + "movq %%mm5, %1 \n\t" + "psrlw $12, %%mm4 \n\t" + "psrlw $12, %%mm5 \n\t" + : "+m" (*dx4), "+m" (*dy4) + : "m" (*dxy4), "m" (*dyy4)); + + __asm__ volatile ( + "movq %%mm6, %%mm2 \n\t" + "movq %%mm6, %%mm1 \n\t" + "psubw %%mm4, %%mm2 \n\t" + "psubw %%mm5, %%mm1 \n\t" + "movq %%mm2, %%mm0 \n\t" + "movq %%mm4, %%mm3 \n\t" + "pmullw %%mm1, %%mm0 \n\t" // (s - dx) * (s - dy) + "pmullw %%mm5, %%mm3 \n\t" // dx * dy + "pmullw %%mm5, %%mm2 \n\t" // (s - dx) * dy + "pmullw %%mm4, %%mm1 \n\t" // dx * (s - dy) + + "movd %4, %%mm5 \n\t" + "movd %3, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "pmullw %%mm5, %%mm3 \n\t" // src[1, 1] * dx * dy + "pmullw %%mm4, %%mm2 \n\t" // src[0, 1] * (s - dx) * dy + + "movd %2, %%mm5 \n\t" + "movd %1, %%mm4 \n\t" + "punpcklbw %%mm7, %%mm5 \n\t" + "punpcklbw %%mm7, %%mm4 \n\t" + "pmullw %%mm5, %%mm1 \n\t" // src[1, 0] * dx * (s - dy) + "pmullw %%mm4, %%mm0 \n\t" // src[0, 0] * (s - dx) * (s - dy) + "paddw %5, %%mm1 \n\t" + "paddw %%mm3, %%mm2 \n\t" + "paddw %%mm1, %%mm0 \n\t" + "paddw %%mm2, %%mm0 \n\t" + + "psrlw %6, %%mm0 \n\t" + "packuswb %%mm0, %%mm0 \n\t" + "movd %%mm0, %0 \n\t" + + : "=m" (dst[x + y * stride]) + : "m" 
(src[0]), "m" (src[1]), + "m" (src[stride]), "m" (src[stride + 1]), + "m" (*r4), "m" (shift2)); + src += stride; + } + src += 4 - h * stride; + } +} + +#endif /* HAVE_INLINE_ASM */ + +av_cold void ff_mpegvideodsp_init_x86(MpegVideoDSPContext *c) +{ +#if HAVE_INLINE_ASM + int cpu_flags = av_get_cpu_flags(); + + if (INLINE_MMX(cpu_flags)) + c->gmc = gmc_mmx; +#endif /* HAVE_INLINE_ASM */ +}