From 9a1ced321bb900acc68cae07f4141d5f257cadd5 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Thu, 17 Feb 2011 14:45:03 -0500 Subject: [PATCH] dsputil: move VC1-specific stuff into VC1DSPContext. (cherry picked from commit 12802ec0601c3bd7b9c7a2503518e28fd5e7d744) --- libavcodec/dsputil.c | 53 ---------- libavcodec/dsputil.h | 27 ----- libavcodec/ppc/dsputil_altivec.h | 1 - libavcodec/ppc/dsputil_ppc.c | 2 - libavcodec/ppc/vc1dsp_altivec.c | 6 +- libavcodec/vc1.c | 16 +-- libavcodec/vc1.h | 2 + libavcodec/vc1dec.c | 174 ++++++++++++++++--------------- libavcodec/vc1dsp.c | 61 ++++++++++- libavcodec/vc1dsp.h | 67 ++++++++++++ libavcodec/x86/dsputil_mmx.c | 20 ---- libavcodec/x86/dsputil_mmx.h | 1 - libavcodec/x86/vc1dsp_mmx.c | 63 +++++++---- 13 files changed, 272 insertions(+), 221 deletions(-) create mode 100644 libavcodec/vc1dsp.h diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index c89e58f529..f7151b8815 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -1600,54 +1600,6 @@ H264_CHROMA_MC(avg_ , op_avg) #undef op_avg #undef op_put -static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){ - const int A=(8-x)*(8-y); - const int B=( x)*(8-y); - const int C=(8-x)*( y); - const int D=( x)*( y); - int i; - - assert(x<8 && y<8 && x>=0 && y>=0); - - for(i=0; i<h; i++) - { - dst[0] = (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6; - dst[1] = (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6; - dst[2] = (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6; - dst[3] = (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6; - dst[4] = (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6; - dst[5] = (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6; - dst[6] = (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6; - dst[7] = (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6; - dst+= stride; - src+= stride; - 
} -} - -static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){ - const int A=(8-x)*(8-y); - const int B=( x)*(8-y); - const int C=(8-x)*( y); - const int D=( x)*( y); - int i; - - assert(x<8 && y<8 && x>=0 && y>=0); - - for(i=0; i<h; i++) - { - dst[0] = avg2(dst[0], ((A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6)); - dst[1] = avg2(dst[1], ((A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6)); - dst[2] = avg2(dst[2], ((A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6)); - dst[3] = avg2(dst[3], ((A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6)); - dst[4] = avg2(dst[4], ((A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6)); - dst[5] = avg2(dst[5], ((A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6)); - dst[6] = avg2(dst[6], ((A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6)); - dst[7] = avg2(dst[7], ((A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6)); - dst+= stride; - src+= stride; - } -} - #define QPEL_MC(r, OPNAME, RND, OP) \ static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\ @@ -4301,17 +4253,12 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c; c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c; c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c; - c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c; - c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c; c->draw_edges = draw_edges_c; #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER ff_mlp_init(c, avctx); #endif -#if CONFIG_VC1_DECODER - ff_vc1dsp_init(c,avctx); -#endif #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER ff_intrax8dsp_init(c,avctx); #endif diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index c8111866c2..f97b2b5fef 100644 --- 
a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -341,9 +341,6 @@ typedef struct DSPContext { */ h264_chroma_mc_func put_h264_chroma_pixels_tab[3]; h264_chroma_mc_func avg_h264_chroma_pixels_tab[3]; - /* This is really one func used in VC-1 decoding */ - h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3]; - h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3]; qpel_mc_func put_h264_qpel_pixels_tab[4][16]; qpel_mc_func avg_h264_qpel_pixels_tab[4][16]; @@ -503,29 +500,6 @@ typedef struct DSPContext { unsigned int filter_shift, int32_t mask, int blocksize, int32_t *sample_buffer); - /* vc1 functions */ - void (*vc1_inv_trans_8x8)(DCTELEM *b); - void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block); - void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block); - void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block); - void (*vc1_inv_trans_8x8_dc)(uint8_t *dest, int line_size, DCTELEM *block); - void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM *block); - void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM *block); - void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM *block); - void (*vc1_v_overlap)(uint8_t* src, int stride); - void (*vc1_h_overlap)(uint8_t* src, int stride); - void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq); - void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq); - void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq); - void (*vc1_h_loop_filter8)(uint8_t *src, int stride, int pq); - void (*vc1_v_loop_filter16)(uint8_t *src, int stride, int pq); - void (*vc1_h_loop_filter16)(uint8_t *src, int stride, int pq); - /* put 8x8 block with bicubic interpolation and quarterpel precision - * last argument is actually round value instead of height - */ - op_pixels_func put_vc1_mspel_pixels_tab[16]; - op_pixels_func avg_vc1_mspel_pixels_tab[16]; - /* intrax8 functions */ void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t 
*dst, int linesize); void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize, @@ -629,7 +603,6 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx); void ff_dsputil_init_dwt(DSPContext *c); void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx); void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx); -void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx); void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx); void ff_mlp_init(DSPContext* c, AVCodecContext *avctx); void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx); diff --git a/libavcodec/ppc/dsputil_altivec.h b/libavcodec/ppc/dsputil_altivec.h index cd44f602b9..4147eec823 100644 --- a/libavcodec/ppc/dsputil_altivec.h +++ b/libavcodec/ppc/dsputil_altivec.h @@ -43,7 +43,6 @@ void ff_vp3_idct_add_altivec(uint8_t *dest, int line_size, DCTELEM *block); void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx); void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx); -void vc1dsp_init_altivec(DSPContext* c, AVCodecContext *avctx); void float_init_altivec(DSPContext* c, AVCodecContext *avctx); void int_init_altivec(DSPContext* c, AVCodecContext *avctx); diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c index 9e4f1aa667..d13c1ce661 100644 --- a/libavcodec/ppc/dsputil_ppc.c +++ b/libavcodec/ppc/dsputil_ppc.c @@ -171,8 +171,6 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { dsputil_init_altivec(c, avctx); - if(CONFIG_VC1_DECODER) - vc1dsp_init_altivec(c, avctx); float_init_altivec(c, avctx); int_init_altivec(c, avctx); c->gmc1 = gmc1_altivec; diff --git a/libavcodec/ppc/vc1dsp_altivec.c b/libavcodec/ppc/vc1dsp_altivec.c index 5ffe9a5479..95774389a5 100644 --- a/libavcodec/ppc/vc1dsp_altivec.c +++ b/libavcodec/ppc/vc1dsp_altivec.c @@ -322,7 +322,11 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, int stride, DCTELEM *block) } -void 
vc1dsp_init_altivec(DSPContext* dsp, AVCodecContext *avctx) { +void ff_vc1dsp_init_altivec(VC1DSPContext* dsp) +{ + if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) + return; + dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec; dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec; } diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c index 1f39878293..8bd6647f13 100644 --- a/libavcodec/vc1.c +++ b/libavcodec/vc1.c @@ -337,14 +337,14 @@ int vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitConte v->res_fasttx = get_bits1(gb); if (!v->res_fasttx) { - v->s.dsp.vc1_inv_trans_8x8 = ff_simple_idct; - v->s.dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add; - v->s.dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add; - v->s.dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add; - v->s.dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add; - v->s.dsp.vc1_inv_trans_8x4_dc = ff_simple_idct84_add; - v->s.dsp.vc1_inv_trans_4x8_dc = ff_simple_idct48_add; - v->s.dsp.vc1_inv_trans_4x4_dc = ff_simple_idct44_add; + v->vc1dsp.vc1_inv_trans_8x8 = ff_simple_idct; + v->vc1dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add; + v->vc1dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add; + v->vc1dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add; + v->vc1dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add; + v->vc1dsp.vc1_inv_trans_8x4_dc = ff_simple_idct84_add; + v->vc1dsp.vc1_inv_trans_4x8_dc = ff_simple_idct48_add; + v->vc1dsp.vc1_inv_trans_4x4_dc = ff_simple_idct44_add; } v->fastuvmc = get_bits1(gb); //common diff --git a/libavcodec/vc1.h b/libavcodec/vc1.h index da0b6c16e7..d826c5a817 100644 --- a/libavcodec/vc1.h +++ b/libavcodec/vc1.h @@ -26,6 +26,7 @@ #include "avcodec.h" #include "mpegvideo.h" #include "intrax8.h" +#include "vc1dsp.h" /** Markers used in VC-1 AP frame data */ //@{ @@ -155,6 +156,7 @@ enum COTypes { typedef struct VC1Context{ MpegEncContext s; IntraX8Context x8; + VC1DSPContext vc1dsp; int bits; diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c index e254d1b0b8..6707cdec71 100644 --- 
a/libavcodec/vc1dec.c +++ b/libavcodec/vc1dec.c @@ -160,29 +160,30 @@ enum Imode { /** @} */ //Bitplane group -static void vc1_loop_filter_iblk(MpegEncContext *s, int pq) +static void vc1_loop_filter_iblk(VC1Context *v, int pq) { + MpegEncContext *s = &v->s; int j; if (!s->first_slice_line) { - s->dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq); + v->vc1dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq); if (s->mb_x) - s->dsp.vc1_h_loop_filter16(s->dest[0] - 16*s->linesize, s->linesize, pq); - s->dsp.vc1_h_loop_filter16(s->dest[0] - 16*s->linesize+8, s->linesize, pq); + v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16*s->linesize, s->linesize, pq); + v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16*s->linesize+8, s->linesize, pq); for(j = 0; j < 2; j++){ - s->dsp.vc1_v_loop_filter8(s->dest[j+1], s->uvlinesize, pq); + v->vc1dsp.vc1_v_loop_filter8(s->dest[j+1], s->uvlinesize, pq); if (s->mb_x) - s->dsp.vc1_h_loop_filter8(s->dest[j+1]-8*s->uvlinesize, s->uvlinesize, pq); + v->vc1dsp.vc1_h_loop_filter8(s->dest[j+1]-8*s->uvlinesize, s->uvlinesize, pq); } } - s->dsp.vc1_v_loop_filter16(s->dest[0] + 8*s->linesize, s->linesize, pq); + v->vc1dsp.vc1_v_loop_filter16(s->dest[0] + 8*s->linesize, s->linesize, pq); if (s->mb_y == s->mb_height-1) { if (s->mb_x) { - s->dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq); - s->dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq); - s->dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq); + v->vc1dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq); + v->vc1dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq); + v->vc1dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq); } - s->dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq); + v->vc1dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq); } } @@ -342,11 +343,11 @@ static void vc1_mc_1mv(VC1Context *v, int dir) if(s->mspel) { dxy = ((my & 3) << 2) | (mx & 3); - dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0] , srcY , s->linesize, v->rnd); - 
dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd); + v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] , srcY , s->linesize, v->rnd); + v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd); srcY += s->linesize * 8; - dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize , srcY , s->linesize, v->rnd); - dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd); + v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize , srcY , s->linesize, v->rnd); + v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd); } else { // hpel mc - always used for luma dxy = (my & 2) | ((mx & 2) >> 1); @@ -364,8 +365,8 @@ static void vc1_mc_1mv(VC1Context *v, int dir) dsp->put_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy); dsp->put_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy); }else{ - dsp->put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy); - dsp->put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy); + v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy); + v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy); } } @@ -433,7 +434,7 @@ static void vc1_mc_4mv_luma(VC1Context *v, int n) if(s->mspel) { dxy = ((my & 3) << 2) | (mx & 3); - dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize, v->rnd); + v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize, v->rnd); } else { // hpel mc - always used for luma dxy = (my & 2) | ((mx & 2) >> 1); if(!v->rnd) @@ -583,8 +584,8 @@ static void vc1_mc_4mv_chroma(VC1Context *v) dsp->put_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy); dsp->put_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, 
uvmy); }else{ - dsp->put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy); - dsp->put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy); + v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy); + v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy); } } @@ -906,11 +907,11 @@ static void vc1_interp_mc(VC1Context *v) if(s->mspel) { dxy = ((my & 3) << 2) | (mx & 3); - dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] , srcY , s->linesize, v->rnd); - dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd); + v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] , srcY , s->linesize, v->rnd); + v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd); srcY += s->linesize * 8; - dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize , srcY , s->linesize, v->rnd); - dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd); + v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize , srcY , s->linesize, v->rnd); + v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd); } else { // hpel mc dxy = (my & 2) | ((mx & 2) >> 1); @@ -928,8 +929,8 @@ static void vc1_interp_mc(VC1Context *v) dsp->avg_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy); dsp->avg_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy); }else{ - dsp->avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy); - dsp->avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy); + v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy); + v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy); } } @@ -2039,15 +2040,15 @@ static int 
vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan } if(!skip_block){ if(i==1) - s->dsp.vc1_inv_trans_8x8_dc(dst, linesize, block); + v->vc1dsp.vc1_inv_trans_8x8_dc(dst, linesize, block); else{ - s->dsp.vc1_inv_trans_8x8(block); + v->vc1dsp.vc1_inv_trans_8x8(block); s->dsp.add_pixels_clamped(block, dst, linesize); } if(apply_filter && cbp_top & 0xC) - s->dsp.vc1_v_loop_filter8(dst, linesize, v->pq); + v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq); if(apply_filter && cbp_left & 0xA) - s->dsp.vc1_h_loop_filter8(dst, linesize, v->pq); + v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq); } break; case TT_4X4: @@ -2068,13 +2069,13 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan } if(!(subblkpat & (1 << (3 - j))) && !skip_block){ if(i==1) - s->dsp.vc1_inv_trans_4x4_dc(dst + (j&1)*4 + (j&2)*2*linesize, linesize, block + off); + v->vc1dsp.vc1_inv_trans_4x4_dc(dst + (j&1)*4 + (j&2)*2*linesize, linesize, block + off); else - s->dsp.vc1_inv_trans_4x4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, block + off); + v->vc1dsp.vc1_inv_trans_4x4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, block + off); if(apply_filter && (j&2 ? pat & (1<<(j-2)) : (cbp_top & (1 << (j + 2))))) - s->dsp.vc1_v_loop_filter4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, v->pq); + v->vc1dsp.vc1_v_loop_filter4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, v->pq); if(apply_filter && (j&1 ? 
pat & (1<<(j-1)) : (cbp_left & (1 << (j + 1))))) - s->dsp.vc1_h_loop_filter4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, v->pq); + v->vc1dsp.vc1_h_loop_filter4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, v->pq); } } break; @@ -2096,13 +2097,13 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan } if(!(subblkpat & (1 << (1 - j))) && !skip_block){ if(i==1) - s->dsp.vc1_inv_trans_8x4_dc(dst + j*4*linesize, linesize, block + off); + v->vc1dsp.vc1_inv_trans_8x4_dc(dst + j*4*linesize, linesize, block + off); else - s->dsp.vc1_inv_trans_8x4(dst + j*4*linesize, linesize, block + off); + v->vc1dsp.vc1_inv_trans_8x4(dst + j*4*linesize, linesize, block + off); if(apply_filter && j ? pat & 0x3 : (cbp_top & 0xC)) - s->dsp.vc1_v_loop_filter8(dst + j*4*linesize, linesize, v->pq); + v->vc1dsp.vc1_v_loop_filter8(dst + j*4*linesize, linesize, v->pq); if(apply_filter && cbp_left & (2 << j)) - s->dsp.vc1_h_loop_filter4(dst + j*4*linesize, linesize, v->pq); + v->vc1dsp.vc1_h_loop_filter4(dst + j*4*linesize, linesize, v->pq); } } break; @@ -2124,13 +2125,13 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan } if(!(subblkpat & (1 << (1 - j))) && !skip_block){ if(i==1) - s->dsp.vc1_inv_trans_4x8_dc(dst + j*4, linesize, block + off); + v->vc1dsp.vc1_inv_trans_4x8_dc(dst + j*4, linesize, block + off); else - s->dsp.vc1_inv_trans_4x8(dst + j*4, linesize, block + off); + v->vc1dsp.vc1_inv_trans_4x8(dst + j*4, linesize, block + off); if(apply_filter && cbp_top & (2 << j)) - s->dsp.vc1_v_loop_filter4(dst + j*4, linesize, v->pq); + v->vc1dsp.vc1_v_loop_filter4(dst + j*4, linesize, v->pq); if(apply_filter && j ? 
pat & 0x5 : (cbp_left & 0xA)) - s->dsp.vc1_h_loop_filter8(dst + j*4, linesize, v->pq); + v->vc1dsp.vc1_h_loop_filter8(dst + j*4, linesize, v->pq); } } break; @@ -2232,14 +2233,14 @@ static int vc1_decode_p_mb(VC1Context *v) vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset); if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue; - s->dsp.vc1_inv_trans_8x8(s->block[i]); + v->vc1dsp.vc1_inv_trans_8x8(s->block[i]); if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1; s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); if(v->pq >= 9 && v->overlap) { if(v->c_avail) - s->dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); + v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); if(v->a_avail) - s->dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); + v->vc1dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); } if(apply_loop_filter && s->mb_x && s->mb_x != (s->mb_width - 1) && s->mb_y && s->mb_y != (s->mb_height - 1)){ int left_cbp, top_cbp; @@ -2251,9 +2252,9 @@ static int vc1_decode_p_mb(VC1Context *v) top_cbp = (i & 2) ? (cbp >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4)); } if(left_cbp & 0xC) - s->dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq); + v->vc1dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq); if(top_cbp & 0xA) - s->dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq); + v->vc1dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq); } block_cbp |= 0xF << (i << 2); } else if(val) { @@ -2268,9 +2269,9 @@ static int vc1_decode_p_mb(VC1Context *v) top_cbp = (i & 2) ? 
(cbp >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4)); } if(left_cbp & 0xC) - s->dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq); + v->vc1dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq); if(top_cbp & 0xA) - s->dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq); + v->vc1dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq); } pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY), filter, left_cbp, top_cbp); block_cbp |= pat << (i << 2); @@ -2363,14 +2364,14 @@ static int vc1_decode_p_mb(VC1Context *v) vc1_decode_intra_block(v, s->block[i], i, is_coded[i], mquant, (i&4)?v->codingset2:v->codingset); if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue; - s->dsp.vc1_inv_trans_8x8(s->block[i]); + v->vc1dsp.vc1_inv_trans_8x8(s->block[i]); if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1; s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize); if(v->pq >= 9 && v->overlap) { if(v->c_avail) - s->dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); + v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); if(v->a_avail) - s->dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); + v->vc1dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); } if(v->s.loop_filter && s->mb_x && s->mb_x != (s->mb_width - 1) && s->mb_y && s->mb_y != (s->mb_height - 1)){ int left_cbp, top_cbp; @@ -2382,9 +2383,9 @@ static int vc1_decode_p_mb(VC1Context *v) top_cbp = (i & 2) ? (cbp >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4)); } if(left_cbp & 0xC) - s->dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? 
s->uvlinesize : s->linesize, v->pq); + v->vc1dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq); if(top_cbp & 0xA) - s->dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq); + v->vc1dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq); } block_cbp |= 0xF << (i << 2); } else if(is_coded[i]) { @@ -2399,9 +2400,9 @@ static int vc1_decode_p_mb(VC1Context *v) top_cbp = (i & 2) ? (cbp >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4)); } if(left_cbp & 0xC) - s->dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq); + v->vc1dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq); if(top_cbp & 0xA) - s->dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq); + v->vc1dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq); } pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY), filter, left_cbp, top_cbp); block_cbp |= pat << (i << 2); @@ -2568,7 +2569,7 @@ static void vc1_decode_b_mb(VC1Context *v) vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset); if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue; - s->dsp.vc1_inv_trans_8x8(s->block[i]); + v->vc1dsp.vc1_inv_trans_8x8(s->block[i]); if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1; s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); } else if(val) { @@ -2650,7 +2651,7 @@ static void vc1_decode_i_blocks(VC1Context *v) vc1_decode_i_block(v, s->block[k], k, val, (k<4)? 
v->codingset : v->codingset2); - s->dsp.vc1_inv_trans_8x8(s->block[k]); + v->vc1dsp.vc1_inv_trans_8x8(s->block[k]); if(v->pq >= 9 && v->overlap) { for(j = 0; j < 64; j++) s->block[k][j] += 128; } @@ -2659,27 +2660,27 @@ static void vc1_decode_i_blocks(VC1Context *v) vc1_put_block(v, s->block); if(v->pq >= 9 && v->overlap) { if(s->mb_x) { - s->dsp.vc1_h_overlap(s->dest[0], s->linesize); - s->dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize); + v->vc1dsp.vc1_h_overlap(s->dest[0], s->linesize); + v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize); if(!(s->flags & CODEC_FLAG_GRAY)) { - s->dsp.vc1_h_overlap(s->dest[1], s->uvlinesize); - s->dsp.vc1_h_overlap(s->dest[2], s->uvlinesize); + v->vc1dsp.vc1_h_overlap(s->dest[1], s->uvlinesize); + v->vc1dsp.vc1_h_overlap(s->dest[2], s->uvlinesize); } } - s->dsp.vc1_h_overlap(s->dest[0] + 8, s->linesize); - s->dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize); + v->vc1dsp.vc1_h_overlap(s->dest[0] + 8, s->linesize); + v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize); if(!s->first_slice_line) { - s->dsp.vc1_v_overlap(s->dest[0], s->linesize); - s->dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize); + v->vc1dsp.vc1_v_overlap(s->dest[0], s->linesize); + v->vc1dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize); if(!(s->flags & CODEC_FLAG_GRAY)) { - s->dsp.vc1_v_overlap(s->dest[1], s->uvlinesize); - s->dsp.vc1_v_overlap(s->dest[2], s->uvlinesize); + v->vc1dsp.vc1_v_overlap(s->dest[1], s->uvlinesize); + v->vc1dsp.vc1_v_overlap(s->dest[2], s->uvlinesize); } } - s->dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize, s->linesize); - s->dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize); + v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize, s->linesize); + v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize); } - if(v->s.loop_filter) vc1_loop_filter_iblk(s, v->pq); + if(v->s.loop_filter) vc1_loop_filter_iblk(v, v->pq); if(get_bits_count(&s->gb) > v->bits) 
{ ff_er_add_slice(s, 0, 0, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)); @@ -2790,34 +2791,34 @@ static void vc1_decode_i_blocks_adv(VC1Context *v) vc1_decode_i_block_adv(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2, mquant); - s->dsp.vc1_inv_trans_8x8(s->block[k]); + v->vc1dsp.vc1_inv_trans_8x8(s->block[k]); for(j = 0; j < 64; j++) s->block[k][j] += 128; } vc1_put_block(v, s->block); if(overlap) { if(s->mb_x) { - s->dsp.vc1_h_overlap(s->dest[0], s->linesize); - s->dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize); + v->vc1dsp.vc1_h_overlap(s->dest[0], s->linesize); + v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize); if(!(s->flags & CODEC_FLAG_GRAY)) { - s->dsp.vc1_h_overlap(s->dest[1], s->uvlinesize); - s->dsp.vc1_h_overlap(s->dest[2], s->uvlinesize); + v->vc1dsp.vc1_h_overlap(s->dest[1], s->uvlinesize); + v->vc1dsp.vc1_h_overlap(s->dest[2], s->uvlinesize); } } - s->dsp.vc1_h_overlap(s->dest[0] + 8, s->linesize); - s->dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize); + v->vc1dsp.vc1_h_overlap(s->dest[0] + 8, s->linesize); + v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize); if(!s->first_slice_line) { - s->dsp.vc1_v_overlap(s->dest[0], s->linesize); - s->dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize); + v->vc1dsp.vc1_v_overlap(s->dest[0], s->linesize); + v->vc1dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize); if(!(s->flags & CODEC_FLAG_GRAY)) { - s->dsp.vc1_v_overlap(s->dest[1], s->uvlinesize); - s->dsp.vc1_v_overlap(s->dest[2], s->uvlinesize); + v->vc1dsp.vc1_v_overlap(s->dest[1], s->uvlinesize); + v->vc1dsp.vc1_v_overlap(s->dest[2], s->uvlinesize); } } - s->dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize, s->linesize); - s->dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize); + v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize, s->linesize); + v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize); } - if(v->s.loop_filter) vc1_loop_filter_iblk(s, v->pq); + 
if(v->s.loop_filter) vc1_loop_filter_iblk(v, v->pq); if(get_bits_count(&s->gb) > v->bits) { ff_er_add_slice(s, 0, 0, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)); @@ -2929,7 +2930,7 @@ static void vc1_decode_b_blocks(VC1Context *v) av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i at %ix%i\n", get_bits_count(&s->gb), v->bits,s->mb_x,s->mb_y); return; } - if(v->s.loop_filter) vc1_loop_filter_iblk(s, v->pq); + if(v->s.loop_filter) vc1_loop_filter_iblk(v, v->pq); } if (!v->s.loop_filter) ff_draw_horiz_band(s, s->mb_y * 16, 16); @@ -3023,6 +3024,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx) if(ff_msmpeg4_decode_init(avctx) < 0) return -1; if (vc1_init_common(v) < 0) return -1; + ff_vc1dsp_init(&v->vc1dsp); for (i = 0; i < 64; i++) { #define transpose(x) ((x>>3) | ((x&7)<<3)) v->zz_8x8[0][i] = transpose(wmv1_scantable[0][i]); diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c index fd740e12d4..000dad7d26 100644 --- a/libavcodec/vc1dsp.c +++ b/libavcodec/vc1dsp.c @@ -25,7 +25,7 @@ * */ -#include "dsputil.h" +#include "vc1dsp.h" /** Apply overlap transform to horizontal edge @@ -612,7 +612,56 @@ PUT_VC1_MSPEL(1, 3) PUT_VC1_MSPEL(2, 3) PUT_VC1_MSPEL(3, 3) -av_cold void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) { +static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){ + const int A=(8-x)*(8-y); + const int B=( x)*(8-y); + const int C=(8-x)*( y); + const int D=( x)*( y); + int i; + + assert(x<8 && y<8 && x>=0 && y>=0); + + for(i=0; i<h; i++) + { + dst[0] = (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6; + dst[1] = (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6; + dst[2] = (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6; + dst[3] = (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6; + dst[4] = (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6; + dst[5] = (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6; + dst[6] = 
(A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6; + dst[7] = (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6; + dst+= stride; + src+= stride; + } +} + +#define avg2(a,b) ((a+b+1)>>1) +static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){ + const int A=(8-x)*(8-y); + const int B=( x)*(8-y); + const int C=(8-x)*( y); + const int D=( x)*( y); + int i; + + assert(x<8 && y<8 && x>=0 && y>=0); + + for(i=0; i<h; i++) + { + dst[0] = avg2(dst[0], ((A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6)); + dst[1] = avg2(dst[1], ((A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6)); + dst[2] = avg2(dst[2], ((A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6)); + dst[3] = avg2(dst[3], ((A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6)); + dst[4] = avg2(dst[4], ((A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6)); + dst[5] = avg2(dst[5], ((A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6)); + dst[6] = avg2(dst[6], ((A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6)); + dst[7] = avg2(dst[7], ((A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6)); + dst+= stride; + src+= stride; + } +} + +av_cold void ff_vc1dsp_init(VC1DSPContext* dsp) { dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c; dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c; dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c; @@ -663,4 +712,12 @@ av_cold void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) { dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_c; dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_c; dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_c; + + dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c; + dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c; + + if (HAVE_ALTIVEC) + ff_vc1dsp_init_altivec(dsp); + if (HAVE_MMX) + ff_vc1dsp_init_mmx(dsp); } diff --git 
a/libavcodec/vc1dsp.h b/libavcodec/vc1dsp.h new file mode 100644 index 0000000000..a1f3d90574 --- /dev/null +++ b/libavcodec/vc1dsp.h @@ -0,0 +1,67 @@ +/* + * VC-1 and WMV3 decoder - DSP functions + * Copyright (c) 2006 Konstantin Shishkov + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * VC-1 and WMV3 decoder + * + */ + +#ifndef AVCODEC_VC1DSP_H +#define AVCODEC_VC1DSP_H + +#include "dsputil.h" + +typedef struct VC1DSPContext { + /* vc1 functions */ + void (*vc1_inv_trans_8x8)(DCTELEM *b); + void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block); + void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block); + void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block); + void (*vc1_inv_trans_8x8_dc)(uint8_t *dest, int line_size, DCTELEM *block); + void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM *block); + void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM *block); + void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM *block); + void (*vc1_v_overlap)(uint8_t* src, int stride); + void (*vc1_h_overlap)(uint8_t* src, int stride); + void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq); + void (*vc1_h_loop_filter4)(uint8_t *src, int 
stride, int pq); + void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq); + void (*vc1_h_loop_filter8)(uint8_t *src, int stride, int pq); + void (*vc1_v_loop_filter16)(uint8_t *src, int stride, int pq); + void (*vc1_h_loop_filter16)(uint8_t *src, int stride, int pq); + + /* put 8x8 block with bicubic interpolation and quarterpel precision + * last argument is actually round value instead of height + */ + op_pixels_func put_vc1_mspel_pixels_tab[16]; + op_pixels_func avg_vc1_mspel_pixels_tab[16]; + + /* This is really one func used in VC-1 decoding */ + h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3]; + h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3]; +} VC1DSPContext; + +void ff_vc1dsp_init(VC1DSPContext* c); +void ff_vc1dsp_init_altivec(VC1DSPContext* c); +void ff_vc1dsp_init_mmx(VC1DSPContext* dsp); + +#endif /* AVCODEC_VC1DSP_H */ diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 39bf3f2936..16ad965ec1 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -1894,20 +1894,14 @@ PREFETCH(prefetch_3dnow, prefetch) void ff_put_h264_chroma_mc8_mmx_rnd (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); -void ff_put_vc1_chroma_mc8_mmx_nornd (uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); void ff_avg_h264_chroma_mc8_mmx2_rnd (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); -void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); void ff_avg_rv40_chroma_mc8_mmx2 (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); void ff_avg_h264_chroma_mc8_3dnow_rnd (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); -void ff_avg_vc1_chroma_mc8_3dnow_nornd(uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); void ff_avg_rv40_chroma_mc8_3dnow (uint8_t *dst, uint8_t *src, int stride, int h, int x, 
int y); @@ -1931,15 +1925,11 @@ void ff_avg_h264_chroma_mc2_mmx2 (uint8_t *dst, uint8_t *src, void ff_put_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); -void ff_put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); void ff_avg_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); -void ff_avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src, - int stride, int h, int x, int y); void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); @@ -2535,7 +2525,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) #if HAVE_YASM c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd; c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx; - c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd; c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx; c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx; @@ -2622,8 +2611,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2; c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2; - c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd; - c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd; c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2; c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2; @@ -2636,9 +2623,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov; #endif - if (CONFIG_VC1_DECODER) - ff_vc1dsp_init_mmx(c, avctx); - c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2; } else if (mm_flags & AV_CPU_FLAG_3DNOW) { c->prefetch = prefetch_3dnow; @@ -2695,8 +2679,6 @@ void 
dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd; c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow; - c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_3dnow_nornd; - c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow; c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow; #endif @@ -2745,8 +2727,6 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) H264_QPEL_FUNCS(3, 3, ssse3); c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; #if HAVE_YASM - c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_ssse3_nornd; - c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_ssse3_nornd; c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd; c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd; c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3; diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h index a095e1ef3d..59a9613609 100644 --- a/libavcodec/x86/dsputil_mmx.h +++ b/libavcodec/x86/dsputil_mmx.h @@ -196,7 +196,6 @@ void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); -void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx); void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd); void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd); diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c index 04b4abaad3..a525aeeeae 100644 --- a/libavcodec/x86/vc1dsp_mmx.c +++ b/libavcodec/x86/vc1dsp_mmx.c @@ -28,6 +28,7 @@ #include "libavutil/x86_cpu.h" #include "libavcodec/dsputil.h" #include "dsputil_mmx.h" +#include "libavcodec/vc1dsp.h" #define OP_PUT(S,D) #define OP_AVG(S,D) "pavgb " #S ", " #D " \n\t" @@ -712,30 
+713,45 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq) ff_vc1_h_loop_filter8_sse4(src, stride, pq); ff_vc1_h_loop_filter8_sse4(src+8*stride, stride, pq); } + #endif -void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) { +void ff_put_vc1_chroma_mc8_mmx_nornd (uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); +void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); +void ff_avg_vc1_chroma_mc8_3dnow_nornd(uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); +void ff_put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); +void ff_avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src, + int stride, int h, int x, int y); + +void ff_vc1dsp_init_mmx(VC1DSPContext *dsp) +{ int mm_flags = av_get_cpu_flags(); - dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx; - dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx; - dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_mmx; - dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_mmx; - - dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_mmx; - dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_mmx; - dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_mmx; - dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_mmx; - - dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_mmx; - dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_mmx; - dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_mmx; - dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_mmx; - - dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_mmx; - dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_mmx; - dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_mmx; - dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_mmx; + if (mm_flags & AV_CPU_FLAG_MMX) { + dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx; + dsp->put_vc1_mspel_pixels_tab[ 
4] = put_vc1_mspel_mc01_mmx; + dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_mmx; + dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_mmx; + + dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_mmx; + dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_mmx; + dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_mmx; + dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_mmx; + + dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_mmx; + dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_mmx; + dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_mmx; + dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_mmx; + + dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_mmx; + dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_mmx; + dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_mmx; + dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_mmx; + } if (mm_flags & AV_CPU_FLAG_MMX2){ dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmx2; @@ -775,11 +791,16 @@ void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) { #if HAVE_YASM if (mm_flags & AV_CPU_FLAG_MMX) { ASSIGN_LF(mmx); + dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd; } return; if (mm_flags & AV_CPU_FLAG_MMX2) { ASSIGN_LF(mmx2); + dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd; + } else if (mm_flags & AV_CPU_FLAG_3DNOW) { + dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_3dnow_nornd; } + if (mm_flags & AV_CPU_FLAG_SSE2) { dsp->vc1_v_loop_filter8 = ff_vc1_v_loop_filter8_sse2; dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse2; @@ -788,6 +809,8 @@ void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) { } if (mm_flags & AV_CPU_FLAG_SSSE3) { ASSIGN_LF(ssse3); + dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_ssse3_nornd; + dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_ssse3_nornd; } if (mm_flags & AV_CPU_FLAG_SSE4) { 
dsp->vc1_h_loop_filter8 = ff_vc1_h_loop_filter8_sse4;