From d8eda3708023db388d80027a79d5df7ee25a5a3f Mon Sep 17 00:00:00 2001 From: Diego Biurrun Date: Sun, 8 Jul 2012 19:56:10 +0200 Subject: [PATCH] x86: mmx2 ---> mmxext in function names --- libavcodec/dct-test.c | 4 +- libavcodec/dsputil.h | 2 +- libavcodec/x86/cavsdsp.c | 29 +++---- libavcodec/x86/dsputil_mmx.c | 144 ++++++++++++++++---------------- libavcodec/x86/dsputil_mmx.h | 10 +-- libavcodec/x86/dsputilenc_mmx.c | 25 ++++-- libavcodec/x86/fdct.c | 7 +- libavcodec/x86/h264_qpel.c | 54 ++++++------ libavcodec/x86/h264dsp_init.c | 13 ++- libavcodec/x86/idct_mmx_xvid.c | 11 +-- libavcodec/x86/idct_xvid.h | 6 +- libavcodec/x86/motion_est.c | 36 ++++---- libavcodec/x86/mpegvideoenc.c | 6 +- libavcodec/x86/vc1dsp_mmx.c | 65 +++++++------- libavfilter/x86/gradfun.c | 6 +- libavfilter/x86/yadif.c | 4 +- libswscale/utils.c | 21 ++--- libswscale/x86/rgb2rgb.c | 4 +- libswscale/x86/swscale.c | 4 +- libswscale/x86/yuv2rgb.c | 8 +- 20 files changed, 242 insertions(+), 217 deletions(-) diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c index 848ba8a957..c480aeccf1 100644 --- a/libavcodec/dct-test.c +++ b/libavcodec/dct-test.c @@ -83,7 +83,7 @@ static const struct algo fdct_tab[] = { #if HAVE_MMX_INLINE { "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX }, - { "MMXEXT", ff_fdct_mmx2, NO_PERM, AV_CPU_FLAG_MMXEXT }, + { "MMXEXT", ff_fdct_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT }, { "SSE2", ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 }, #endif @@ -107,7 +107,7 @@ static const struct algo idct_tab[] = { #if HAVE_MMX_INLINE { "SIMPLE-MMX", ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX }, { "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 }, - { "XVID-MMXEXT", ff_idct_xvid_mmx2, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 }, + { "XVID-MMXEXT", ff_idct_xvid_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 }, { "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 }, #endif diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index e38f7a744c..f48aa96017 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -49,7 +49,7 @@ void ff_j_rev_dct (DCTELEM *data); void ff_wmv2_idct_c(DCTELEM *data); void ff_fdct_mmx(DCTELEM *block); -void ff_fdct_mmx2(DCTELEM *block); +void ff_fdct_mmxext(DCTELEM *block); void ff_fdct_sse2(DCTELEM *block); #define H264_IDCT(depth) \ diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c index b628f080e4..f94e2f3f1b 100644 --- a/libavcodec/x86/cavsdsp.c +++ b/libavcodec/x86/cavsdsp.c @@ -438,21 +438,22 @@ static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, ui #endif /* (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) */ #if HAVE_MMXEXT_INLINE -QPEL_CAVS(put_, PUT_OP, mmx2) -QPEL_CAVS(avg_,AVG_MMXEXT_OP, mmx2) +QPEL_CAVS(put_, PUT_OP, mmxext) +QPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext) -CAVS_MC(put_, 8, mmx2) -CAVS_MC(put_, 16,mmx2) -CAVS_MC(avg_, 8, mmx2) -CAVS_MC(avg_, 16,mmx2) +CAVS_MC(put_, 8, mmxext) +CAVS_MC(put_, 16, mmxext) +CAVS_MC(avg_, 8, mmxext) +CAVS_MC(avg_, 16, mmxext) -static void ff_cavsdsp_init_mmx2(CAVSDSPContext* c, AVCodecContext *avctx) { +static void ff_cavsdsp_init_mmxext(CAVSDSPContext *c, AVCodecContext *avctx) +{ #define dspfunc(PFX, IDX, NUM) \ - c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \ - c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx2; \ - c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx2; \ - c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx2; \ - c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmxext; \ + c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmxext; \ + c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmxext; \ + c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmxext; \ + c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmxext; \ dspfunc(put_cavs_qpel, 0, 16); dspfunc(put_cavs_qpel, 1, 8); @@ -475,7 +476,7 @@ CAVS_MC(avg_, 16,3dnow) static void ff_cavsdsp_init_3dnow(CAVSDSPContext* c, AVCodecContext *avctx) { #define dspfunc(PFX, IDX, NUM) \ - c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \ + c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmxext; \ c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \ c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \ c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \ @@ -496,7 +497,7 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx) int mm_flags = av_get_cpu_flags(); #if HAVE_MMXEXT_INLINE - if (mm_flags & AV_CPU_FLAG_MMXEXT) ff_cavsdsp_init_mmx2(c, avctx); + if (mm_flags & AV_CPU_FLAG_MMXEXT) ff_cavsdsp_init_mmxext(c, avctx); #endif /* HAVE_MMXEXT_INLINE */ #if HAVE_AMD3DNOW_INLINE if (mm_flags & AV_CPU_FLAG_3DNOW) ff_cavsdsp_init_3dnow(c, avctx); diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index 1e78c20a96..d23279b389 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -207,7 +207,7 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; /***********************************/ /* MMXEXT specific */ -#define DEF(x) x ## _mmx2 +#define DEF(x) x ## _mmxext /* Introduced only in MMXEXT set */ #define PAVGB "pavgb" @@ -221,11 +221,11 @@ DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; #define put_no_rnd_pixels16_mmx put_pixels16_mmx #define put_no_rnd_pixels8_mmx put_pixels8_mmx -#define put_pixels16_mmx2 put_pixels16_mmx -#define put_pixels8_mmx2 put_pixels8_mmx -#define put_pixels4_mmx2 put_pixels4_mmx -#define put_no_rnd_pixels16_mmx2 put_no_rnd_pixels16_mmx -#define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx +#define put_pixels16_mmxext put_pixels16_mmx +#define put_pixels8_mmxext put_pixels8_mmx +#define put_pixels4_mmxext put_pixels4_mmx +#define put_no_rnd_pixels16_mmxext put_no_rnd_pixels16_mmx +#define put_no_rnd_pixels8_mmxext put_no_rnd_pixels8_mmx #define put_pixels16_3dnow put_pixels16_mmx #define put_pixels8_3dnow put_pixels8_mmx #define put_pixels4_3dnow put_pixels4_mmx @@ -924,11 +924,11 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, OP(%%mm5, out, %%mm7, d) #define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMXEXT, OP_3DNOW) \ -static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, \ - uint8_t *src, \ - int dstStride, \ - int srcStride, \ - int h) \ +static void OPNAME ## mpeg4_qpel16_h_lowpass_mmxext(uint8_t *dst, \ + uint8_t *src, \ + int dstStride, \ + int srcStride, \ + int h) \ { \ uint64_t temp; \ \ @@ -1118,11 +1118,11 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, \ } \ } \ \ -static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, \ - uint8_t *src, \ - int dstStride, \ - int srcStride, \ - int h) \ +static void OPNAME ## mpeg4_qpel8_h_lowpass_mmxext(uint8_t *dst, \ + uint8_t *src, \ + int dstStride, \ + int srcStride, \ + int h) \ { \ __asm__ volatile ( \ "pxor %%mm7, %%mm7 \n\t" \ @@ -1755,9 +1755,9 @@ QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP) QPEL_OP(put_, ff_pw_16, _, PUT_OP, 3dnow) QPEL_OP(avg_, ff_pw_16, _, AVG_3DNOW_OP, 3dnow) QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow) -QPEL_OP(put_, ff_pw_16, _, PUT_OP, mmx2) -QPEL_OP(avg_, ff_pw_16, _, AVG_MMXEXT_OP, mmx2) -QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2) +QPEL_OP(put_, ff_pw_16, _, PUT_OP, mmxext) +QPEL_OP(avg_, ff_pw_16, _, AVG_MMXEXT_OP, mmxext) +QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmxext) /***********************************/ /* bilinear qpel: not compliant to any spec, only for -lavdopts fast */ @@ -1811,10 +1811,10 @@ QPEL_2TAP_L3(OPNAME, SIZE, MMX, 31, 1, stride, -1) \ QPEL_2TAP_L3(OPNAME, SIZE, MMX, 13, stride, -stride, 1) \ QPEL_2TAP_L3(OPNAME, SIZE, MMX, 33, stride + 1, -stride, -1) \ -QPEL_2TAP(put_, 16, mmx2) -QPEL_2TAP(avg_, 16, mmx2) -QPEL_2TAP(put_, 8, mmx2) -QPEL_2TAP(avg_, 8, mmx2) +QPEL_2TAP(put_, 16, mmxext) +QPEL_2TAP(avg_, 16, mmxext) +QPEL_2TAP(put_, 8, mmxext) +QPEL_2TAP(avg_, 8, mmxext) QPEL_2TAP(put_, 16, 3dnow) QPEL_2TAP(avg_, 16, 3dnow) QPEL_2TAP(put_, 8, 3dnow) @@ -2035,7 +2035,7 @@ static void name(void *mem, int stride, int h) \ } while (--h); \ } -PREFETCH(prefetch_mmx2, prefetcht0) +PREFETCH(prefetch_mmxext, prefetcht0) PREFETCH(prefetch_3dnow, prefetch) #undef PREFETCH @@ -2089,22 +2089,22 @@ CHROMA_MC(avg, 8, 10, avx) #if HAVE_INLINE_ASM /* CAVS-specific */ -void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) +void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride) { put_pixels8_mmx(dst, src, stride, 8); } -void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) +void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride) { avg_pixels8_mmx(dst, src, stride, 8); } -void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) +void ff_put_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride) { put_pixels16_mmx(dst, src, stride, 16); } -void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) +void ff_avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride) { avg_pixels16_mmx(dst, src, stride, 16); } @@ -2116,10 +2116,10 @@ void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, put_pixels8_mmx(dst, src, stride, 8); } -void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, - int stride, int rnd) +void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src, + int stride, int rnd) { - avg_pixels8_mmx2(dst, src, stride, 8); + avg_pixels8_mmxext(dst, src, stride, 8); } static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize) @@ -2456,74 +2456,74 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags) } -static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx, - int mm_flags) +static void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, + int mm_flags) { const int bit_depth = avctx->bits_per_raw_sample; const int high_bit_depth = bit_depth > 8; #if HAVE_INLINE_ASM - c->prefetch = prefetch_mmx2; + c->prefetch = prefetch_mmxext; if (!high_bit_depth) { - c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; - c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; + c->put_pixels_tab[0][1] = put_pixels16_x2_mmxext; + c->put_pixels_tab[0][2] = put_pixels16_y2_mmxext; - c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; - c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; - c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; + c->avg_pixels_tab[0][0] = avg_pixels16_mmxext; + c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmxext; + c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmxext; - c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; - c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; + c->put_pixels_tab[1][1] = put_pixels8_x2_mmxext; + c->put_pixels_tab[1][2] = put_pixels8_y2_mmxext; - c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; - c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; - c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; + c->avg_pixels_tab[1][0] = avg_pixels8_mmxext; + c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmxext; + c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmxext; } if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { if (!high_bit_depth) { - c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; - c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; - c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; - c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmxext; + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmxext; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmxext; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmxext; - c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; - c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; + c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmxext; + c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmxext; } } if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) { - c->idct_put = ff_idct_xvid_mmx2_put; - c->idct_add = ff_idct_xvid_mmx2_add; - c->idct = ff_idct_xvid_mmx2; + c->idct_put = ff_idct_xvid_mmxext_put; + c->idct_add = ff_idct_xvid_mmxext_add; + c->idct = ff_idct_xvid_mmxext; } if (CONFIG_VP3_DECODER && (avctx->codec_id == AV_CODEC_ID_VP3 || avctx->codec_id == AV_CODEC_ID_THEORA)) { - c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx2; - c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmxext; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmxext; } #endif /* HAVE_INLINE_ASM */ if (CONFIG_H264QPEL) { #if HAVE_INLINE_ASM - SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, ); - SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2, ); - SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2, ); + SET_QPEL_FUNCS(put_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(put_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, ); #endif /* HAVE_INLINE_ASM */ if (!high_bit_depth) { #if HAVE_INLINE_ASM - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2, ); - SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2, ); - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2, ); - SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2, ); + SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, ); #endif /* HAVE_INLINE_ASM */ } else if (bit_depth == 10) { #if HAVE_YASM @@ -2539,10 +2539,10 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx, } #if HAVE_INLINE_ASM - SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, ); - SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, ); - SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, ); + SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmxext, ); + SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmxext, ); + SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmxext, ); #endif /* HAVE_INLINE_ASM */ } @@ -2861,7 +2861,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) dsputil_init_mmx(c, avctx, mm_flags); if (mm_flags & AV_CPU_FLAG_MMXEXT) - dsputil_init_mmx2(c, avctx, mm_flags); + dsputil_init_mmxext(c, avctx, mm_flags); if (mm_flags & AV_CPU_FLAG_3DNOW) dsputil_init_3dnow(c, avctx, mm_flags); diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h index bd14c5ad41..a142406a6e 100644 --- a/libavcodec/x86/dsputil_mmx.h +++ b/libavcodec/x86/dsputil_mmx.h @@ -89,13 +89,13 @@ void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_s void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size); -void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); -void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); -void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); -void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride); +void ff_put_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride); +void ff_avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride); +void ff_put_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride); +void ff_avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, uint8_t *src, int stride); void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd); -void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd); +void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src, int stride, int rnd); void ff_put_rv40_qpel8_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); void ff_put_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size); diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c index 43940bdf81..883d96566c 100644 --- a/libavcodec/x86/dsputilenc_mmx.c +++ b/libavcodec/x86/dsputilenc_mmx.c @@ -647,7 +647,9 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si } #undef SUM -static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) { +static int vsad_intra16_mmxext(void *v, uint8_t *pix, uint8_t *dummy, + int line_size, int h) +{ int tmp; assert( (((int)pix) & 7) == 0); @@ -765,7 +767,9 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in } #undef SUM -static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { +static int vsad16_mmxext(void *v, uint8_t *pix1, uint8_t *pix2, + int line_size, int h) +{ int tmp; assert( (((int)pix1) & 7) == 0); @@ -844,7 +848,10 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ dst[i+0] = src1[i+0]-src2[i+0]; } -static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){ +static void sub_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *src1, + const uint8_t *src2, int w, + int *left, int *left_top) +{ x86_reg i=0; uint8_t l, lt; @@ -976,7 +983,7 @@ DCT_SAD_FUNC(mmx) #define HSUM(a,t,dst) HSUM_MMXEXT(a,t,dst) #define MMABS(a,z) MMABS_MMXEXT(a,z) -DCT_SAD_FUNC(mmx2) +DCT_SAD_FUNC(mmxext) #undef HSUM #undef DCT_SAD @@ -1115,7 +1122,7 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) if(mm_flags & AV_CPU_FLAG_SSE2){ c->fdct = ff_fdct_sse2; } else if (mm_flags & AV_CPU_FLAG_MMXEXT) { - c->fdct = ff_fdct_mmx2; + c->fdct = ff_fdct_mmxext; }else{ c->fdct = ff_fdct_mmx; } @@ -1148,14 +1155,14 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx) c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx; if (mm_flags & AV_CPU_FLAG_MMXEXT) { - c->sum_abs_dctelem= sum_abs_dctelem_mmx2; - c->vsad[4]= vsad_intra16_mmx2; + c->sum_abs_dctelem = sum_abs_dctelem_mmxext; + c->vsad[4] = vsad_intra16_mmxext; if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ - c->vsad[0] = vsad16_mmx2; + c->vsad[0] = vsad16_mmxext; } - c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; + c->sub_hfyu_median_prediction = sub_hfyu_median_prediction_mmxext; } if(mm_flags & AV_CPU_FLAG_SSE2){ diff --git a/libavcodec/x86/fdct.c b/libavcodec/x86/fdct.c index f9bd3f2508..b37238dfac 100644 --- a/libavcodec/x86/fdct.c +++ b/libavcodec/x86/fdct.c @@ -440,7 +440,8 @@ static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) ); } -static av_always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table) +static av_always_inline void fdct_row_mmxext(const int16_t *in, int16_t *out, + const int16_t *table) { __asm__ volatile ( "pshufw $0x1B, 8(%0), %%mm5 \n\t" @@ -555,7 +556,7 @@ void ff_fdct_mmx(int16_t *block) } } -void ff_fdct_mmx2(int16_t *block) +void ff_fdct_mmxext(int16_t *block) { DECLARE_ALIGNED(8, int64_t, align_tmp)[16]; int16_t *block1= (int16_t*)align_tmp; @@ -566,7 +567,7 @@ void ff_fdct_mmx2(int16_t *block) fdct_col_mmx(block, block1, 4); for(i=8;i>0;i--) { - fdct_row_mmx2(block1, block, table); + fdct_row_mmxext(block1, block, table); block1 += 8; table += 32; block += 8; diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index 5a2db781d2..f978520719 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -1002,36 +1002,36 @@ static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\ }\ -#define put_pixels8_l2_sse2 put_pixels8_l2_mmx2 -#define avg_pixels8_l2_sse2 avg_pixels8_l2_mmx2 -#define put_pixels16_l2_sse2 put_pixels16_l2_mmx2 -#define avg_pixels16_l2_sse2 avg_pixels16_l2_mmx2 -#define put_pixels8_l2_ssse3 put_pixels8_l2_mmx2 -#define avg_pixels8_l2_ssse3 avg_pixels8_l2_mmx2 -#define put_pixels16_l2_ssse3 put_pixels16_l2_mmx2 -#define avg_pixels16_l2_ssse3 avg_pixels16_l2_mmx2 +#define put_pixels8_l2_sse2 put_pixels8_l2_mmxext +#define avg_pixels8_l2_sse2 avg_pixels8_l2_mmxext +#define put_pixels16_l2_sse2 put_pixels16_l2_mmxext +#define avg_pixels16_l2_sse2 avg_pixels16_l2_mmxext +#define put_pixels8_l2_ssse3 put_pixels8_l2_mmxext +#define avg_pixels8_l2_ssse3 avg_pixels8_l2_mmxext +#define put_pixels16_l2_ssse3 put_pixels16_l2_mmxext +#define avg_pixels16_l2_ssse3 avg_pixels16_l2_mmxext -#define put_pixels8_l2_shift5_sse2 put_pixels8_l2_shift5_mmx2 -#define avg_pixels8_l2_shift5_sse2 avg_pixels8_l2_shift5_mmx2 -#define put_pixels16_l2_shift5_sse2 put_pixels16_l2_shift5_mmx2 -#define avg_pixels16_l2_shift5_sse2 avg_pixels16_l2_shift5_mmx2 -#define put_pixels8_l2_shift5_ssse3 put_pixels8_l2_shift5_mmx2 -#define avg_pixels8_l2_shift5_ssse3 avg_pixels8_l2_shift5_mmx2 -#define put_pixels16_l2_shift5_ssse3 put_pixels16_l2_shift5_mmx2 -#define avg_pixels16_l2_shift5_ssse3 avg_pixels16_l2_shift5_mmx2 +#define put_pixels8_l2_shift5_sse2 put_pixels8_l2_shift5_mmxext +#define avg_pixels8_l2_shift5_sse2 avg_pixels8_l2_shift5_mmxext +#define put_pixels16_l2_shift5_sse2 put_pixels16_l2_shift5_mmxext +#define avg_pixels16_l2_shift5_sse2 avg_pixels16_l2_shift5_mmxext +#define put_pixels8_l2_shift5_ssse3 put_pixels8_l2_shift5_mmxext +#define avg_pixels8_l2_shift5_ssse3 avg_pixels8_l2_shift5_mmxext +#define put_pixels16_l2_shift5_ssse3 put_pixels16_l2_shift5_mmxext +#define avg_pixels16_l2_shift5_ssse3 avg_pixels16_l2_shift5_mmxext -#define put_h264_qpel8_h_lowpass_l2_sse2 put_h264_qpel8_h_lowpass_l2_mmx2 -#define avg_h264_qpel8_h_lowpass_l2_sse2 avg_h264_qpel8_h_lowpass_l2_mmx2 -#define put_h264_qpel16_h_lowpass_l2_sse2 put_h264_qpel16_h_lowpass_l2_mmx2 -#define avg_h264_qpel16_h_lowpass_l2_sse2 avg_h264_qpel16_h_lowpass_l2_mmx2 +#define put_h264_qpel8_h_lowpass_l2_sse2 put_h264_qpel8_h_lowpass_l2_mmxext +#define avg_h264_qpel8_h_lowpass_l2_sse2 avg_h264_qpel8_h_lowpass_l2_mmxext +#define put_h264_qpel16_h_lowpass_l2_sse2 put_h264_qpel16_h_lowpass_l2_mmxext +#define avg_h264_qpel16_h_lowpass_l2_sse2 avg_h264_qpel16_h_lowpass_l2_mmxext #define put_h264_qpel8_v_lowpass_ssse3 put_h264_qpel8_v_lowpass_sse2 #define avg_h264_qpel8_v_lowpass_ssse3 avg_h264_qpel8_v_lowpass_sse2 #define put_h264_qpel16_v_lowpass_ssse3 put_h264_qpel16_v_lowpass_sse2 #define avg_h264_qpel16_v_lowpass_ssse3 avg_h264_qpel16_v_lowpass_sse2 -#define put_h264_qpel8or16_hv2_lowpass_sse2 put_h264_qpel8or16_hv2_lowpass_mmx2 -#define avg_h264_qpel8or16_hv2_lowpass_sse2 avg_h264_qpel8or16_hv2_lowpass_mmx2 +#define put_h264_qpel8or16_hv2_lowpass_sse2 put_h264_qpel8or16_hv2_lowpass_mmxext +#define avg_h264_qpel8or16_hv2_lowpass_sse2 avg_h264_qpel8or16_hv2_lowpass_mmxext #define H264_MC(OPNAME, SIZE, MMX, ALIGN) \ H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\ @@ -1045,8 +1045,8 @@ static void put_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){ static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){ avg_pixels16_sse2(dst, src, stride, 16); } -#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmx2 -#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmx2 +#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmxext +#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmxext #define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \ static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\ @@ -1168,8 +1168,8 @@ QPEL_H264(put_, PUT_OP, 3dnow) QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow) #undef PAVGB #define PAVGB "pavgb" -QPEL_H264(put_, PUT_OP, mmx2) -QPEL_H264(avg_,AVG_MMXEXT_OP, mmx2) +QPEL_H264(put_, PUT_OP, mmxext) +QPEL_H264(avg_, AVG_MMXEXT_OP, mmxext) QPEL_H264_V_XMM(put_, PUT_OP, sse2) QPEL_H264_V_XMM(avg_,AVG_MMXEXT_OP, sse2) QPEL_H264_HV_XMM(put_, PUT_OP, sse2) @@ -1185,7 +1185,7 @@ QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3) #undef PAVGB H264_MC_4816(3dnow) -H264_MC_4816(mmx2) +H264_MC_4816(mmxext) H264_MC_816(H264_MC_V, sse2) H264_MC_816(H264_MC_HV, sse2) #if HAVE_SSSE3_INLINE diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c index 3f6ded46e1..913c362ee3 100644 --- a/libavcodec/x86/h264dsp_init.c +++ b/libavcodec/x86/h264dsp_init.c @@ -130,18 +130,17 @@ LF_FUNCS(uint16_t, 10) #if ARCH_X86_32 LF_FUNC(v8, luma, 8, mmx2) -static void ff_deblock_v_luma_8_mmx2(uint8_t *pix, int stride, int alpha, - int beta, int8_t *tc0) +static void ff_deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha, + int beta, int8_t *tc0) { if ((tc0[0] & tc0[1]) >= 0) ff_deblock_v8_luma_8_mmx2(pix + 0, stride, alpha, beta, tc0); if ((tc0[2] & tc0[3]) >= 0) ff_deblock_v8_luma_8_mmx2(pix + 8, stride, alpha, beta, tc0 + 2); } - LF_IFUNC(v8, luma_intra, 8, mmx2) -static void ff_deblock_v_luma_intra_8_mmx2(uint8_t *pix, int stride, - int alpha, int beta) +static void ff_deblock_v_luma_intra_8_mmxext(uint8_t *pix, int stride, + int alpha, int beta) { ff_deblock_v8_luma_intra_8_mmx2(pix + 0, stride, alpha, beta); ff_deblock_v8_luma_intra_8_mmx2(pix + 8, stride, alpha, beta); @@ -246,9 +245,9 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmx2; } #if ARCH_X86_32 - c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_mmx2; + c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_mmxext; c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_mmx2; - c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmx2; + c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmx2; #endif /* ARCH_X86_32 */ c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmx2; diff --git a/libavcodec/x86/idct_mmx_xvid.c b/libavcodec/x86/idct_mmx_xvid.c index 08a627d5b9..2cf8b47d62 100644 --- a/libavcodec/x86/idct_mmx_xvid.c +++ b/libavcodec/x86/idct_mmx_xvid.c @@ -512,7 +512,8 @@ __asm__ volatile( //----------------------------------------------------------------------------- -void ff_idct_xvid_mmx2(short *block){ +void ff_idct_xvid_mmxext(short *block) +{ __asm__ volatile( //# Process each row DCT_8_INV_ROW_XMM(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1)) @@ -542,15 +543,15 @@ void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block) ff_add_pixels_clamped_mmx(block, dest, line_size); } -void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block) +void ff_idct_xvid_mmxext_put(uint8_t *dest, int line_size, DCTELEM *block) { - ff_idct_xvid_mmx2(block); + ff_idct_xvid_mmxext(block); ff_put_pixels_clamped_mmx(block, dest, line_size); } -void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block) +void ff_idct_xvid_mmxext_add(uint8_t *dest, int line_size, DCTELEM *block) { - ff_idct_xvid_mmx2(block); + ff_idct_xvid_mmxext(block); ff_add_pixels_clamped_mmx(block, dest, line_size); } diff --git a/libavcodec/x86/idct_xvid.h b/libavcodec/x86/idct_xvid.h index 82fa990873..79d5bf96a8 100644 --- a/libavcodec/x86/idct_xvid.h +++ b/libavcodec/x86/idct_xvid.h @@ -34,9 +34,9 @@ void ff_idct_xvid_mmx(short *block); void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block); void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block); -void ff_idct_xvid_mmx2(short *block); -void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block); -void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block); +void ff_idct_xvid_mmxext(short *block); +void ff_idct_xvid_mmxext_put(uint8_t *dest, int line_size, DCTELEM *block); +void ff_idct_xvid_mmxext_add(uint8_t *dest, int line_size, DCTELEM *block); void ff_idct_xvid_sse2(short *block); void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block); diff --git a/libavcodec/x86/motion_est.c b/libavcodec/x86/motion_est.c index 6eb44d4b2d..0a0cab9cd2 100644 --- a/libavcodec/x86/motion_est.c +++ b/libavcodec/x86/motion_est.c @@ -74,7 +74,8 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) ); } -static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) +static inline void sad8_1_mmxext(uint8_t *blk1, uint8_t *blk2, + int stride, int h) { __asm__ volatile( ".p2align 4 \n\t" @@ -120,7 +121,8 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) return ret; } -static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) +static inline void sad8_x2a_mmxext(uint8_t *blk1, uint8_t *blk2, + int stride, int h) { __asm__ volatile( ".p2align 4 \n\t" @@ -142,7 +144,8 @@ static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h ); } -static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) +static inline void sad8_y2a_mmxext(uint8_t *blk1, uint8_t *blk2, + int stride, int h) { __asm__ volatile( "movq (%1), %%mm0 \n\t" @@ -167,7 +170,8 @@ static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h ); } -static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) +static inline void sad8_4_mmxext(uint8_t *blk1, uint8_t *blk2, + int stride, int h) { __asm__ volatile( "movq "MANGLE(bone)", %%mm5 \n\t" @@ -304,7 +308,7 @@ static inline int sum_mmx(void) return ret&0xFFFF; } -static inline int sum_mmx2(void) +static inline int sum_mmxext(void) { int ret; __asm__ volatile( @@ -424,7 +428,7 @@ static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, }\ PIX_SAD(mmx) -PIX_SAD(mmx2) +PIX_SAD(mmxext) #endif /* HAVE_INLINE_ASM */ @@ -447,19 +451,19 @@ void ff_dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx) c->sad[1]= sad8_mmx; } if (mm_flags & AV_CPU_FLAG_MMXEXT) { - c->pix_abs[0][0] = sad16_mmx2; - c->pix_abs[1][0] = sad8_mmx2; + c->pix_abs[0][0] = sad16_mmxext; + c->pix_abs[1][0] = sad8_mmxext; - c->sad[0]= sad16_mmx2; - c->sad[1]= sad8_mmx2; + c->sad[0] = sad16_mmxext; + c->sad[1] = sad8_mmxext; if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ - c->pix_abs[0][1] = sad16_x2_mmx2; - c->pix_abs[0][2] = sad16_y2_mmx2; - c->pix_abs[0][3] = sad16_xy2_mmx2; - c->pix_abs[1][1] = sad8_x2_mmx2; - c->pix_abs[1][2] = sad8_y2_mmx2; - c->pix_abs[1][3] = sad8_xy2_mmx2; + c->pix_abs[0][1] = sad16_x2_mmxext; + c->pix_abs[0][2] = sad16_y2_mmxext; + c->pix_abs[0][3] = sad16_xy2_mmxext; + c->pix_abs[1][1] = sad8_x2_mmxext; + c->pix_abs[1][2] = sad8_y2_mmxext; + c->pix_abs[1][3] = sad8_xy2_mmxext; } } if ((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW) && avctx->codec_id != AV_CODEC_ID_SNOW) { diff --git a/libavcodec/x86/mpegvideoenc.c b/libavcodec/x86/mpegvideoenc.c index 59e3580153..8f7c2e474a 100644 --- a/libavcodec/x86/mpegvideoenc.c +++ b/libavcodec/x86/mpegvideoenc.c @@ -47,8 +47,8 @@ extern uint16_t ff_inv_zigzag_direct16[64]; #define COMPILE_TEMPLATE_SSSE3 0 #undef RENAME #undef RENAMEl -#define RENAME(a) a ## _MMX2 -#define RENAMEl(a) a ## _mmx2 +#define RENAME(a) a ## _MMXEXT +#define RENAMEl(a) a ## _mmxext #include "mpegvideoenc_template.c" #endif /* HAVE_MMXEXT_INLINE */ @@ -92,7 +92,7 @@ void ff_MPV_encode_init_x86(MpegEncContext *s) #endif #if HAVE_MMXEXT_INLINE if (INLINE_MMXEXT(mm_flags)) - s->dct_quantize = dct_quantize_MMX2; + s->dct_quantize = dct_quantize_MMXEXT; #endif #if HAVE_SSE2_INLINE if (INLINE_SSE2(mm_flags)) diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c index 6b1ae37efd..b02582f615 100644 --- a/libavcodec/x86/vc1dsp_mmx.c +++ b/libavcodec/x86/vc1dsp_mmx.c @@ -467,7 +467,10 @@ VC1_MSPEL_MC(avg_) static void put_vc1_mspel_mc ## a ## b ## _mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \ put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ }\ -static void avg_vc1_mspel_mc ## a ## b ## _mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \ +static void avg_vc1_mspel_mc ## a ## b ## _mmxext(uint8_t *dst, \ + const uint8_t *src, \ + int stride, int rnd) \ +{ \ avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \ } @@ -490,7 +493,8 @@ DECLARE_FUNCTION(3, 1) DECLARE_FUNCTION(3, 2) DECLARE_FUNCTION(3, 3) -static void vc1_inv_trans_4x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block) +static void vc1_inv_trans_4x4_dc_mmxext(uint8_t *dest, int linesize, + DCTELEM *block) { int dc = block[0]; dc = (17 * dc + 4) >> 3; @@ -528,7 +532,8 @@ static void vc1_inv_trans_4x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc ); } -static void vc1_inv_trans_4x8_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block) +static void vc1_inv_trans_4x8_dc_mmxext(uint8_t *dest, int linesize, + DCTELEM *block) { int dc = block[0]; dc = (17 * dc + 4) >> 3; @@ -589,7 +594,8 @@ static void vc1_inv_trans_4x8_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc ); } -static void vc1_inv_trans_8x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block) +static void vc1_inv_trans_8x4_dc_mmxext(uint8_t *dest, int linesize, + DCTELEM *block) { int dc = block[0]; dc = ( 3 * dc + 1) >> 1; @@ -627,7 +633,8 @@ static void vc1_inv_trans_8x4_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *bloc ); } -static void vc1_inv_trans_8x8_dc_mmx2(uint8_t *dest, int linesize, DCTELEM *block) +static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize, + DCTELEM *block) { int dc = block[0]; dc = (3 * dc + 1) >> 1; @@ -713,29 +720,29 @@ av_cold void ff_vc1dsp_init_mmx(VC1DSPContext *dsp) av_cold void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp) { - dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmx2; - dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmx2; - dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmx2; - dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_mmx2; - - dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_mmx2; - dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_mmx2; - dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_mmx2; - dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_mmx2; - - dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_mmx2; - dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_mmx2; - dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_mmx2; - dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_mmx2; - - dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_mmx2; - dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_mmx2; - dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_mmx2; - dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_mmx2; - - dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmx2; - dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmx2; - dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmx2; - dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmx2; + dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmxext; + dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_mmxext; + dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_mmxext; + dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_mmxext; + + dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_mmxext; + dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_mmxext; + dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_mmxext; + dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_mmxext; + + dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_mmxext; + dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_mmxext; + dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_mmxext; + dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_mmxext; + + dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_mmxext; + dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_mmxext; + dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_mmxext; + dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_mmxext; + + dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_mmxext; + dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_mmxext; + dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmxext; + dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmxext; } #endif /* HAVE_INLINE_ASM */ diff --git a/libavfilter/x86/gradfun.c b/libavfilter/x86/gradfun.c index 424a03138b..b4ca86c617 100644 --- a/libavfilter/x86/gradfun.c +++ b/libavfilter/x86/gradfun.c @@ -30,7 +30,9 @@ DECLARE_ALIGNED(16, static const uint16_t, pw_7f)[8] = {0x7F,0x7F,0x7F,0x7F,0x7F DECLARE_ALIGNED(16, static const uint16_t, pw_ff)[8] = {0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; #if HAVE_MMXEXT_INLINE -static void gradfun_filter_line_mmx2(uint8_t *dst, uint8_t *src, uint16_t *dc, int width, int thresh, const uint16_t *dithers) +static void gradfun_filter_line_mmxext(uint8_t *dst, uint8_t *src, uint16_t *dc, + int width, int thresh, + const uint16_t *dithers) { intptr_t x; if (width & 3) { @@ -175,7 +177,7 @@ av_cold void ff_gradfun_init_x86(GradFunContext *gf) #if HAVE_MMXEXT_INLINE if (cpu_flags & AV_CPU_FLAG_MMXEXT) - gf->filter_line = gradfun_filter_line_mmx2; + gf->filter_line = gradfun_filter_line_mmxext; #endif #if HAVE_SSSE3_INLINE if (cpu_flags & AV_CPU_FLAG_SSSE3) diff --git a/libavfilter/x86/yadif.c b/libavfilter/x86/yadif.c index f178b32cbe..ab1d282f9d 100644 --- a/libavfilter/x86/yadif.c +++ b/libavfilter/x86/yadif.c @@ -49,7 +49,7 @@ DECLARE_ASM_CONST(16, const xmm_reg, pw_1) = {0x0001000100010001ULL, 0x000100010 #if HAVE_MMXEXT_INLINE #undef RENAME -#define RENAME(a) a ## _mmx2 +#define RENAME(a) a ## _mmxext #include "yadif_template.c" #endif @@ -61,7 +61,7 @@ av_cold void ff_yadif_init_x86(YADIFContext *yadif) #if HAVE_MMXEXT_INLINE if (cpu_flags & AV_CPU_FLAG_MMXEXT) - yadif->filter_line = yadif_filter_line_mmx2; + yadif->filter_line = yadif_filter_line_mmxext; #endif #if HAVE_SSE2_INLINE if (cpu_flags & AV_CPU_FLAG_SSE2) diff --git a/libswscale/utils.c b/libswscale/utils.c index 64a3a58067..e5e4d60dd2 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -600,8 +600,9 @@ fail: } #if HAVE_MMXEXT_INLINE -static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode, - int16_t *filter, int32_t *filterPos, int numSplits) +static int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, + int16_t *filter, int32_t *filterPos, + int numSplits) { uint8_t *fragmentA; x86_reg imm8OfPShufW1A; @@ -1043,10 +1044,10 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, #if HAVE_MMXEXT_INLINE // can't downscale !!! if (c->canMMXEXTBeUsed && (flags & SWS_FAST_BILINEAR)) { - c->lumMmxextFilterCodeSize = initMMX2HScaler(dstW, c->lumXInc, NULL, - NULL, NULL, 8); - c->chrMmxextFilterCodeSize = initMMX2HScaler(c->chrDstW, c->chrXInc, - NULL, NULL, NULL, 4); + c->lumMmxextFilterCodeSize = init_hscaler_mmxext(dstW, c->lumXInc, NULL, + NULL, NULL, 8); + c->chrMmxextFilterCodeSize = init_hscaler_mmxext(c->chrDstW, c->chrXInc, + NULL, NULL, NULL, 4); #if USE_MMAP c->lumMmxextFilterCode = mmap(NULL, c->lumMmxextFilterCodeSize, @@ -1078,10 +1079,10 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW / 2 / 8 + 8) * sizeof(int32_t), fail); FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail); - initMMX2HScaler(dstW, c->lumXInc, c->lumMmxextFilterCode, - c->hLumFilter, c->hLumFilterPos, 8); - initMMX2HScaler(c->chrDstW, c->chrXInc, c->chrMmxextFilterCode, - c->hChrFilter, c->hChrFilterPos, 4); + init_hscaler_mmxext(dstW, c->lumXInc, c->lumMmxextFilterCode, + c->hLumFilter, c->hLumFilterPos, 8); + init_hscaler_mmxext(c->chrDstW, c->chrXInc, c->chrMmxextFilterCode, + c->hChrFilter, c->hChrFilterPos, 4); #if USE_MMAP mprotect(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize, PROT_EXEC | PROT_READ); diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c index 486f436702..d4f25804cc 100644 --- a/libswscale/x86/rgb2rgb.c +++ b/libswscale/x86/rgb2rgb.c @@ -99,7 +99,7 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL; #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _MMX2 +#define RENAME(a) a ## _MMXEXT #include "rgb2rgb_template.c" //SSE2 versions @@ -139,7 +139,7 @@ av_cold void rgb2rgb_init_x86(void) if (INLINE_AMD3DNOW(cpu_flags)) rgb2rgb_init_3DNOW(); if (INLINE_MMXEXT(cpu_flags)) - rgb2rgb_init_MMX2(); + rgb2rgb_init_MMXEXT(); if (INLINE_SSE2(cpu_flags)) rgb2rgb_init_SSE2(); #endif /* HAVE_INLINE_ASM */ diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index fc74d97201..571510ae43 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -83,7 +83,7 @@ DECLARE_ALIGNED(8, const uint64_t, ff_w1111) = 0x0001000100010001ULL; #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _MMX2 +#define RENAME(a) a ## _MMXEXT #include "swscale_template.c" #endif @@ -311,7 +311,7 @@ av_cold void ff_sws_init_swScale_mmx(SwsContext *c) sws_init_swScale_MMX(c); #if HAVE_MMXEXT_INLINE if (cpu_flags & AV_CPU_FLAG_MMXEXT) - sws_init_swScale_MMX2(c); + sws_init_swScale_MMXEXT(c); #endif #endif /* HAVE_INLINE_ASM */ diff --git a/libswscale/x86/yuv2rgb.c b/libswscale/x86/yuv2rgb.c index 17ac3e2ffe..419d5133f9 100644 --- a/libswscale/x86/yuv2rgb.c +++ b/libswscale/x86/yuv2rgb.c @@ -63,7 +63,7 @@ DECLARE_ASM_CONST(8, uint64_t, pb_07) = 0x0707070707070707ULL; #undef RENAME #undef COMPILE_TEMPLATE_MMXEXT #define COMPILE_TEMPLATE_MMXEXT 1 -#define RENAME(a) a ## _MMX2 +#define RENAME(a) a ## _MMXEXT #include "yuv2rgb_template.c" #endif /* HAVE_MMXEXT_INLINE */ @@ -81,8 +81,10 @@ av_cold SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c) #if HAVE_MMXEXT_INLINE if (cpu_flags & AV_CPU_FLAG_MMXEXT) { switch (c->dstFormat) { - case AV_PIX_FMT_RGB24: return yuv420_rgb24_MMX2; - case AV_PIX_FMT_BGR24: return yuv420_bgr24_MMX2; + case AV_PIX_FMT_RGB24: + return yuv420_rgb24_MMXEXT; + case AV_PIX_FMT_BGR24: + return yuv420_bgr24_MMXEXT; } } #endif