|
|
|
@ -84,6 +84,8 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE) = { 0xFEFEFEFEFEFEFEFEULL, 0xFEF |
|
|
|
|
DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 }; |
|
|
|
|
DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; |
|
|
|
|
|
|
|
|
|
#if HAVE_INLINE_ASM |
|
|
|
|
|
|
|
|
|
/* Emit the assembler directive ".p2align 3" (align to a 2^3 = 8-byte
 * boundary) through an inline asm statement that declares no operands.
 * Use as a statement: JUMPALIGN(); */
#define JUMPALIGN() __asm__ volatile (".p2align 3"::)
|
|
|
|
/* Emit "pxor %regd, %regd" through inline asm. XORing a register with itself
 * leaves it all-zero. regd is the bare register name and is stringized into
 * the asm template, e.g. MOVQ_ZERO(mm7). No outputs, inputs or clobbers are
 * declared, so the compiler is not told that the register changes. */
#define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)
|
|
|
|
|
|
|
|
@ -1836,6 +1838,8 @@ void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, int stride) |
|
|
|
|
avg_pixels16_xy2_mmx(dst, src, stride, 16); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#endif /* HAVE_INLINE_ASM */ |
|
|
|
|
|
|
|
|
|
#if HAVE_YASM |
|
|
|
|
typedef void emu_edge_core_func(uint8_t *buf, const uint8_t *src, |
|
|
|
|
x86_reg linesize, x86_reg start_y, |
|
|
|
@ -1904,6 +1908,8 @@ static av_noinline void emulated_edge_mc_sse(uint8_t *buf, const uint8_t *src, |
|
|
|
|
} |
|
|
|
|
#endif /* HAVE_YASM */ |
|
|
|
|
|
|
|
|
|
#if HAVE_INLINE_ASM |
|
|
|
|
|
|
|
|
|
typedef void emulated_edge_mc_func(uint8_t *dst, const uint8_t *src, |
|
|
|
|
int linesize, int block_w, int block_h, |
|
|
|
|
int src_x, int src_y, int w, int h); |
|
|
|
@ -2073,6 +2079,8 @@ PREFETCH(prefetch_mmx2, prefetcht0) |
|
|
|
|
PREFETCH(prefetch_3dnow, prefetch) |
|
|
|
|
#undef PREFETCH |
|
|
|
|
|
|
|
|
|
#endif /* HAVE_INLINE_ASM */ |
|
|
|
|
|
|
|
|
|
#include "h264_qpel_mmx.c" |
|
|
|
|
|
|
|
|
|
void ff_put_h264_chroma_mc8_mmx_rnd (uint8_t *dst, uint8_t *src, |
|
|
|
@ -2118,6 +2126,8 @@ CHROMA_MC(avg, 8, 10, sse2) |
|
|
|
|
CHROMA_MC(put, 8, 10, avx) |
|
|
|
|
CHROMA_MC(avg, 8, 10, avx) |
|
|
|
|
|
|
|
|
|
#if HAVE_INLINE_ASM |
|
|
|
|
|
|
|
|
|
/* CAVS-specific */ |
|
|
|
|
void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) |
|
|
|
|
{ |
|
|
|
@ -2476,6 +2486,8 @@ static void vector_clipf_sse(float *dst, const float *src, |
|
|
|
|
); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#endif /* HAVE_INLINE_ASM */ |
|
|
|
|
|
|
|
|
|
int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2, |
|
|
|
|
int order); |
|
|
|
|
int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, |
|
|
|
@ -2588,6 +2600,7 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags) |
|
|
|
|
{ |
|
|
|
|
const int high_bit_depth = avctx->bits_per_raw_sample > 8; |
|
|
|
|
|
|
|
|
|
#if HAVE_INLINE_ASM |
|
|
|
|
c->put_pixels_clamped = ff_put_pixels_clamped_mmx; |
|
|
|
|
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; |
|
|
|
|
c->add_pixels_clamped = ff_add_pixels_clamped_mmx; |
|
|
|
@ -2610,10 +2623,6 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags) |
|
|
|
|
#if ARCH_X86_32 || !HAVE_YASM |
|
|
|
|
c->gmc = gmc_mmx; |
|
|
|
|
#endif |
|
|
|
|
#if ARCH_X86_32 && HAVE_YASM |
|
|
|
|
if (!high_bit_depth) |
|
|
|
|
c->emulated_edge_mc = emulated_edge_mc_mmx; |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
c->add_bytes = add_bytes_mmx; |
|
|
|
|
|
|
|
|
@ -2621,8 +2630,14 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags) |
|
|
|
|
c->h263_v_loop_filter = h263_v_loop_filter_mmx; |
|
|
|
|
c->h263_h_loop_filter = h263_h_loop_filter_mmx; |
|
|
|
|
} |
|
|
|
|
#endif /* HAVE_INLINE_ASM */ |
|
|
|
|
|
|
|
|
|
#if HAVE_YASM |
|
|
|
|
#if ARCH_X86_32 |
|
|
|
|
if (!high_bit_depth) |
|
|
|
|
c->emulated_edge_mc = emulated_edge_mc_mmx; |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
if (!high_bit_depth && CONFIG_H264CHROMA) { |
|
|
|
|
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_mmx_rnd; |
|
|
|
|
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx; |
|
|
|
@ -2639,6 +2654,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx, |
|
|
|
|
const int bit_depth = avctx->bits_per_raw_sample; |
|
|
|
|
const int high_bit_depth = bit_depth > 8; |
|
|
|
|
|
|
|
|
|
#if HAVE_INLINE_ASM |
|
|
|
|
c->prefetch = prefetch_mmx2; |
|
|
|
|
|
|
|
|
|
if (!high_bit_depth) { |
|
|
|
@ -2674,22 +2690,27 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx, |
|
|
|
|
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx2; |
|
|
|
|
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2; |
|
|
|
|
} |
|
|
|
|
#endif /* HAVE_INLINE_ASM */ |
|
|
|
|
|
|
|
|
|
if (CONFIG_H264QPEL) { |
|
|
|
|
#if HAVE_INLINE_ASM |
|
|
|
|
SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, ); |
|
|
|
|
SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, ); |
|
|
|
|
SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, ); |
|
|
|
|
SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2, ); |
|
|
|
|
SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2, ); |
|
|
|
|
SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2, ); |
|
|
|
|
#endif /* HAVE_INLINE_ASM */ |
|
|
|
|
|
|
|
|
|
if (!high_bit_depth) { |
|
|
|
|
#if HAVE_INLINE_ASM |
|
|
|
|
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2, ); |
|
|
|
|
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2, ); |
|
|
|
|
SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2, ); |
|
|
|
|
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2, ); |
|
|
|
|
SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2, ); |
|
|
|
|
SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2, ); |
|
|
|
|
#endif /* HAVE_INLINE_ASM */ |
|
|
|
|
} else if (bit_depth == 10) { |
|
|
|
|
#if HAVE_YASM |
|
|
|
|
#if !ARCH_X86_64 |
|
|
|
@ -2703,10 +2724,12 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx, |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#if HAVE_INLINE_ASM |
|
|
|
|
SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, ); |
|
|
|
|
SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, ); |
|
|
|
|
SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, ); |
|
|
|
|
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, ); |
|
|
|
|
#endif /* HAVE_INLINE_ASM */ |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#if HAVE_YASM |
|
|
|
@ -2741,6 +2764,7 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx, |
|
|
|
|
{ |
|
|
|
|
const int high_bit_depth = avctx->bits_per_raw_sample > 8; |
|
|
|
|
|
|
|
|
|
#if HAVE_INLINE_ASM |
|
|
|
|
c->prefetch = prefetch_3dnow; |
|
|
|
|
|
|
|
|
|
if (!high_bit_depth) { |
|
|
|
@ -2798,25 +2822,26 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx, |
|
|
|
|
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, ); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
#if HAVE_YASM |
|
|
|
|
if (!high_bit_depth && CONFIG_H264CHROMA) { |
|
|
|
|
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_3dnow_rnd; |
|
|
|
|
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow; |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; |
|
|
|
|
|
|
|
|
|
#if HAVE_7REGS |
|
|
|
|
if (mm_flags & AV_CPU_FLAG_CMOV) |
|
|
|
|
c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov; |
|
|
|
|
#endif |
|
|
|
|
#endif /* HAVE_INLINE_ASM */ |
|
|
|
|
|
|
|
|
|
#if HAVE_YASM |
|
|
|
|
if (!high_bit_depth && CONFIG_H264CHROMA) { |
|
|
|
|
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_3dnow_rnd; |
|
|
|
|
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow; |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/**
 * Install the 3dnow2-specific routines into the DSP context.
 *
 * The only entry assigned here is c->vector_fmul_window, and only when the
 * build defines both HAVE_6REGS and HAVE_INLINE_ASM to non-zero values;
 * otherwise the function body is empty.
 *
 * @param c        DSP context whose function pointer is filled in
 * @param avctx    codec context (not read by this function)
 * @param mm_flags CPU capability flags (not read by this function)
 */
static void dsputil_init_3dnow2(DSPContext *c, AVCodecContext *avctx,
                                int mm_flags)
{
    /* One combined guard, closed by the single #endif below. */
#if HAVE_6REGS && HAVE_INLINE_ASM
    c->vector_fmul_window = vector_fmul_window_3dnow2;
#endif
}
|
|
|
@ -2825,6 +2850,7 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags) |
|
|
|
|
{ |
|
|
|
|
const int high_bit_depth = avctx->bits_per_raw_sample > 8; |
|
|
|
|
|
|
|
|
|
#if HAVE_INLINE_ASM |
|
|
|
|
if (!high_bit_depth) { |
|
|
|
|
if (!(CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)) { |
|
|
|
|
/* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */ |
|
|
|
@ -2835,31 +2861,35 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags) |
|
|
|
|
|
|
|
|
|
c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; |
|
|
|
|
c->ac3_downmix = ac3_downmix_sse; |
|
|
|
|
#if HAVE_YASM |
|
|
|
|
c->vector_fmul_reverse = ff_vector_fmul_reverse_sse; |
|
|
|
|
c->vector_fmul_add = ff_vector_fmul_add_sse; |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#if HAVE_6REGS |
|
|
|
|
c->vector_fmul_window = vector_fmul_window_sse; |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
c->vector_clipf = vector_clipf_sse; |
|
|
|
|
#endif /* HAVE_INLINE_ASM */ |
|
|
|
|
|
|
|
|
|
#if HAVE_YASM |
|
|
|
|
c->vector_fmul_reverse = ff_vector_fmul_reverse_sse; |
|
|
|
|
c->vector_fmul_add = ff_vector_fmul_add_sse; |
|
|
|
|
|
|
|
|
|
c->scalarproduct_float = ff_scalarproduct_float_sse; |
|
|
|
|
c->butterflies_float_interleave = ff_butterflies_float_interleave_sse; |
|
|
|
|
|
|
|
|
|
if (!high_bit_depth) |
|
|
|
|
c->emulated_edge_mc = emulated_edge_mc_sse; |
|
|
|
|
#if HAVE_INLINE_ASM |
|
|
|
|
c->gmc = gmc_sse; |
|
|
|
|
#endif |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, |
|
|
|
|
int mm_flags) |
|
|
|
|
{ |
|
|
|
|
const int bit_depth = avctx->bits_per_raw_sample; |
|
|
|
|
|
|
|
|
|
#if HAVE_INLINE_ASM |
|
|
|
|
const int high_bit_depth = bit_depth > 8; |
|
|
|
|
|
|
|
|
|
if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { |
|
|
|
@ -2887,6 +2917,7 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, |
|
|
|
|
H264_QPEL_FUNCS(3, 2, sse2); |
|
|
|
|
H264_QPEL_FUNCS(3, 3, sse2); |
|
|
|
|
} |
|
|
|
|
#endif /* HAVE_INLINE_ASM */ |
|
|
|
|
|
|
|
|
|
#if HAVE_YASM |
|
|
|
|
if (bit_depth == 10) { |
|
|
|
@ -2928,6 +2959,7 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, |
|
|
|
|
const int high_bit_depth = avctx->bits_per_raw_sample > 8; |
|
|
|
|
const int bit_depth = avctx->bits_per_raw_sample; |
|
|
|
|
|
|
|
|
|
#if HAVE_INLINE_ASM |
|
|
|
|
if (!high_bit_depth && CONFIG_H264QPEL) { |
|
|
|
|
H264_QPEL_FUNCS(1, 0, ssse3); |
|
|
|
|
H264_QPEL_FUNCS(1, 1, ssse3); |
|
|
|
@ -2942,8 +2974,9 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, |
|
|
|
|
H264_QPEL_FUNCS(3, 2, ssse3); |
|
|
|
|
H264_QPEL_FUNCS(3, 3, ssse3); |
|
|
|
|
} |
|
|
|
|
#endif /* HAVE_INLINE_ASM */ |
|
|
|
|
#if HAVE_YASM |
|
|
|
|
else if (bit_depth == 10 && CONFIG_H264QPEL) { |
|
|
|
|
if (bit_depth == 10 && CONFIG_H264QPEL) { |
|
|
|
|
H264_QPEL_FUNCS_10(1, 0, ssse3_cache64); |
|
|
|
|
H264_QPEL_FUNCS_10(2, 0, ssse3_cache64); |
|
|
|
|
H264_QPEL_FUNCS_10(3, 0, ssse3_cache64); |
|
|
|
@ -3007,6 +3040,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) |
|
|
|
|
int mm_flags = av_get_cpu_flags(); |
|
|
|
|
|
|
|
|
|
if (mm_flags & AV_CPU_FLAG_MMX) { |
|
|
|
|
#if HAVE_INLINE_ASM |
|
|
|
|
const int idct_algo = avctx->idct_algo; |
|
|
|
|
|
|
|
|
|
if (avctx->bits_per_raw_sample <= 8) { |
|
|
|
@ -3047,6 +3081,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
#endif /* HAVE_INLINE_ASM */ |
|
|
|
|
|
|
|
|
|
dsputil_init_mmx(c, avctx, mm_flags); |
|
|
|
|
} |
|
|
|
|