|
|
|
@ -230,144 +230,140 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, |
|
|
|
|
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx; |
|
|
|
|
if (cpu_flags & AV_CPU_FLAG_CMOV) |
|
|
|
|
c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx; |
|
|
|
|
|
|
|
|
|
if (EXTERNAL_MMXEXT(cpu_flags)) { |
|
|
|
|
c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmxext; |
|
|
|
|
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmxext; |
|
|
|
|
c->h264_idct_add16 = ff_h264_idct_add16_8_mmxext; |
|
|
|
|
c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmxext; |
|
|
|
|
if (chroma_format_idc == 1) |
|
|
|
|
c->h264_idct_add8 = ff_h264_idct_add8_8_mmxext; |
|
|
|
|
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmxext; |
|
|
|
|
|
|
|
|
|
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_mmxext; |
|
|
|
|
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmxext; |
|
|
|
|
if (chroma_format_idc == 1) { |
|
|
|
|
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_8_mmxext; |
|
|
|
|
c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmxext; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
if (EXTERNAL_MMXEXT(cpu_flags)) { |
|
|
|
|
c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmxext; |
|
|
|
|
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmxext; |
|
|
|
|
c->h264_idct_add16 = ff_h264_idct_add16_8_mmxext; |
|
|
|
|
c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmxext; |
|
|
|
|
if (chroma_format_idc == 1) |
|
|
|
|
c->h264_idct_add8 = ff_h264_idct_add8_8_mmxext; |
|
|
|
|
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmxext; |
|
|
|
|
|
|
|
|
|
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_8_mmxext; |
|
|
|
|
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmxext; |
|
|
|
|
if (chroma_format_idc == 1) { |
|
|
|
|
c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_8_mmxext; |
|
|
|
|
c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmxext; |
|
|
|
|
} |
|
|
|
|
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL |
|
|
|
|
c->h264_v_loop_filter_luma = deblock_v_luma_8_mmxext; |
|
|
|
|
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_mmxext; |
|
|
|
|
c->h264_v_loop_filter_luma_intra = deblock_v_luma_intra_8_mmxext; |
|
|
|
|
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext; |
|
|
|
|
c->h264_v_loop_filter_luma = deblock_v_luma_8_mmxext; |
|
|
|
|
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_mmxext; |
|
|
|
|
c->h264_v_loop_filter_luma_intra = deblock_v_luma_intra_8_mmxext; |
|
|
|
|
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext; |
|
|
|
|
#endif /* ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL */ |
|
|
|
|
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmxext; |
|
|
|
|
c->weight_h264_pixels_tab[1] = ff_h264_weight_8_mmxext; |
|
|
|
|
c->weight_h264_pixels_tab[2] = ff_h264_weight_4_mmxext; |
|
|
|
|
|
|
|
|
|
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_mmxext; |
|
|
|
|
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmxext; |
|
|
|
|
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmxext; |
|
|
|
|
|
|
|
|
|
if (EXTERNAL_SSE2(cpu_flags)) { |
|
|
|
|
c->h264_idct8_add = ff_h264_idct8_add_8_sse2; |
|
|
|
|
|
|
|
|
|
c->h264_idct_add16 = ff_h264_idct_add16_8_sse2; |
|
|
|
|
c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2; |
|
|
|
|
if (chroma_format_idc == 1) |
|
|
|
|
c->h264_idct_add8 = ff_h264_idct_add8_8_sse2; |
|
|
|
|
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2; |
|
|
|
|
c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2; |
|
|
|
|
|
|
|
|
|
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_sse2; |
|
|
|
|
c->weight_h264_pixels_tab[1] = ff_h264_weight_8_sse2; |
|
|
|
|
|
|
|
|
|
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_sse2; |
|
|
|
|
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_sse2; |
|
|
|
|
|
|
|
|
|
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2; |
|
|
|
|
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_sse2; |
|
|
|
|
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2; |
|
|
|
|
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2; |
|
|
|
|
} |
|
|
|
|
if (EXTERNAL_SSSE3(cpu_flags)) { |
|
|
|
|
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3; |
|
|
|
|
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3; |
|
|
|
|
} |
|
|
|
|
if (EXTERNAL_AVX(cpu_flags)) { |
|
|
|
|
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx; |
|
|
|
|
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx; |
|
|
|
|
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx; |
|
|
|
|
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmxext; |
|
|
|
|
c->weight_h264_pixels_tab[1] = ff_h264_weight_8_mmxext; |
|
|
|
|
c->weight_h264_pixels_tab[2] = ff_h264_weight_4_mmxext; |
|
|
|
|
|
|
|
|
|
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_mmxext; |
|
|
|
|
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmxext; |
|
|
|
|
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmxext; |
|
|
|
|
} |
|
|
|
|
if (EXTERNAL_SSE2(cpu_flags)) { |
|
|
|
|
c->h264_idct8_add = ff_h264_idct8_add_8_sse2; |
|
|
|
|
|
|
|
|
|
c->h264_idct_add16 = ff_h264_idct_add16_8_sse2; |
|
|
|
|
c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2; |
|
|
|
|
if (chroma_format_idc == 1) |
|
|
|
|
c->h264_idct_add8 = ff_h264_idct_add8_8_sse2; |
|
|
|
|
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2; |
|
|
|
|
c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2; |
|
|
|
|
|
|
|
|
|
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_sse2; |
|
|
|
|
c->weight_h264_pixels_tab[1] = ff_h264_weight_8_sse2; |
|
|
|
|
|
|
|
|
|
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_sse2; |
|
|
|
|
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_sse2; |
|
|
|
|
|
|
|
|
|
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2; |
|
|
|
|
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_sse2; |
|
|
|
|
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2; |
|
|
|
|
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2; |
|
|
|
|
} |
|
|
|
|
if (EXTERNAL_SSSE3(cpu_flags)) { |
|
|
|
|
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3; |
|
|
|
|
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3; |
|
|
|
|
} |
|
|
|
|
if (EXTERNAL_AVX(cpu_flags)) { |
|
|
|
|
c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx; |
|
|
|
|
c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx; |
|
|
|
|
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx; |
|
|
|
|
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx; |
|
|
|
|
} |
|
|
|
|
} else if (bit_depth == 10) { |
|
|
|
|
if (EXTERNAL_MMX(cpu_flags)) { |
|
|
|
|
if (EXTERNAL_MMXEXT(cpu_flags)) { |
|
|
|
|
if (EXTERNAL_MMXEXT(cpu_flags)) { |
|
|
|
|
#if ARCH_X86_32 |
|
|
|
|
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_mmxext; |
|
|
|
|
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmxext; |
|
|
|
|
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_mmxext; |
|
|
|
|
c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_mmxext; |
|
|
|
|
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext; |
|
|
|
|
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext; |
|
|
|
|
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_mmxext; |
|
|
|
|
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmxext; |
|
|
|
|
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_mmxext; |
|
|
|
|
c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_mmxext; |
|
|
|
|
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext; |
|
|
|
|
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext; |
|
|
|
|
#endif /* ARCH_X86_32 */ |
|
|
|
|
c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmxext; |
|
|
|
|
if (EXTERNAL_SSE2(cpu_flags)) { |
|
|
|
|
c->h264_idct_add = ff_h264_idct_add_10_sse2; |
|
|
|
|
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2; |
|
|
|
|
|
|
|
|
|
c->h264_idct_add16 = ff_h264_idct_add16_10_sse2; |
|
|
|
|
if (chroma_format_idc == 1) |
|
|
|
|
c->h264_idct_add8 = ff_h264_idct_add8_10_sse2; |
|
|
|
|
c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2; |
|
|
|
|
c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmxext; |
|
|
|
|
} |
|
|
|
|
if (EXTERNAL_SSE2(cpu_flags)) { |
|
|
|
|
c->h264_idct_add = ff_h264_idct_add_10_sse2; |
|
|
|
|
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2; |
|
|
|
|
|
|
|
|
|
c->h264_idct_add16 = ff_h264_idct_add16_10_sse2; |
|
|
|
|
if (chroma_format_idc == 1) |
|
|
|
|
c->h264_idct_add8 = ff_h264_idct_add8_10_sse2; |
|
|
|
|
c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2; |
|
|
|
|
#if HAVE_ALIGNED_STACK |
|
|
|
|
c->h264_idct8_add = ff_h264_idct8_add_10_sse2; |
|
|
|
|
c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2; |
|
|
|
|
c->h264_idct8_add = ff_h264_idct8_add_10_sse2; |
|
|
|
|
c->h264_idct8_add4 = ff_h264_idct8_add4_10_sse2; |
|
|
|
|
#endif /* HAVE_ALIGNED_STACK */ |
|
|
|
|
|
|
|
|
|
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2; |
|
|
|
|
c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2; |
|
|
|
|
c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2; |
|
|
|
|
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2; |
|
|
|
|
c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2; |
|
|
|
|
c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2; |
|
|
|
|
|
|
|
|
|
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2; |
|
|
|
|
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2; |
|
|
|
|
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2; |
|
|
|
|
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2; |
|
|
|
|
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2; |
|
|
|
|
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2; |
|
|
|
|
|
|
|
|
|
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_sse2; |
|
|
|
|
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2; |
|
|
|
|
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_sse2; |
|
|
|
|
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2; |
|
|
|
|
#if HAVE_ALIGNED_STACK |
|
|
|
|
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2; |
|
|
|
|
c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_sse2; |
|
|
|
|
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2; |
|
|
|
|
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2; |
|
|
|
|
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2; |
|
|
|
|
c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_sse2; |
|
|
|
|
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2; |
|
|
|
|
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2; |
|
|
|
|
#endif /* HAVE_ALIGNED_STACK */ |
|
|
|
|
} |
|
|
|
|
if (EXTERNAL_SSE4(cpu_flags)) { |
|
|
|
|
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4; |
|
|
|
|
c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4; |
|
|
|
|
c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4; |
|
|
|
|
|
|
|
|
|
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4; |
|
|
|
|
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4; |
|
|
|
|
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4; |
|
|
|
|
} |
|
|
|
|
if (EXTERNAL_AVX(cpu_flags)) { |
|
|
|
|
c->h264_idct_dc_add = |
|
|
|
|
c->h264_idct_add = ff_h264_idct_add_10_avx; |
|
|
|
|
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx; |
|
|
|
|
|
|
|
|
|
c->h264_idct_add16 = ff_h264_idct_add16_10_avx; |
|
|
|
|
if (chroma_format_idc == 1) |
|
|
|
|
c->h264_idct_add8 = ff_h264_idct_add8_10_avx; |
|
|
|
|
c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx; |
|
|
|
|
} |
|
|
|
|
if (EXTERNAL_SSE4(cpu_flags)) { |
|
|
|
|
c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4; |
|
|
|
|
c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4; |
|
|
|
|
c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4; |
|
|
|
|
|
|
|
|
|
c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4; |
|
|
|
|
c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4; |
|
|
|
|
c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4; |
|
|
|
|
} |
|
|
|
|
if (EXTERNAL_AVX(cpu_flags)) { |
|
|
|
|
c->h264_idct_dc_add = |
|
|
|
|
c->h264_idct_add = ff_h264_idct_add_10_avx; |
|
|
|
|
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx; |
|
|
|
|
|
|
|
|
|
c->h264_idct_add16 = ff_h264_idct_add16_10_avx; |
|
|
|
|
if (chroma_format_idc == 1) |
|
|
|
|
c->h264_idct_add8 = ff_h264_idct_add8_10_avx; |
|
|
|
|
c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx; |
|
|
|
|
#if HAVE_ALIGNED_STACK |
|
|
|
|
c->h264_idct8_add = ff_h264_idct8_add_10_avx; |
|
|
|
|
c->h264_idct8_add4 = ff_h264_idct8_add4_10_avx; |
|
|
|
|
c->h264_idct8_add = ff_h264_idct8_add_10_avx; |
|
|
|
|
c->h264_idct8_add4 = ff_h264_idct8_add4_10_avx; |
|
|
|
|
#endif /* HAVE_ALIGNED_STACK */ |
|
|
|
|
|
|
|
|
|
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_avx; |
|
|
|
|
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx; |
|
|
|
|
c->h264_v_loop_filter_chroma = ff_deblock_v_chroma_10_avx; |
|
|
|
|
c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx; |
|
|
|
|
#if HAVE_ALIGNED_STACK |
|
|
|
|
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx; |
|
|
|
|
c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_avx; |
|
|
|
|
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx; |
|
|
|
|
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx; |
|
|
|
|
c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx; |
|
|
|
|
c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_avx; |
|
|
|
|
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx; |
|
|
|
|
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx; |
|
|
|
|
#endif /* HAVE_ALIGNED_STACK */ |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|