x86: conditionally compile H.264 QPEL optimizations

pull/4/head
Diego Biurrun 13 years ago
parent 3816642eab
commit 915a2a0a65
  1. 15
      configure
  2. 2
      libavcodec/x86/Makefile
  3. 18
      libavcodec/x86/dsputil_mmx.c

15
configure vendored

@ -1165,6 +1165,7 @@ CONFIG_EXTRA="
h264chroma
h264dsp
h264pred
h264qpel
huffman
lgplv3
lpc
@ -1311,7 +1312,7 @@ h263_encoder_select="aandct"
h263_vaapi_hwaccel_select="vaapi h263_decoder"
h263i_decoder_select="h263_decoder"
h263p_encoder_select="h263_encoder"
-h264_decoder_select="golomb h264chroma h264dsp h264pred"
+h264_decoder_select="golomb h264chroma h264dsp h264pred h264qpel"
h264_dxva2_hwaccel_deps="dxva2api_h"
h264_dxva2_hwaccel_select="dxva2 h264_decoder"
h264_vaapi_hwaccel_select="vaapi h264_decoder"
@ -1366,14 +1367,14 @@ rv10_decoder_select="h263_decoder"
rv10_encoder_select="h263_encoder"
rv20_decoder_select="h263_decoder"
rv20_encoder_select="h263_encoder"
-rv30_decoder_select="golomb h264chroma h264pred"
-rv40_decoder_select="golomb h264chroma h264pred"
+rv30_decoder_select="golomb h264chroma h264pred h264qpel"
+rv40_decoder_select="golomb h264chroma h264pred h264qpel"
shorten_decoder_select="golomb"
sipr_decoder_select="lsp"
snow_decoder_select="dwt"
snow_encoder_select="aandct dwt"
svq1_encoder_select="aandct"
-svq3_decoder_select="golomb h264chroma h264dsp h264pred"
+svq3_decoder_select="golomb h264chroma h264dsp h264pred h264qpel"
svq3_decoder_suggest="zlib"
theora_decoder_select="vp3_decoder"
tiff_decoder_suggest="zlib"
@ -1381,7 +1382,7 @@ tiff_encoder_suggest="zlib"
truehd_decoder_select="mlp_decoder"
tscc_decoder_select="zlib"
twinvq_decoder_select="mdct lsp sinewin"
-vc1_decoder_select="h263_decoder h264chroma"
+vc1_decoder_select="h263_decoder h264chroma h264qpel"
vc1_dxva2_hwaccel_deps="dxva2api_h"
vc1_dxva2_hwaccel_select="dxva2 vc1_decoder"
vc1_vaapi_hwaccel_select="vaapi vc1_decoder"
@ -1392,7 +1393,7 @@ vorbis_encoder_select="mdct"
vp6_decoder_select="huffman"
vp6a_decoder_select="vp6_decoder"
vp6f_decoder_select="vp6_decoder"
-vp8_decoder_select="h264pred"
+vp8_decoder_select="h264pred h264qpel"
wmapro_decoder_select="mdct sinewin"
wmav1_decoder_select="mdct sinewin"
wmav1_encoder_select="mdct sinewin"
@ -1419,7 +1420,7 @@ vda_deps="VideoDecodeAcceleration_VDADecoder_h pthreads"
vdpau_deps="vdpau_vdpau_h vdpau_vdpau_x11_h"
# parsers
-h264_parser_select="golomb h264chroma h264dsp h264pred"
+h264_parser_select="golomb h264chroma h264dsp h264pred h264qpel"
# external libraries
libdirac_decoder_deps="libdirac !libschroedinger"

2
libavcodec/x86/Makefile

@@ -23,6 +23,7 @@ YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \
YASM-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred.o \
x86/h264_intrapred_10bit.o
MMX-OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred_init.o
+YASM-OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel_10bit.o
MMX-OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp_init.o
YASM-OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp.o
@ -62,7 +63,6 @@ MMX-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp-init.o
MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \
x86/deinterlace.o \
x86/fmtconvert.o \
-x86/h264_qpel_10bit.o \
$(YASM-OBJS-yes)
MMX-OBJS-$(CONFIG_FFT) += x86/fft.o

18
libavcodec/x86/dsputil_mmx.c

@@ -2479,6 +2479,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
}
if (CONFIG_H264QPEL) {
SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2, );
SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2, );
SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2, );
@ -2510,6 +2511,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2, );
SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, );
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2, );
}
#if HAVE_YASM
if (!high_bit_depth && CONFIG_H264CHROMA) {
@ -2577,6 +2579,7 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
}
if (CONFIG_H264QPEL) {
SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow, );
SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow, );
SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow, );
@ -2597,6 +2600,7 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow, );
SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow, );
SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow, );
}
#if HAVE_YASM
if (!high_bit_depth && CONFIG_H264CHROMA) {
@ -2671,11 +2675,12 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
c->put_pixels_tab[0][0] = put_pixels16_sse2;
c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2;
c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
-H264_QPEL_FUNCS(0, 0, sse2);
+if (CONFIG_H264QPEL)
+H264_QPEL_FUNCS(0, 0, sse2);
}
}
-if (!high_bit_depth) {
+if (!high_bit_depth && CONFIG_H264QPEL) {
H264_QPEL_FUNCS(0, 1, sse2);
H264_QPEL_FUNCS(0, 2, sse2);
H264_QPEL_FUNCS(0, 3, sse2);
@ -2692,6 +2697,7 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
#if HAVE_YASM
if (bit_depth == 10) {
if (CONFIG_H264QPEL) {
SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_sse2, ff_);
SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_);
SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_);
@ -2699,7 +2705,7 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
H264_QPEL_FUNCS_10(1, 0, sse2_cache64);
H264_QPEL_FUNCS_10(2, 0, sse2_cache64);
H264_QPEL_FUNCS_10(3, 0, sse2_cache64);
}
if (CONFIG_H264CHROMA) {
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
@ -2729,7 +2735,7 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
const int bit_depth = avctx->bits_per_raw_sample;
-if (!high_bit_depth) {
+if (!high_bit_depth && CONFIG_H264QPEL) {
H264_QPEL_FUNCS(1, 0, ssse3);
H264_QPEL_FUNCS(1, 1, ssse3);
H264_QPEL_FUNCS(1, 2, ssse3);
@ -2744,7 +2750,7 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
H264_QPEL_FUNCS(3, 3, ssse3);
}
#if HAVE_YASM
-else if (bit_depth == 10) {
+else if (bit_depth == 10 && CONFIG_H264QPEL) {
H264_QPEL_FUNCS_10(1, 0, ssse3_cache64);
H264_QPEL_FUNCS_10(2, 0, ssse3_cache64);
H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
@ -2788,9 +2794,11 @@ static void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
if (bit_depth == 10) {
// AVX implies !cache64.
// TODO: Port cache(32|64) detection from x264.
if (CONFIG_H264QPEL) {
H264_QPEL_FUNCS_10(1, 0, sse2);
H264_QPEL_FUNCS_10(2, 0, sse2);
H264_QPEL_FUNCS_10(3, 0, sse2);
}
if (CONFIG_H264CHROMA) {
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;

Loading…
Cancel
Save