Merge remote-tracking branch 'qatar/master'

* qatar/master: (35 commits)
  h264_idct_10bit: port x86 assembly to cpuflags.
  x86inc: clip num_args to 7 on x86-32.
  x86inc: sync to latest version from x264.
  fft: rename "z" to "zc" to prevent name collision.
  wv: return meaningful error codes.
  wv: return AVERROR_EOF on EOF, not EIO.
  mp3dec: forward errors for av_get_packet().
  mp3dec: remove a pointless local variable.
  mp3dec: remove commented out cruft.
  lavfi: bump minor to mark stabilizing the ABI.
  FATE: add tests for yadif.
  FATE: add a test for delogo video filter.
  FATE: add a test for amix audio filter.
  audiogen: allow specifying random seed as a commandline parameter.
  vc1dec: Override invalid macroblock quantizer
  vc1: avoid reading beyond the last line in vc1_draw_sprites()
  vc1dec: check that coded slice positions and interlacing match.
  vc1dec: Do not ignore ff_vc1_parse_frame_header_adv return value
  configure: Move parts that should not be user-selectable to CONFIG_EXTRA
  lavf: remove commented out cruft in avformat_find_stream_info()
  ...

Conflicts:
	Makefile
	configure
	libavcodec/vc1dec.c
	libavcodec/x86/h264_deblock.asm
	libavcodec/x86/h264_deblock_10bit.asm
	libavcodec/x86/h264dsp_mmx.c
	libavfilter/version.h
	libavformat/mp3dec.c
	libavformat/utils.c
	libavformat/wv.c
	libavutil/x86/x86inc.asm

Merged-by: Michael Niedermayer <michaelni@gmx.at>
pull/28/head
Michael Niedermayer 12 years ago
commit 706bd8ea19
  1. 6
      common.mak
  2. 39
      configure
  3. 25
      libavcodec/vc1dec.c
  4. 16
      libavcodec/x86/dsputil_mmx.c
  5. 18
      libavcodec/x86/fft_mmx.asm
  6. 40
      libavcodec/x86/h264_chromamc_10bit.asm
  7. 102
      libavcodec/x86/h264_deblock.asm
  8. 77
      libavcodec/x86/h264_deblock_10bit.asm
  9. 254
      libavcodec/x86/h264_idct_10bit.asm
  10. 60
      libavcodec/x86/h264dsp_mmx.c
  11. 11
      libavcodec/x86/rv34dsp.asm
  12. 97
      libavcodec/x86/vp3dsp.asm
  13. 2
      libavcodec/x86/vp3dsp_init.c
  14. 36
      libavcodec/x86/vp56dsp.asm
  15. 2
      libavcodec/x86/vp56dsp_init.c
  16. 2
      libavdevice/Makefile
  17. 2
      libavdevice/alldevices.c
  18. 2
      libavdevice/avdevice.h
  19. 1
      libavdevice/fbdev.c
  20. 1
      libavdevice/jack_audio.c
  21. 1
      libavdevice/oss_audio.c
  22. 26
      libavdevice/x11grab.c
  23. 2
      libavfilter/version.h
  24. 2
      libavformat/hls.c
  25. 7
      libavformat/mp3dec.c
  26. 1
      libavformat/rtpdec.c
  27. 1
      libavformat/rtsp.c
  28. 1
      libavformat/rtspdec.c
  29. 1
      libavformat/rtspenc.c
  30. 1
      libavformat/sapenc.c
  31. 4
      libavformat/tls.c
  32. 5
      libavformat/utils.c
  33. 47
      libavformat/wv.c
  34. 219
      libavutil/x86/x86inc.asm
  35. 2
      library.mak
  36. 7
      tests/audiogen.c
  37. 36
      tests/fate/filter.mak
  38. 110
      tests/ref/fate/filter-delogo
  39. 32
      tests/ref/fate/filter-yadif-mode0
  40. 34
      tests/ref/fate/filter-yadif-mode1

@ -11,7 +11,7 @@ ifndef V
Q = @
ECHO = printf "$(1)\t%s\n" $(2)
BRIEF = CC CXX AS YASM AR LD HOSTCC STRIP CP
SILENT = DEPCC DEPAS DEPHOSTCC YASMDEP RM RANLIB
SILENT = DEPCC DEPAS DEPHOSTCC DEPYASM RM RANLIB
MSG = $@
M = @$(call ECHO,$(TAG),$@);
$(foreach VAR,$(BRIEF), \
@ -35,7 +35,7 @@ LDFLAGS := $(ALLFFLIBS:%=-Llib%) $(LDFLAGS)
define COMPILE
$(call $(1)DEP,$(1))
$($(1)) $($(1)FLAGS) $($(1)_DEPFLAGS) -c $($(1)_O) $<
$($(1)) $($(1)FLAGS) $($(1)_DEPFLAGS) $($(1)_C) $($(1)_O) $<
endef
COMPILE_C = $(call COMPILE,CC)
@ -55,7 +55,7 @@ COMPILE_S = $(call COMPILE,AS)
$(COMPILE_S)
%.ho: %.h
$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ -x c $<
$(CC) $(CCFLAGS) -c $(CC_O) -x c $<
%.ver: %.v
$(Q)sed 's/$$MAJOR/$($(basename $(@F))_VERSION_MAJOR)/' $^ > $@

39
configure vendored

@ -673,11 +673,15 @@ cc_o(){
eval printf '%s\\n' $CC_O
}
cc_e(){
eval printf '%s\\n' $CC_E
}
check_cc(){
log check_cc "$@"
cat > $TMPC
log_file $TMPC
check_cmd $cc $CPPFLAGS $CFLAGS "$@" -c $(cc_o $TMPO) $TMPC
check_cmd $cc $CPPFLAGS $CFLAGS "$@" $CC_C $(cc_o $TMPO) $TMPC
}
check_cxx(){
@ -691,14 +695,14 @@ check_cpp(){
log check_cpp "$@"
cat > $TMPC
log_file $TMPC
check_cmd $cc $CPPFLAGS $CFLAGS "$@" -E -o $TMPO $TMPC
check_cmd $cc $CPPFLAGS $CFLAGS "$@" $(cc_e $TMPO) $TMPC
}
check_as(){
log check_as "$@"
cat > $TMPC
log_file $TMPC
check_cmd $as $CPPFLAGS $ASFLAGS "$@" -c -o $TMPO $TMPC
check_cmd $as $CPPFLAGS $ASFLAGS "$@" $AS_C -o $TMPO $TMPC
}
check_asm(){
@ -1043,7 +1047,6 @@ PROGRAM_LIST="
CONFIG_LIST="
$COMPONENT_LIST
$PROGRAM_LIST
ac3dsp
avcodec
avdevice
avfilter
@ -1061,7 +1064,6 @@ CONFIG_LIST="
fft
fontconfig
frei0r
gcrypt
gnutls
gpl
gray
@ -1107,8 +1109,6 @@ CONFIG_LIST="
mdct
memalign_hack
memory_poisoning
mpegaudiodsp
nettle
network
nonfree
openal
@ -1116,11 +1116,9 @@ CONFIG_LIST="
pic
postproc
rdft
rtpdec
runtime_cpudetect
safe_bitstream_reader
shared
sinewin
small
sram
static
@ -1341,7 +1339,9 @@ HAVE_LIST="
# options emitted with CONFIG_ prefix but not available on command line
CONFIG_EXTRA="
aandcttables
ac3dsp
avutil
gcrypt
golomb
gplv3
h264chroma
@ -1351,6 +1351,10 @@ CONFIG_EXTRA="
huffman
lgplv3
lpc
mpegaudiodsp
nettle
rtpdec
sinewin
vp3dsp
"
@ -1738,7 +1742,7 @@ v4l_indev_deps="linux_videodev_h"
v4l2_indev_deps_any="linux_videodev2_h sys_videoio_h"
vfwcap_indev_deps="capCreateCaptureWindow vfwcap_defines"
vfwcap_indev_extralibs="-lavicap32"
x11_grab_device_indev_deps="x11grab"
x11grab_indev_deps="x11grab"
# protocols
bluray_protocol_deps="libbluray"
@ -1968,10 +1972,14 @@ asflags_filter=echo
cflags_filter=echo
ldflags_filter=echo
AS_C='-c'
AS_O='-o $@'
CC_C='-c'
CC_E='-E -o $@'
CC_O='-o $@'
CXX_O='-o $@'
LD_O='-o $@'
HOSTCC_C='-c'
HOSTCC_O='-o $@'
host_cflags='-D_ISOC99_SOURCE -D_XOPEN_SOURCE=600 -O3 -g'
@ -2331,7 +2339,8 @@ probe_cc(){
pfx=$1
_cc=$2
unset _type _ident _cc_o _flags _cflags _ldflags _depflags _DEPCMD _DEPFLAGS
unset _type _ident _cc_c _cc_e _cc_o _flags _cflags _ldflags
unset _depflags _DEPCMD _DEPFLAGS
_flags_filter=echo
if $_cc -v 2>&1 | grep -q '^gcc.*LLVM'; then
@ -2394,6 +2403,7 @@ probe_cc(){
_ident=$($_cc -version | head -n1 | tr -s ' ')
_flags='--gcc --abi=eabi -me'
_cflags='-D__gnuc_va_list=va_list -D__USER_LABEL_PREFIX__='
_cc_e='-ppl -fe=$@'
_cc_o='-fe=$@'
as_default="${cross_prefix}gcc"
ld_default="${cross_prefix}gcc"
@ -2445,6 +2455,8 @@ probe_cc(){
}
set_ccvars(){
eval ${1}_C=\${_cc_c-\${${1}_C}}
eval ${1}_E=\${_cc_e-\${${1}_E}}
eval ${1}_O=\${_cc_o-\${${1}_O}}
if [ -n "$_depflags" ]; then
@ -3874,7 +3886,7 @@ DEPCCFLAGS=$DEPCCFLAGS \$(CPPFLAGS)
DEPAS=$as
DEPASFLAGS=$DEPASFLAGS \$(CPPFLAGS)
YASM=$yasmexe
YASMDEP=$yasmexe
DEPYASM=$yasmexe
AR=$ar
RANLIB=$ranlib
CP=cp -p
@ -3884,7 +3896,9 @@ CPPFLAGS=$CPPFLAGS
CFLAGS=$CFLAGS
CXXFLAGS=$CXXFLAGS
ASFLAGS=$ASFLAGS
AS_C=$AS_C
AS_O=$AS_O
CC_C=$CC_C
CC_O=$CC_O
CXX_O=$CXX_O
LD_O=$LD_O
@ -3919,6 +3933,7 @@ DEPHOSTCCFLAGS=$DEPHOSTCCFLAGS \$(HOSTCCFLAGS)
HOSTCCDEP=$HOSTCCDEP
HOSTCCDEP_FLAGS=$HOSTCCDEP_FLAGS
HOSTCC_DEPFLAGS=$HOSTCC_DEPFLAGS
HOSTCC_C=$HOSTCC_C
HOSTCC_O=$HOSTCC_O
TARGET_EXEC=$target_exec
TARGET_PATH=$target_path

@ -1050,9 +1050,10 @@ static void vc1_mc_4mv_chroma4(VC1Context *v)
if ((edges&8) && s->mb_y == (s->mb_height - 1)) \
mquant = v->altpq; \
if (!mquant || mquant > 31) { \
av_log(v->s.avctx, AV_LOG_ERROR, "invalid mquant %d\n", mquant); \
mquant = 1; \
} \
av_log(v->s.avctx, AV_LOG_ERROR, \
"Overriding invalid mquant %d\n", mquant); \
mquant = 1; \
} \
}
/**
@ -4944,15 +4945,17 @@ static void vc1_draw_sprites(VC1Context *v, SpriteData* sd)
int iline = s->current_picture.f.linesize[plane];
int ycoord = yoff[sprite] + yadv[sprite] * row;
int yline = ycoord >> 16;
int next_line;
ysub[sprite] = ycoord & 0xFFFF;
if (sprite) {
iplane = s->last_picture.f.data[plane];
iline = s->last_picture.f.linesize[plane];
}
next_line = FFMIN(yline + 1, (v->sprite_height >> !!plane) - 1) * iline;
if (!(xoff[sprite] & 0xFFFF) && xadv[sprite] == 1 << 16) {
src_h[sprite][0] = iplane + (xoff[sprite] >> 16) + yline * iline;
if (ysub[sprite])
src_h[sprite][1] = iplane + (xoff[sprite] >> 16) + FFMIN(yline + 1, (v->sprite_height>>!!plane)-1) * iline;
src_h[sprite][1] = iplane + (xoff[sprite] >> 16) + next_line;
} else {
if (sr_cache[sprite][0] != yline) {
if (sr_cache[sprite][1] == yline) {
@ -4964,7 +4967,9 @@ static void vc1_draw_sprites(VC1Context *v, SpriteData* sd)
}
}
if (ysub[sprite] && sr_cache[sprite][1] != yline + 1) {
v->vc1dsp.sprite_h(v->sr_rows[sprite][1], iplane + FFMIN(yline + 1, (v->sprite_height>>!!plane)-1) * iline, xoff[sprite], xadv[sprite], width);
v->vc1dsp.sprite_h(v->sr_rows[sprite][1],
iplane + next_line, xoff[sprite],
xadv[sprite], width);
sr_cache[sprite][1] = yline + 1;
}
src_h[sprite][0] = v->sr_rows[sprite][0];
@ -5581,8 +5586,10 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
mb_height = s->mb_height >> v->field_mode;
for (i = 0; i <= n_slices; i++) {
if (i > 0 && slices[i - 1].mby_start >= mb_height) {
if(v->field_mode <= 0) {
av_log(v->s.avctx, AV_LOG_ERROR, "invalid end_mb_y %d\n", slices[i - 1].mby_start);
if (v->field_mode <= 0) {
av_log(v->s.avctx, AV_LOG_ERROR, "Slice %d starts beyond "
"picture boundary (%d >= %d)\n", i,
slices[i - 1].mby_start, mb_height);
continue;
}
v->second_field = 1;
@ -5597,13 +5604,13 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
v->pic_header_flag = 0;
if (v->field_mode && i == n_slices1 + 2) {
if (ff_vc1_parse_frame_header_adv(v, &s->gb) < 0) {
av_log(v->s.avctx, AV_LOG_ERROR, "slice header damaged\n");
av_log(v->s.avctx, AV_LOG_ERROR, "Field header damaged\n");
continue;
}
} else if (get_bits1(&s->gb)) {
v->pic_header_flag = 1;
if (ff_vc1_parse_frame_header_adv(v, &s->gb) < 0) {
av_log(v->s.avctx, AV_LOG_ERROR, "slice header damaged\n");
av_log(v->s.avctx, AV_LOG_ERROR, "Slice header damaged\n");
continue;
}
}

@ -2137,10 +2137,10 @@ void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT \
(uint8_t *dst, uint8_t *src, \
int stride, int h, int x, int y);
CHROMA_MC(put, 2, 10, mmxext)
CHROMA_MC(avg, 2, 10, mmxext)
CHROMA_MC(put, 4, 10, mmxext)
CHROMA_MC(avg, 4, 10, mmxext)
CHROMA_MC(put, 2, 10, mmx2)
CHROMA_MC(avg, 2, 10, mmx2)
CHROMA_MC(put, 4, 10, mmx2)
CHROMA_MC(avg, 4, 10, mmx2)
CHROMA_MC(put, 8, 10, sse2)
CHROMA_MC(avg, 8, 10, sse2)
CHROMA_MC(put, 8, 10, avx)
@ -2841,10 +2841,10 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmx2;
}
if (bit_depth == 10 && CONFIG_H264CHROMA) {
c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmx2;
c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmx2;
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmx2;
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmx2;
}
c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;

@ -517,23 +517,23 @@ INIT_MMX 3dnow
FFT48_3DN
%define Z(x) [zq + o1q*(x&6) + mmsize*(x&1)]
%define Z2(x) [zq + o3q + mmsize*(x&1)]
%define ZH(x) [zq + o1q*(x&6) + mmsize*(x&1) + mmsize/2]
%define Z2H(x) [zq + o3q + mmsize*(x&1) + mmsize/2]
%define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)]
%define Z2(x) [zcq + o3q + mmsize*(x&1)]
%define ZH(x) [zcq + o1q*(x&6) + mmsize*(x&1) + mmsize/2]
%define Z2H(x) [zcq + o3q + mmsize*(x&1) + mmsize/2]
%macro DECL_PASS 2+ ; name, payload
align 16
%1:
DEFINE_ARGS z, w, n, o1, o3
DEFINE_ARGS zc, w, n, o1, o3
lea o3q, [nq*3]
lea o1q, [nq*8]
shl o3q, 4
.loop:
%2
add zq, mmsize*2
add wq, mmsize
sub nd, mmsize/8
add zcq, mmsize*2
add wq, mmsize
sub nd, mmsize/8
jg .loop
rep ret
%endmacro
@ -748,7 +748,7 @@ section .text
; On x86_32, this function does the register saving and restoring for all of fft.
; The others pass args in registers and don't spill anything.
cglobal fft_dispatch%2, 2,5,8, z, nbits
cglobal fft_dispatch%2, 2,5,8, zc, nbits
FFT_DISPATCH fullsuffix, nbits
RET
%endmacro ; DECL_FFT

@ -60,10 +60,10 @@ SECTION .text
;-----------------------------------------------------------------------------
; void put/avg_h264_chroma_mc8(pixel *dst, pixel *src, int stride, int h, int mx, int my)
;-----------------------------------------------------------------------------
%macro CHROMA_MC8 2
%macro CHROMA_MC8 1
; put/avg_h264_chroma_mc8_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
; int stride, int h, int mx, int my)
cglobal %1_h264_chroma_mc8_10_%2, 6,7,8
cglobal %1_h264_chroma_mc8_10, 6,7,8
movsxdifnidn r2, r2d
mov r6d, r5d
or r6d, r4d
@ -173,8 +173,8 @@ cglobal %1_h264_chroma_mc8_10_%2, 6,7,8
add r0, r2
%endmacro
%macro CHROMA_MC4 2
cglobal %1_h264_chroma_mc4_10_%2, 6,6,7
%macro CHROMA_MC4 1
cglobal %1_h264_chroma_mc4_10, 6,6,7
movsxdifnidn r2, r2d
movd m2, r4m ; x
movd m3, r5m ; y
@ -203,8 +203,8 @@ cglobal %1_h264_chroma_mc4_10_%2, 6,6,7
;-----------------------------------------------------------------------------
; void put/avg_h264_chroma_mc2(pixel *dst, pixel *src, int stride, int h, int mx, int my)
;-----------------------------------------------------------------------------
%macro CHROMA_MC2 2
cglobal %1_h264_chroma_mc2_10_%2, 6,7
%macro CHROMA_MC2 1
cglobal %1_h264_chroma_mc2_10, 6,7
movsxdifnidn r2, r2d
mov r6d, r4d
shl r4d, 16
@ -250,24 +250,24 @@ cglobal %1_h264_chroma_mc2_10_%2, 6,7
%endmacro
%define CHROMAMC_AVG NOTHING
INIT_XMM
CHROMA_MC8 put, sse2
INIT_XMM sse2
CHROMA_MC8 put
%if HAVE_AVX
INIT_AVX
CHROMA_MC8 put, avx
INIT_XMM avx
CHROMA_MC8 put
%endif
INIT_MMX
CHROMA_MC4 put, mmxext
CHROMA_MC2 put, mmxext
INIT_MMX mmx2
CHROMA_MC4 put
CHROMA_MC2 put
%define CHROMAMC_AVG AVG
%define PAVG pavgw
INIT_XMM
CHROMA_MC8 avg, sse2
INIT_XMM sse2
CHROMA_MC8 avg
%if HAVE_AVX
INIT_AVX
CHROMA_MC8 avg, avx
INIT_XMM avx
CHROMA_MC8 avg
%endif
INIT_MMX
CHROMA_MC4 avg, mmxext
CHROMA_MC2 avg, mmxext
INIT_MMX mmx2
CHROMA_MC4 avg
CHROMA_MC2 avg

@ -282,8 +282,8 @@ cextern pb_A1
;-----------------------------------------------------------------------------
; void deblock_v_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
%macro DEBLOCK_LUMA 1
cglobal deblock_v_luma_8_%1, 5,5,10
%macro DEBLOCK_LUMA 0
cglobal deblock_v_luma_8, 5,5,10
movd m8, [r4] ; tc0
lea r4, [r1*3]
dec r2d ; alpha-1
@ -327,8 +327,8 @@ cglobal deblock_v_luma_8_%1, 5,5,10
;-----------------------------------------------------------------------------
; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
INIT_MMX
cglobal deblock_h_luma_8_%1, 5,9
INIT_MMX cpuname
cglobal deblock_h_luma_8, 5,9
movsxd r7, r1d
lea r8, [r7+r7*2]
lea r6, [r0-4]
@ -355,7 +355,7 @@ cglobal deblock_h_luma_8_%1, 5,9
%if WIN64
mov [rsp+0x20], r4
%endif
call deblock_v_luma_8_%1
call deblock_v_luma_8
; transpose 16x4 -> original space (only the middle 4 rows were changed by the filter)
add r6, 2
@ -384,26 +384,26 @@ cglobal deblock_h_luma_8_%1, 5,9
RET
%endmacro
INIT_XMM
DEBLOCK_LUMA sse2
INIT_XMM sse2
DEBLOCK_LUMA
%if HAVE_AVX
INIT_AVX
DEBLOCK_LUMA avx
INIT_XMM avx
DEBLOCK_LUMA
%endif
%else
%macro DEBLOCK_LUMA 3
%macro DEBLOCK_LUMA 2
;-----------------------------------------------------------------------------
; void deblock_v8_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
cglobal deblock_%2_luma_8_%1, 5,5
cglobal deblock_%1_luma_8, 5,5
lea r4, [r1*3]
dec r2 ; alpha-1
neg r4
dec r3 ; beta-1
add r4, r0 ; pix-3*stride
%assign pad 2*%3+12-(stack_offset&15)
%assign pad 2*%2+12-(stack_offset&15)
SUB esp, pad
mova m0, [r4+r1] ; p1
@ -417,7 +417,7 @@ cglobal deblock_%2_luma_8_%1, 5,5
movd m4, [r3] ; tc0
punpcklbw m4, m4
punpcklbw m4, m4 ; tc = 4x tc0[3], 4x tc0[2], 4x tc0[1], 4x tc0[0]
mova [esp+%3], m4 ; tc
mova [esp+%2], m4 ; tc
pcmpgtb m4, m3
mova m3, [r4] ; p2
pand m4, m7
@ -425,7 +425,7 @@ cglobal deblock_%2_luma_8_%1, 5,5
DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1
pand m6, m4
pand m4, [esp+%3] ; tc
pand m4, [esp+%2] ; tc
psubb m7, m4, m6
pand m6, m4
LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4
@ -433,7 +433,7 @@ cglobal deblock_%2_luma_8_%1, 5,5
mova m4, [r0+2*r1] ; q2
DIFF_GT2 m2, m4, m5, m6, m3 ; |q2-q0| > beta-1
pand m6, [esp] ; mask
mova m5, [esp+%3] ; tc
mova m5, [esp+%2] ; tc
psubb m7, m6
pand m5, m6
mova m3, [r0+r1]
@ -448,8 +448,8 @@ cglobal deblock_%2_luma_8_%1, 5,5
;-----------------------------------------------------------------------------
; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
INIT_MMX
cglobal deblock_h_luma_8_%1, 0,5
INIT_MMX cpuname
cglobal deblock_h_luma_8, 0,5
mov r0, r0mp
mov r3, r1m
lea r4, [r3*3]
@ -472,11 +472,11 @@ cglobal deblock_h_luma_8_%1, 0,5
PUSH dword r2m
PUSH dword 16
PUSH dword r0
call deblock_%2_luma_8_%1
%ifidn %2, v8
call deblock_%1_luma_8
%ifidn %1, v8
add dword [esp ], 8 ; pix_tmp+0x38
add dword [esp+16], 2 ; tc0+2
call deblock_%2_luma_8_%1
call deblock_%1_luma_8
%endif
ADD esp, 20
@ -503,13 +503,13 @@ cglobal deblock_h_luma_8_%1, 0,5
RET
%endmacro ; DEBLOCK_LUMA
INIT_MMX
DEBLOCK_LUMA mmxext, v8, 8
INIT_XMM
DEBLOCK_LUMA sse2, v, 16
INIT_MMX mmx2
DEBLOCK_LUMA v8, 8
INIT_XMM sse2
DEBLOCK_LUMA v, 16
%if HAVE_AVX
INIT_AVX
DEBLOCK_LUMA avx, v, 16
INIT_XMM avx
DEBLOCK_LUMA v, 16
%endif
%endif ; ARCH
@ -612,7 +612,7 @@ DEBLOCK_LUMA avx, v, 16
%define mask1p mask1q
%endmacro
%macro DEBLOCK_LUMA_INTRA 2
%macro DEBLOCK_LUMA_INTRA 1
%define p1 m0
%define p0 m1
%define q0 m2
@ -647,7 +647,7 @@ DEBLOCK_LUMA avx, v, 16
;-----------------------------------------------------------------------------
; void deblock_v_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal deblock_%2_luma_intra_8_%1, 4,6,16
cglobal deblock_%1_luma_intra_8, 4,6,16
%if ARCH_X86_64 == 0
sub esp, 0x60
%endif
@ -704,12 +704,12 @@ cglobal deblock_%2_luma_intra_8_%1, 4,6,16
%endif
RET
INIT_MMX
INIT_MMX cpuname
%if ARCH_X86_64
;-----------------------------------------------------------------------------
; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal deblock_h_luma_intra_8_%1, 4,9
cglobal deblock_h_luma_intra_8, 4,9
movsxd r7, r1d
lea r8, [r7*3]
lea r6, [r0-4]
@ -725,7 +725,7 @@ cglobal deblock_h_luma_intra_8_%1, 4,9
lea r0, [pix_tmp+0x40]
mov r1, 0x10
call deblock_v_luma_intra_8_%1
call deblock_v_luma_intra_8
; transpose 16x6 -> original space (but we can't write only 6 pixels, so really 16x8)
lea r5, [r6+r8]
@ -738,7 +738,7 @@ cglobal deblock_h_luma_intra_8_%1, 4,9
add rsp, 0x88
RET
%else
cglobal deblock_h_luma_intra_8_%1, 2,4
cglobal deblock_h_luma_intra_8, 2,4
lea r3, [r1*3]
sub r0, 4
lea r2, [r0+r3]
@ -757,10 +757,10 @@ cglobal deblock_h_luma_intra_8_%1, 2,4
PUSH dword r2m
PUSH dword 16
PUSH r0
call deblock_%2_luma_intra_8_%1
%ifidn %2, v8
call deblock_%1_luma_intra_8
%ifidn %1, v8
add dword [rsp], 8 ; pix_tmp+8
call deblock_%2_luma_intra_8_%1
call deblock_%1_luma_intra_8
%endif
ADD esp, 16
@ -779,18 +779,18 @@ cglobal deblock_h_luma_intra_8_%1, 2,4
%endif ; ARCH_X86_64
%endmacro ; DEBLOCK_LUMA_INTRA
INIT_XMM
DEBLOCK_LUMA_INTRA sse2, v
INIT_XMM sse2
DEBLOCK_LUMA_INTRA v
%if HAVE_AVX
INIT_AVX
DEBLOCK_LUMA_INTRA avx , v
INIT_XMM avx
DEBLOCK_LUMA_INTRA v
%endif
%if ARCH_X86_64 == 0
INIT_MMX
DEBLOCK_LUMA_INTRA mmxext, v8
INIT_MMX mmx2
DEBLOCK_LUMA_INTRA v8
%endif
INIT_MMX
INIT_MMX mmx2
%macro CHROMA_V_START 0
dec r2d ; alpha-1
@ -815,13 +815,13 @@ INIT_MMX
;-----------------------------------------------------------------------------
; void ff_deblock_v_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
cglobal deblock_v_chroma_8_mmxext, 5,6
cglobal deblock_v_chroma_8, 5,6
CHROMA_V_START
movq m0, [t5]
movq m1, [t5+r1]
movq m2, [r0]
movq m3, [r0+r1]
call ff_chroma_inter_body_mmxext
call ff_chroma_inter_body_mmx2
movq [t5+r1], m1
movq [r0], m2
RET
@ -829,7 +829,7 @@ cglobal deblock_v_chroma_8_mmxext, 5,6
;-----------------------------------------------------------------------------
; void ff_deblock_h_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
cglobal deblock_h_chroma_8_mmxext, 5,7
cglobal deblock_h_chroma_8, 5,7
%if UNIX64
%define buf0 [rsp-24]
%define buf1 [rsp-16]
@ -859,7 +859,7 @@ cglobal deblock_h_chroma_8_mmxext, 5,7
RET
ALIGN 16
ff_chroma_inter_body_mmxext:
ff_chroma_inter_body_mmx2:
LOAD_MASK r2d, r3d
movd m6, [r4] ; tc0
punpcklbw m6, m6
@ -886,13 +886,13 @@ ff_chroma_inter_body_mmxext:
;-----------------------------------------------------------------------------
; void ff_deblock_v_chroma_intra( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal deblock_v_chroma_intra_8_mmxext, 4,5
cglobal deblock_v_chroma_intra_8, 4,5
CHROMA_V_START
movq m0, [t5]
movq m1, [t5+r1]
movq m2, [r0]
movq m3, [r0+r1]
call ff_chroma_intra_body_mmxext
call ff_chroma_intra_body_mmx2
movq [t5+r1], m1
movq [r0], m2
RET
@ -900,15 +900,15 @@ cglobal deblock_v_chroma_intra_8_mmxext, 4,5
;-----------------------------------------------------------------------------
; void ff_deblock_h_chroma_intra( uint8_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal deblock_h_chroma_intra_8_mmxext, 4,6
cglobal deblock_h_chroma_intra_8, 4,6
CHROMA_H_START
TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6)
call ff_chroma_intra_body_mmxext
call ff_chroma_intra_body_mmx2
TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
RET
ALIGN 16
ff_chroma_intra_body_mmxext:
ff_chroma_intra_body_mmx2:
LOAD_MASK r2d, r3d
movq m5, m1
movq m6, m2

@ -151,11 +151,11 @@ cextern pw_4
%endif
%endmacro
%macro DEBLOCK_LUMA 1
%macro DEBLOCK_LUMA 0
;-----------------------------------------------------------------------------
; void deblock_v_luma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
cglobal deblock_v_luma_10_%1, 5,5,8*(mmsize/16)
cglobal deblock_v_luma_10, 5,5,8*(mmsize/16)
%assign pad 5*mmsize+12-(stack_offset&15)
%define tcm [rsp]
%define ms1 [rsp+mmsize]
@ -210,7 +210,7 @@ cglobal deblock_v_luma_10_%1, 5,5,8*(mmsize/16)
ADD rsp, pad
RET
cglobal deblock_h_luma_10_%1, 5,6,8*(mmsize/16)
cglobal deblock_h_luma_10, 5,6,8*(mmsize/16)
%assign pad 7*mmsize+12-(stack_offset&15)
%define tcm [rsp]
%define ms1 [rsp+mmsize]
@ -301,7 +301,6 @@ cglobal deblock_h_luma_10_%1, 5,6,8*(mmsize/16)
RET
%endmacro
INIT_XMM
%if ARCH_X86_64
; in: m0=p1, m1=p0, m2=q0, m3=q1, m8=p2, m9=q2
; m12=alpha, m13=beta
@ -339,8 +338,8 @@ INIT_XMM
SWAP 3, 9
%endmacro
%macro DEBLOCK_LUMA_64 1
cglobal deblock_v_luma_10_%1, 5,5,15
%macro DEBLOCK_LUMA_64 0
cglobal deblock_v_luma_10, 5,5,15
%define p2 m8
%define p1 m0
%define p0 m1
@ -377,7 +376,7 @@ cglobal deblock_v_luma_10_%1, 5,5,15
jg .loop
REP_RET
cglobal deblock_h_luma_10_%1, 5,7,15
cglobal deblock_h_luma_10, 5,7,15
shl r2d, 2
shl r3d, 2
LOAD_AB m12, m13, r2d, r3d
@ -417,11 +416,11 @@ cglobal deblock_h_luma_10_%1, 5,7,15
REP_RET
%endmacro
INIT_XMM
DEBLOCK_LUMA_64 sse2
INIT_XMM sse2
DEBLOCK_LUMA_64
%if HAVE_AVX
INIT_AVX
DEBLOCK_LUMA_64 avx
INIT_XMM avx
DEBLOCK_LUMA_64
%endif
%endif
@ -604,8 +603,8 @@ DEBLOCK_LUMA_64 avx
;-----------------------------------------------------------------------------
; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
%macro DEBLOCK_LUMA_INTRA_64 1
cglobal deblock_v_luma_intra_10_%1, 4,7,16
%macro DEBLOCK_LUMA_INTRA_64 0
cglobal deblock_v_luma_intra_10, 4,7,16
%define t0 m1
%define t1 m2
%define t2 m4
@ -655,7 +654,7 @@ cglobal deblock_v_luma_intra_10_%1, 4,7,16
;-----------------------------------------------------------------------------
; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal deblock_h_luma_intra_10_%1, 4,7,16
cglobal deblock_h_luma_intra_10, 4,7,16
%define t0 m15
%define t1 m14
%define t2 m2
@ -714,20 +713,20 @@ cglobal deblock_h_luma_intra_10_%1, 4,7,16
RET
%endmacro
INIT_XMM
DEBLOCK_LUMA_INTRA_64 sse2
INIT_XMM sse2
DEBLOCK_LUMA_INTRA_64
%if HAVE_AVX
INIT_AVX
DEBLOCK_LUMA_INTRA_64 avx
INIT_XMM avx
DEBLOCK_LUMA_INTRA_64
%endif
%endif
%macro DEBLOCK_LUMA_INTRA 1
%macro DEBLOCK_LUMA_INTRA 0
;-----------------------------------------------------------------------------
; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal deblock_v_luma_intra_10_%1, 4,7,8*(mmsize/16)
cglobal deblock_v_luma_intra_10, 4,7,8*(mmsize/16)
LUMA_INTRA_INIT 3
lea r4, [r1*4]
lea r5, [r1*3]
@ -755,7 +754,7 @@ cglobal deblock_v_luma_intra_10_%1, 4,7,8*(mmsize/16)
;-----------------------------------------------------------------------------
; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal deblock_h_luma_intra_10_%1, 4,7,8*(mmsize/16)
cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16)
LUMA_INTRA_INIT 8
%if mmsize == 8
lea r4, [r1*3]
@ -797,16 +796,16 @@ cglobal deblock_h_luma_intra_10_%1, 4,7,8*(mmsize/16)
%endmacro
%if ARCH_X86_64 == 0
INIT_MMX
DEBLOCK_LUMA mmxext
DEBLOCK_LUMA_INTRA mmxext
INIT_XMM
DEBLOCK_LUMA sse2
DEBLOCK_LUMA_INTRA sse2
INIT_MMX mmx2
DEBLOCK_LUMA
DEBLOCK_LUMA_INTRA
INIT_XMM sse2
DEBLOCK_LUMA
DEBLOCK_LUMA_INTRA
%if HAVE_AVX
INIT_AVX
DEBLOCK_LUMA avx
DEBLOCK_LUMA_INTRA avx
INIT_XMM avx
DEBLOCK_LUMA
DEBLOCK_LUMA_INTRA
%endif
%endif
@ -849,11 +848,11 @@ DEBLOCK_LUMA_INTRA avx
psraw %1, 6
%endmacro
%macro DEBLOCK_CHROMA 1
%macro DEBLOCK_CHROMA 0
;-----------------------------------------------------------------------------
; void deblock_v_chroma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
cglobal deblock_v_chroma_10_%1, 5,7-(mmsize/16),8*(mmsize/16)
cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
mov r5, r0
sub r0, r1
sub r0, r1
@ -887,7 +886,7 @@ cglobal deblock_v_chroma_10_%1, 5,7-(mmsize/16),8*(mmsize/16)
;-----------------------------------------------------------------------------
; void deblock_v_chroma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal deblock_v_chroma_intra_10_%1, 4,6-(mmsize/16),8*(mmsize/16)
cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16)
mov r4, r0
sub r0, r1
sub r0, r1
@ -914,12 +913,12 @@ cglobal deblock_v_chroma_intra_10_%1, 4,6-(mmsize/16),8*(mmsize/16)
%endmacro
%if ARCH_X86_64 == 0
INIT_MMX
DEBLOCK_CHROMA mmxext
INIT_MMX mmx2
DEBLOCK_CHROMA
%endif
INIT_XMM
DEBLOCK_CHROMA sse2
INIT_XMM sse2
DEBLOCK_CHROMA
%if HAVE_AVX
INIT_AVX
DEBLOCK_CHROMA avx
INIT_XMM avx
DEBLOCK_CHROMA
%endif

@ -72,25 +72,25 @@ SECTION .text
STORE_DIFFx2 m2, m3, m4, m5, %1, %3
%endmacro
%macro IDCT_ADD_10 1
cglobal h264_idct_add_10_%1, 3,3
%macro IDCT_ADD_10 0
cglobal h264_idct_add_10, 3,3
IDCT4_ADD_10 r0, r1, r2
RET
%endmacro
INIT_XMM
IDCT_ADD_10 sse2
INIT_XMM sse2
IDCT_ADD_10
%if HAVE_AVX
INIT_AVX
IDCT_ADD_10 avx
INIT_XMM avx
IDCT_ADD_10
%endif
;-----------------------------------------------------------------------------
; h264_idct_add16(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
;-----------------------------------------------------------------------------
;;;;;;; NO FATE SAMPLES TRIGGER THIS
%macro ADD4x4IDCT 1
add4x4_idct_%1:
%macro ADD4x4IDCT 0
add4x4_idct %+ SUFFIX:
add r5, r0
mova m0, [r2+ 0]
mova m1, [r2+16]
@ -107,52 +107,52 @@ add4x4_idct_%1:
ret
%endmacro
INIT_XMM
INIT_XMM sse2
ALIGN 16
ADD4x4IDCT sse2
ADD4x4IDCT
%if HAVE_AVX
INIT_AVX
INIT_XMM avx
ALIGN 16
ADD4x4IDCT avx
ADD4x4IDCT
%endif
%macro ADD16_OP 3
cmp byte [r4+%3], 0
jz .skipblock%2
mov r5d, [r1+%2*4]
call add4x4_idct_%1
.skipblock%2:
%if %2<15
%macro ADD16_OP 2
cmp byte [r4+%2], 0
jz .skipblock%1
mov r5d, [r1+%1*4]
call add4x4_idct %+ SUFFIX
.skipblock%1:
%if %1<15
add r2, 64
%endif
%endmacro
%macro IDCT_ADD16_10 1
cglobal h264_idct_add16_10_%1, 5,6
ADD16_OP %1, 0, 4+1*8
ADD16_OP %1, 1, 5+1*8
ADD16_OP %1, 2, 4+2*8
ADD16_OP %1, 3, 5+2*8
ADD16_OP %1, 4, 6+1*8
ADD16_OP %1, 5, 7+1*8
ADD16_OP %1, 6, 6+2*8
ADD16_OP %1, 7, 7+2*8
ADD16_OP %1, 8, 4+3*8
ADD16_OP %1, 9, 5+3*8
ADD16_OP %1, 10, 4+4*8
ADD16_OP %1, 11, 5+4*8
ADD16_OP %1, 12, 6+3*8
ADD16_OP %1, 13, 7+3*8
ADD16_OP %1, 14, 6+4*8
ADD16_OP %1, 15, 7+4*8
%macro IDCT_ADD16_10 0
cglobal h264_idct_add16_10, 5,6
ADD16_OP 0, 4+1*8
ADD16_OP 1, 5+1*8
ADD16_OP 2, 4+2*8
ADD16_OP 3, 5+2*8
ADD16_OP 4, 6+1*8
ADD16_OP 5, 7+1*8
ADD16_OP 6, 6+2*8
ADD16_OP 7, 7+2*8
ADD16_OP 8, 4+3*8
ADD16_OP 9, 5+3*8
ADD16_OP 10, 4+4*8
ADD16_OP 11, 5+4*8
ADD16_OP 12, 6+3*8
ADD16_OP 13, 7+3*8
ADD16_OP 14, 6+4*8
ADD16_OP 15, 7+4*8
REP_RET
%endmacro
INIT_XMM
IDCT_ADD16_10 sse2
INIT_XMM sse2
IDCT_ADD16_10
%if HAVE_AVX
INIT_AVX
IDCT_ADD16_10 avx
INIT_XMM avx
IDCT_ADD16_10
%endif
;-----------------------------------------------------------------------------
@ -185,8 +185,8 @@ IDCT_ADD16_10 avx
mova [%1+%3 ], m4
%endmacro
INIT_MMX
cglobal h264_idct_dc_add_10_mmx2,3,3
INIT_MMX mmx2
cglobal h264_idct_dc_add_10,3,3
movd m0, [r1]
paddd m0, [pd_32]
psrad m0, 6
@ -199,8 +199,8 @@ cglobal h264_idct_dc_add_10_mmx2,3,3
;-----------------------------------------------------------------------------
; void h264_idct8_dc_add(pixel *dst, dctcoef *block, int stride)
;-----------------------------------------------------------------------------
%macro IDCT8_DC_ADD 1
cglobal h264_idct8_dc_add_10_%1,3,3,7
%macro IDCT8_DC_ADD 0
cglobal h264_idct8_dc_add_10,3,3,7
mov r1d, [r1]
add r1, 32
sar r1, 6
@ -214,45 +214,45 @@ cglobal h264_idct8_dc_add_10_%1,3,3,7
RET
%endmacro
INIT_XMM
IDCT8_DC_ADD sse2
INIT_XMM sse2
IDCT8_DC_ADD
%if HAVE_AVX
INIT_AVX
IDCT8_DC_ADD avx
INIT_XMM avx
IDCT8_DC_ADD
%endif
;-----------------------------------------------------------------------------
; h264_idct_add16intra(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
;-----------------------------------------------------------------------------
%macro AC 2
.ac%2
mov r5d, [r1+(%2+0)*4]
call add4x4_idct_%1
mov r5d, [r1+(%2+1)*4]
%macro AC 1
.ac%1
mov r5d, [r1+(%1+0)*4]
call add4x4_idct %+ SUFFIX
mov r5d, [r1+(%1+1)*4]
add r2, 64
call add4x4_idct_%1
call add4x4_idct %+ SUFFIX
add r2, 64
jmp .skipadd%2
jmp .skipadd%1
%endmacro
%assign last_block 16
%macro ADD16_OP_INTRA 3
cmp word [r4+%3], 0
jnz .ac%2
%macro ADD16_OP_INTRA 2
cmp word [r4+%2], 0
jnz .ac%1
mov r5d, [r2+ 0]
or r5d, [r2+64]
jz .skipblock%2
mov r5d, [r1+(%2+0)*4]
call idct_dc_add_%1
.skipblock%2:
%if %2<last_block-2
jz .skipblock%1
mov r5d, [r1+(%1+0)*4]
call idct_dc_add %+ SUFFIX
.skipblock%1:
%if %1<last_block-2
add r2, 128
%endif
.skipadd%2:
.skipadd%1:
%endmacro
%macro IDCT_ADD16INTRA_10 1
idct_dc_add_%1:
%macro IDCT_ADD16INTRA_10 0
idct_dc_add %+ SUFFIX:
add r5, r0
movq m0, [r2+ 0]
movhps m0, [r2+64]
@ -265,46 +265,46 @@ idct_dc_add_%1:
IDCT_DC_ADD_OP_10 r5, r3, r6
ret
cglobal h264_idct_add16intra_10_%1,5,7,8
ADD16_OP_INTRA %1, 0, 4+1*8
ADD16_OP_INTRA %1, 2, 4+2*8
ADD16_OP_INTRA %1, 4, 6+1*8
ADD16_OP_INTRA %1, 6, 6+2*8
ADD16_OP_INTRA %1, 8, 4+3*8
ADD16_OP_INTRA %1, 10, 4+4*8
ADD16_OP_INTRA %1, 12, 6+3*8
ADD16_OP_INTRA %1, 14, 6+4*8
cglobal h264_idct_add16intra_10,5,7,8
ADD16_OP_INTRA 0, 4+1*8
ADD16_OP_INTRA 2, 4+2*8
ADD16_OP_INTRA 4, 6+1*8
ADD16_OP_INTRA 6, 6+2*8
ADD16_OP_INTRA 8, 4+3*8
ADD16_OP_INTRA 10, 4+4*8
ADD16_OP_INTRA 12, 6+3*8
ADD16_OP_INTRA 14, 6+4*8
REP_RET
AC %1, 8
AC %1, 10
AC %1, 12
AC %1, 14
AC %1, 0
AC %1, 2
AC %1, 4
AC %1, 6
AC 8
AC 10
AC 12
AC 14
AC 0
AC 2
AC 4
AC 6
%endmacro
INIT_XMM
IDCT_ADD16INTRA_10 sse2
INIT_XMM sse2
IDCT_ADD16INTRA_10
%if HAVE_AVX
INIT_AVX
IDCT_ADD16INTRA_10 avx
INIT_XMM avx
IDCT_ADD16INTRA_10
%endif
%assign last_block 36
;-----------------------------------------------------------------------------
; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
;-----------------------------------------------------------------------------
%macro IDCT_ADD8 1
cglobal h264_idct_add8_10_%1,5,8,7
%macro IDCT_ADD8 0
cglobal h264_idct_add8_10,5,8,7
%if ARCH_X86_64
mov r7, r0
%endif
add r2, 1024
mov r0, [r0]
ADD16_OP_INTRA %1, 16, 4+ 6*8
ADD16_OP_INTRA %1, 18, 4+ 7*8
ADD16_OP_INTRA 16, 4+ 6*8
ADD16_OP_INTRA 18, 4+ 7*8
add r2, 1024-128*2
%if ARCH_X86_64
mov r0, [r7+gprsize]
@ -312,21 +312,21 @@ cglobal h264_idct_add8_10_%1,5,8,7
mov r0, r0m
mov r0, [r0+gprsize]
%endif
ADD16_OP_INTRA %1, 32, 4+11*8
ADD16_OP_INTRA %1, 34, 4+12*8
ADD16_OP_INTRA 32, 4+11*8
ADD16_OP_INTRA 34, 4+12*8
REP_RET
AC %1, 16
AC %1, 18
AC %1, 32
AC %1, 34
AC 16
AC 18
AC 32
AC 34
%endmacro ; IDCT_ADD8
INIT_XMM
IDCT_ADD8 sse2
INIT_XMM sse2
IDCT_ADD8
%if HAVE_AVX
INIT_AVX
IDCT_ADD8 avx
INIT_XMM avx
IDCT_ADD8
%endif
;-----------------------------------------------------------------------------
@ -432,19 +432,19 @@ IDCT_ADD8 avx
STORE_DIFFx2 m0, m1, m6, m7, %1, %3
%endmacro
%macro IDCT8_ADD 1
cglobal h264_idct8_add_10_%1, 3,4,16
%macro IDCT8_ADD 0
cglobal h264_idct8_add_10, 3,4,16
%if UNIX64 == 0
%assign pad 16-gprsize-(stack_offset&15)
sub rsp, pad
call h264_idct8_add1_10_%1
call h264_idct8_add1_10 %+ SUFFIX
add rsp, pad
RET
%endif
ALIGN 16
; TODO: does not need to use stack
h264_idct8_add1_10_%1:
h264_idct8_add1_10 %+ SUFFIX:
%assign pad 256+16-gprsize
sub rsp, pad
add dword [r1], 32
@ -499,31 +499,31 @@ h264_idct8_add1_10_%1:
ret
%endmacro
INIT_XMM
IDCT8_ADD sse2
INIT_XMM sse2
IDCT8_ADD
%if HAVE_AVX
INIT_AVX
IDCT8_ADD avx
INIT_XMM avx
IDCT8_ADD
%endif
;-----------------------------------------------------------------------------
; h264_idct8_add4(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
;-----------------------------------------------------------------------------
;;;;;;; NO FATE SAMPLES TRIGGER THIS
%macro IDCT8_ADD4_OP 3
cmp byte [r4+%3], 0
jz .skipblock%2
mov r0d, [r6+%2*4]
%macro IDCT8_ADD4_OP 2
cmp byte [r4+%2], 0
jz .skipblock%1
mov r0d, [r6+%1*4]
add r0, r5
call h264_idct8_add1_10_%1
.skipblock%2:
%if %2<12
call h264_idct8_add1_10 %+ SUFFIX
.skipblock%1:
%if %1<12
add r1, 256
%endif
%endmacro
%macro IDCT8_ADD4 1
cglobal h264_idct8_add4_10_%1, 0,7,16
%macro IDCT8_ADD4 0
cglobal h264_idct8_add4_10, 0,7,16
%assign pad 16-gprsize-(stack_offset&15)
SUB rsp, pad
mov r5, r0mp
@ -531,17 +531,17 @@ cglobal h264_idct8_add4_10_%1, 0,7,16
mov r1, r2mp
mov r2d, r3m
movifnidn r4, r4mp
IDCT8_ADD4_OP %1, 0, 4+1*8
IDCT8_ADD4_OP %1, 4, 6+1*8
IDCT8_ADD4_OP %1, 8, 4+3*8
IDCT8_ADD4_OP %1, 12, 6+3*8
IDCT8_ADD4_OP 0, 4+1*8
IDCT8_ADD4_OP 4, 6+1*8
IDCT8_ADD4_OP 8, 4+3*8
IDCT8_ADD4_OP 12, 6+3*8
ADD rsp, pad
RET
%endmacro ; IDCT8_ADD4
INIT_XMM
IDCT8_ADD4 sse2
INIT_XMM sse2
IDCT8_ADD4
%if HAVE_AVX
INIT_AVX
IDCT8_ADD4 avx
INIT_XMM avx
IDCT8_ADD4
%endif

@ -249,12 +249,12 @@ void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, in
int alpha, int beta);
#define LF_FUNCS(type, depth)\
LF_FUNC (h, chroma, depth, mmxext)\
LF_IFUNC(h, chroma_intra, depth, mmxext)\
LF_FUNC (v, chroma, depth, mmxext)\
LF_IFUNC(v, chroma_intra, depth, mmxext)\
LF_FUNC (h, luma, depth, mmxext)\
LF_IFUNC(h, luma_intra, depth, mmxext)\
LF_FUNC (h, chroma, depth, mmx2)\
LF_IFUNC(h, chroma_intra, depth, mmx2)\
LF_FUNC (v, chroma, depth, mmx2)\
LF_IFUNC(v, chroma_intra, depth, mmx2)\
LF_FUNC (h, luma, depth, mmx2)\
LF_IFUNC(h, luma_intra, depth, mmx2)\
LF_FUNC (h, luma, depth, sse2)\
LF_IFUNC(h, luma_intra, depth, sse2)\
LF_FUNC (v, luma, depth, sse2)\
@ -276,24 +276,24 @@ LF_FUNCS( uint8_t, 8)
LF_FUNCS(uint16_t, 10)
#if ARCH_X86_32 && HAVE_YASM
LF_FUNC (v8, luma, 8, mmxext)
static void ff_deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
LF_FUNC (v8, luma, 8, mmx2)
static void ff_deblock_v_luma_8_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
{
if((tc0[0] & tc0[1]) >= 0)
ff_deblock_v8_luma_8_mmxext(pix+0, stride, alpha, beta, tc0);
ff_deblock_v8_luma_8_mmx2(pix+0, stride, alpha, beta, tc0);
if((tc0[2] & tc0[3]) >= 0)
ff_deblock_v8_luma_8_mmxext(pix+8, stride, alpha, beta, tc0+2);
ff_deblock_v8_luma_8_mmx2(pix+8, stride, alpha, beta, tc0+2);
}
LF_IFUNC(v8, luma_intra, 8, mmxext)
static void ff_deblock_v_luma_intra_8_mmxext(uint8_t *pix, int stride, int alpha, int beta)
LF_IFUNC(v8, luma_intra, 8, mmx2)
static void ff_deblock_v_luma_intra_8_mmx2(uint8_t *pix, int stride, int alpha, int beta)
{
ff_deblock_v8_luma_intra_8_mmxext(pix+0, stride, alpha, beta);
ff_deblock_v8_luma_intra_8_mmxext(pix+8, stride, alpha, beta);
ff_deblock_v8_luma_intra_8_mmx2(pix+0, stride, alpha, beta);
ff_deblock_v8_luma_intra_8_mmx2(pix+8, stride, alpha, beta);
}
#endif /* ARCH_X86_32 */
LF_FUNC (v, luma, 10, mmxext)
LF_IFUNC(v, luma_intra, 10, mmxext)
LF_FUNC (v, luma, 10, mmx2)
LF_IFUNC(v, luma_intra, 10, mmx2)
/***********************************/
/* weighted prediction */
@ -373,17 +373,17 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
c->h264_idct_add8 = ff_h264_idct_add8_8_mmx2;
c->h264_idct_add16intra= ff_h264_idct_add16intra_8_mmx2;
c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext;
c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmxext;
c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmx2;
c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmx2;
if (chroma_format_idc == 1) {
c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext;
c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext;
c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmx2;
c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmx2;
}
#if ARCH_X86_32
c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmxext;
c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmxext;
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext;
c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmx2;
c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmx2;
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmx2;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmx2;
#endif
c->weight_h264_pixels_tab[0]= ff_h264_weight_16_mmx2;
c->weight_h264_pixels_tab[1]= ff_h264_weight_8_mmx2;
@ -436,12 +436,12 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
if (mm_flags & AV_CPU_FLAG_MMX) {
if (mm_flags & AV_CPU_FLAG_MMX2) {
#if ARCH_X86_32
c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_mmxext;
c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_mmxext;
c->h264_v_loop_filter_luma= ff_deblock_v_luma_10_mmxext;
c->h264_h_loop_filter_luma= ff_deblock_h_luma_10_mmxext;
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext;
c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_mmx2;
c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_mmx2;
c->h264_v_loop_filter_luma= ff_deblock_v_luma_10_mmx2;
c->h264_h_loop_filter_luma= ff_deblock_h_luma_10_mmx2;
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmx2;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmx2;
#endif
c->h264_idct_dc_add= ff_h264_idct_dc_add_10_mmx2;
if (mm_flags&AV_CPU_FLAG_SSE2) {

@ -46,7 +46,7 @@ SECTION .text
%endmacro
%macro rv34_idct 1
cglobal rv34_idct_%1_mmx2, 1, 2, 0
cglobal rv34_idct_%1, 1, 2, 0
movsx r1, word [r0]
IDCT_DC r1
movd m0, r1d
@ -58,14 +58,15 @@ cglobal rv34_idct_%1_mmx2, 1, 2, 0
REP_RET
%endmacro
INIT_MMX
INIT_MMX mmx2
%define IDCT_DC IDCT_DC_ROUND
rv34_idct dc
%define IDCT_DC IDCT_DC_NOROUND
rv34_idct dc_noround
; ff_rv34_idct_dc_add_mmx(uint8_t *dst, int stride, int dc);
cglobal rv34_idct_dc_add_mmx, 3, 3
INIT_MMX mmx
cglobal rv34_idct_dc_add, 3, 3
; calculate DC
IDCT_DC_ROUND r2
pxor m1, m1
@ -167,8 +168,8 @@ cglobal rv34_idct_add, 3,3,0, d, s, b
ret
; ff_rv34_idct_dc_add_sse4(uint8_t *dst, int stride, int dc);
INIT_XMM
cglobal rv34_idct_dc_add_sse4, 3, 3, 6
INIT_XMM sse4
cglobal rv34_idct_dc_add, 3, 3, 6
; load data
IDCT_DC_ROUND r2
pxor m1, m1

@ -102,8 +102,8 @@ SECTION .text
mov [r0+r3 -1], r2w
%endmacro
INIT_MMX
cglobal vp3_v_loop_filter_mmx2, 3, 4
INIT_MMX mmx2
cglobal vp3_v_loop_filter, 3, 4
%if ARCH_X86_64
movsxd r1, r1d
%endif
@ -120,7 +120,7 @@ cglobal vp3_v_loop_filter_mmx2, 3, 4
movq [r0 ], m3
RET
cglobal vp3_h_loop_filter_mmx2, 3, 4
cglobal vp3_h_loop_filter, 3, 4
%if ARCH_X86_64
movsxd r1, r1d
%endif
@ -354,38 +354,6 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4
movq I(2), m2
%endmacro
%macro VP3_IDCT_mmx 1
; eax = quantized input
; ebx = dequantizer matrix
; ecx = IDCT constants
; M(I) = ecx + MaskOffset(0) + I * 8
; C(I) = ecx + CosineOffset(32) + (I-1) * 8
; edx = output
; r0..r7 = mm0..mm7
%define OC_8 [pw_8]
%define C(x) [vp3_idct_data+16*(x-1)]
; at this point, function has completed dequantization + dezigzag +
; partial transposition; now do the idct itself
%define I(x) [%1+16* x ]
%define J(x) [%1+16*(x-4)+8]
RowIDCT
Transpose
%define I(x) [%1+16* x +64]
%define J(x) [%1+16*(x-4)+72]
RowIDCT
Transpose
%define I(x) [%1+16*x]
%define J(x) [%1+16*x]
ColumnIDCT
%define I(x) [%1+16*x+8]
%define J(x) [%1+16*x+8]
ColumnIDCT
%endmacro
%macro VP3_1D_IDCT_SSE2 0
movdqa m2, I(3) ; xmm2 = i3
movdqa m6, C(3) ; xmm6 = c3
@ -501,7 +469,8 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4
movdqa O(7), m%8
%endmacro
%macro VP3_IDCT_sse2 1
%macro VP3_IDCT 1
%if mmsize == 16
%define I(x) [%1+16*x]
%define O(x) [%1+16*x]
%define C(x) [vp3_idct_data+16*(x-1)]
@ -519,11 +488,42 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4
%define ADD(x) paddsw x, [pw_8]
VP3_1D_IDCT_SSE2
PUT_BLOCK 0, 1, 2, 3, 4, 5, 6, 7
%else ; mmsize == 8
; eax = quantized input
; ebx = dequantizer matrix
; ecx = IDCT constants
; M(I) = ecx + MaskOffset(0) + I * 8
; C(I) = ecx + CosineOffset(32) + (I-1) * 8
; edx = output
; r0..r7 = mm0..mm7
%define OC_8 [pw_8]
%define C(x) [vp3_idct_data+16*(x-1)]
; at this point, function has completed dequantization + dezigzag +
; partial transposition; now do the idct itself
%define I(x) [%1+16* x ]
%define J(x) [%1+16*(x-4)+8]
RowIDCT
Transpose
%define I(x) [%1+16* x +64]
%define J(x) [%1+16*(x-4)+72]
RowIDCT
Transpose
%define I(x) [%1+16*x]
%define J(x) [%1+16*x]
ColumnIDCT
%define I(x) [%1+16*x+8]
%define J(x) [%1+16*x+8]
ColumnIDCT
%endif ; mmsize == 16/8
%endmacro
%macro vp3_idct_funcs 1
cglobal vp3_idct_put_%1, 3, 4, 9
VP3_IDCT_%1 r2
%macro vp3_idct_funcs 0
cglobal vp3_idct_put, 3, 4, 9
VP3_IDCT r2
movsxdifnidn r1, r1d
mova m4, [pb_80]
@ -565,8 +565,8 @@ cglobal vp3_idct_put_%1, 3, 4, 9
%endrep
RET
cglobal vp3_idct_add_%1, 3, 4, 9
VP3_IDCT_%1 r2
cglobal vp3_idct_add, 3, 4, 9
VP3_IDCT r2
mov r3, 4
pxor m4, m4
@ -607,10 +607,13 @@ cglobal vp3_idct_add_%1, 3, 4, 9
RET
%endmacro
INIT_MMX
vp3_idct_funcs mmx
INIT_XMM
vp3_idct_funcs sse2
%if ARCH_X86_32
INIT_MMX mmx
vp3_idct_funcs
%endif
INIT_XMM sse2
vp3_idct_funcs
%macro DC_ADD 0
movq m2, [r0 ]
@ -631,8 +634,8 @@ vp3_idct_funcs sse2
movq [r0+r3 ], m5
%endmacro
INIT_MMX
cglobal vp3_idct_dc_add_mmx2, 3, 4
INIT_MMX mmx2
cglobal vp3_idct_dc_add, 3, 4
%if ARCH_X86_64
movsxd r1, r1d
%endif

@ -41,11 +41,13 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
#if HAVE_YASM
int cpuflags = av_get_cpu_flags();
#if ARCH_X86_32
if (HAVE_MMX && cpuflags & AV_CPU_FLAG_MMX) {
c->idct_put = ff_vp3_idct_put_mmx;
c->idct_add = ff_vp3_idct_add_mmx;
c->idct_perm = FF_PARTTRANS_IDCT_PERM;
}
#endif
if (HAVE_MMX2 && cpuflags & AV_CPU_FLAG_MMX2) {
c->idct_dc_add = ff_vp3_idct_dc_add_mmx2;

@ -27,7 +27,8 @@ cextern pw_64
SECTION .text
%macro DIAG4_MMX 6
%macro DIAG4 6
%if mmsize == 8
movq m0, [%1+%2]
movq m1, [%1+%3]
movq m3, m0
@ -64,9 +65,7 @@ SECTION .text
psraw m3, 7
packuswb m0, m3
movq [%6], m0
%endmacro
%macro DIAG4_SSE2 6
%else ; mmsize == 16
movq m0, [%1+%2]
movq m1, [%1+%3]
punpcklbw m0, m7
@ -86,9 +85,11 @@ SECTION .text
psraw m0, 7
packuswb m0, m0
movq [%6], m0
%endif ; mmsize == 8/16
%endmacro
%macro SPLAT4REGS_MMX 0
%macro SPLAT4REGS 0
%if mmsize == 8
movq m5, m3
punpcklwd m3, m3
movq m4, m3
@ -102,9 +103,7 @@ SECTION .text
movq [rsp+8*12], m4
movq [rsp+8*13], m5
movq [rsp+8*14], m2
%endmacro
%macro SPLAT4REGS_SSE2 0
%else ; mmsize == 16
pshuflw m4, m3, 0x0
pshuflw m5, m3, 0x55
pshuflw m6, m3, 0xAA
@ -113,15 +112,16 @@ SECTION .text
punpcklqdq m5, m5
punpcklqdq m6, m6
punpcklqdq m3, m3
%endif ; mmsize == 8/16
%endmacro
%macro vp6_filter_diag4 2
%macro vp6_filter_diag4 0
; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, int stride,
; const int16_t h_weight[4], const int16_t v_weights[4])
cglobal vp6_filter_diag4_%1, 5, 7, %2
cglobal vp6_filter_diag4, 5, 7, 8
mov r5, rsp ; backup stack pointer
and rsp, ~(mmsize-1) ; align stack
%ifidn %1, sse2
%if mmsize == 16
sub rsp, 8*11
%else
sub rsp, 8*15
@ -162,12 +162,10 @@ cglobal vp6_filter_diag4_%1, 5, 7, %2
RET
%endmacro
INIT_MMX
%define DIAG4 DIAG4_MMX
%define SPLAT4REGS SPLAT4REGS_MMX
vp6_filter_diag4 mmx, 0
%if ARCH_X86_32
INIT_MMX mmx
vp6_filter_diag4
%endif
INIT_XMM
%define DIAG4 DIAG4_SSE2
%define SPLAT4REGS SPLAT4REGS_SSE2
vp6_filter_diag4 sse2, 8
INIT_XMM sse2
vp6_filter_diag4

@ -36,9 +36,11 @@ av_cold void ff_vp56dsp_init_x86(VP56DSPContext* c, enum CodecID codec)
int mm_flags = av_get_cpu_flags();
if (CONFIG_VP6_DECODER && codec == CODEC_ID_VP6) {
#if ARCH_X86_32
if (mm_flags & AV_CPU_FLAG_MMX) {
c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
}
#endif
if (mm_flags & AV_CPU_FLAG_SSE2) {
c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2;

@ -35,7 +35,7 @@ OBJS-$(CONFIG_SNDIO_OUTDEV) += sndio_common.o sndio_enc.o
OBJS-$(CONFIG_V4L2_INDEV) += v4l2.o timefilter.o
OBJS-$(CONFIG_V4L_INDEV) += v4l.o
OBJS-$(CONFIG_VFWCAP_INDEV) += vfwcap.o
OBJS-$(CONFIG_X11_GRAB_DEVICE_INDEV) += x11grab.o
OBJS-$(CONFIG_X11GRAB_INDEV) += x11grab.o
# external libraries
OBJS-$(CONFIG_LIBCDIO_INDEV) += libcdio.o

@ -55,7 +55,7 @@ void avdevice_register_all(void)
REGISTER_INDEV (V4L2, v4l2);
// REGISTER_INDEV (V4L, v4l
REGISTER_INDEV (VFWCAP, vfwcap);
REGISTER_INDEV (X11_GRAB_DEVICE, x11_grab_device);
REGISTER_INDEV (X11GRAB, x11grab);
/* external libraries */
REGISTER_INDEV (LIBCDIO, libcdio);

@ -36,7 +36,7 @@
* (de)muxers in libavdevice are of the AVFMT_NOFILE type (they use their own
* I/O functions). The filename passed to avformat_open_input() often does not
* refer to an actually existing file, but has some special device-specific
* meaning - e.g. for the x11grab device it is the display name.
* meaning - e.g. for x11grab it is the display name.
*
* To use libavdevice, simply call avdevice_register_all() to register all
* compiled muxers and demuxers. They all use standard libavformat API.

@ -39,6 +39,7 @@
#include "libavutil/log.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavutil/time.h"
#include "libavutil/parseutils.h"
#include "libavutil/pixdesc.h"
#include "avdevice.h"

@ -27,6 +27,7 @@
#include "libavutil/log.h"
#include "libavutil/fifo.h"
#include "libavutil/opt.h"
#include "libavutil/time.h"
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libavformat/internal.h"

@ -36,6 +36,7 @@
#include "libavutil/log.h"
#include "libavutil/opt.h"
#include "libavutil/time.h"
#include "libavcodec/avcodec.h"
#include "avdevice.h"
#include "libavformat/internal.h"

@ -41,6 +41,7 @@
#include "libavutil/log.h"
#include "libavutil/opt.h"
#include "libavutil/parseutils.h"
#include "libavutil/time.h"
#include <time.h>
#include <X11/X.h>
#include <X11/Xlib.h>
@ -56,8 +57,7 @@
/**
* X11 Device Demuxer context
*/
struct x11_grab
{
struct x11grab {
const AVClass *class; /**< Class for private options. */
int frame_size; /**< Size in bytes of a grabbed frame */
AVRational time_base; /**< Time base */
@ -84,10 +84,10 @@ struct x11_grab
/**
* Draw grabbing region window
*
* @param s x11_grab context
* @param s x11grab context
*/
static void
x11grab_draw_region_win(struct x11_grab *s)
x11grab_draw_region_win(struct x11grab *s)
{
Display *dpy = s->dpy;
int screen;
@ -109,10 +109,10 @@ x11grab_draw_region_win(struct x11_grab *s)
/**
* Initialize grabbing region window
*
* @param s x11_grab context
* @param s x11grab context
*/
static void
x11grab_region_win_init(struct x11_grab *s)
x11grab_region_win_init(struct x11grab *s)
{
Display *dpy = s->dpy;
int screen;
@ -154,7 +154,7 @@ x11grab_region_win_init(struct x11_grab *s)
static int
x11grab_read_header(AVFormatContext *s1)
{
struct x11_grab *x11grab = s1->priv_data;
struct x11grab *x11grab = s1->priv_data;
Display *dpy;
AVStream *st = NULL;
enum PixelFormat input_pixfmt;
@ -330,7 +330,7 @@ out:
* coordinates
*/
static void
paint_mouse_pointer(XImage *image, struct x11_grab *s)
paint_mouse_pointer(XImage *image, struct x11grab *s)
{
int x_off = s->x_off;
int y_off = s->y_off;
@ -444,7 +444,7 @@ xget_zpixmap(Display *dpy, Drawable d, XImage *image, int x, int y)
static int
x11grab_read_packet(AVFormatContext *s1, AVPacket *pkt)
{
struct x11_grab *s = s1->priv_data;
struct x11grab *s = s1->priv_data;
Display *dpy = s->dpy;
XImage *image = s->image;
int x_off = s->x_off;
@ -554,7 +554,7 @@ x11grab_read_packet(AVFormatContext *s1, AVPacket *pkt)
static int
x11grab_read_close(AVFormatContext *s1)
{
struct x11_grab *x11grab = s1->priv_data;
struct x11grab *x11grab = s1->priv_data;
/* Detach cleanly from shared mem */
if (x11grab->use_shm) {
@ -578,7 +578,7 @@ x11grab_read_close(AVFormatContext *s1)
return 0;
}
#define OFFSET(x) offsetof(struct x11_grab, x)
#define OFFSET(x) offsetof(struct x11grab, x)
#define DEC AV_OPT_FLAG_DECODING_PARAM
static const AVOption options[] = {
{ "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(width), AV_OPT_TYPE_IMAGE_SIZE, {.str = "vga"}, 0, 0, DEC },
@ -599,10 +599,10 @@ static const AVClass x11_class = {
};
/** x11 grabber device demuxer declaration */
AVInputFormat ff_x11_grab_device_demuxer = {
AVInputFormat ff_x11grab_demuxer = {
.name = "x11grab",
.long_name = NULL_IF_CONFIG_SMALL("X11grab"),
.priv_data_size = sizeof(struct x11_grab),
.priv_data_size = sizeof(struct x11grab),
.read_header = x11grab_read_header,
.read_packet = x11grab_read_packet,
.read_close = x11grab_read_close,

@ -29,7 +29,7 @@
#include "libavutil/avutil.h"
#define LIBAVFILTER_VERSION_MAJOR 3
#define LIBAVFILTER_VERSION_MINOR 4
#define LIBAVFILTER_VERSION_MINOR 5
#define LIBAVFILTER_VERSION_MICRO 100
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \

@ -420,8 +420,6 @@ reload:
ret = ffurl_read(v->input, buf, buf_size);
if (ret > 0)
return ret;
if (ret < 0 && ret != AVERROR_EOF)
return ret;
ffurl_close(v->input);
v->input = NULL;
v->cur_seq_no++;

@ -196,7 +196,6 @@ static int mp3_read_packet(AVFormatContext *s, AVPacket *pkt)
MP3Context *mp3 = s->priv_data;
int ret, size;
int64_t pos;
// AVStream *st = s->streams[0];
size= MP3_PACKET_SIZE;
pos = avio_tell(s->pb);
@ -204,15 +203,15 @@ static int mp3_read_packet(AVFormatContext *s, AVPacket *pkt)
size= FFMIN(size, mp3->filesize - pos);
ret= av_get_packet(s->pb, pkt, size);
pkt->flags &= ~AV_PKT_FLAG_CORRUPT;
pkt->stream_index = 0;
if (ret <= 0) {
if(ret<0)
return ret;
return AVERROR_EOF;
}
pkt->flags &= ~AV_PKT_FLAG_CORRUPT;
pkt->stream_index = 0;
if (ret >= ID3v1_TAG_SIZE &&
memcmp(&pkt->data[ret - ID3v1_TAG_SIZE], "TAG", 3) == 0)
ret -= ID3v1_TAG_SIZE;

@ -21,6 +21,7 @@
#include "libavutil/mathematics.h"
#include "libavutil/avstring.h"
#include "libavutil/time.h"
#include "libavcodec/get_bits.h"
#include "avformat.h"
#include "mpegts.h"

@ -27,6 +27,7 @@
#include "libavutil/random_seed.h"
#include "libavutil/dict.h"
#include "libavutil/opt.h"
#include "libavutil/time.h"
#include "avformat.h"
#include "avio_internal.h"

@ -23,6 +23,7 @@
#include "libavutil/intreadwrite.h"
#include "libavutil/mathematics.h"
#include "libavutil/random_seed.h"
#include "libavutil/time.h"
#include "avformat.h"
#include "internal.h"

@ -31,6 +31,7 @@
#include "avio_internal.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/avstring.h"
#include "libavutil/time.h"
#include "url.h"
#define SDP_MAX_SIZE 16384

@ -24,6 +24,7 @@
#include "libavutil/random_seed.h"
#include "libavutil/avstring.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/time.h"
#include "internal.h"
#include "network.h"
#include "os_support.h"

@ -266,7 +266,7 @@ static int tls_read(URLContext *h, uint8_t *buf, int size)
if (ret > 0)
return ret;
if (ret == 0)
return AVERROR(EIO);
return AVERROR_EOF;
if ((ret = do_tls_poll(h, ret)) < 0)
return ret;
}
@ -281,7 +281,7 @@ static int tls_write(URLContext *h, const uint8_t *buf, int size)
if (ret > 0)
return ret;
if (ret == 0)
return AVERROR(EIO);
return AVERROR_EOF;
if ((ret = do_tls_poll(h, ret)) < 0)
return ret;
}

@ -37,6 +37,7 @@
#include "libavutil/avstring.h"
#include "libavutil/mathematics.h"
#include "libavutil/parseutils.h"
#include "libavutil/time.h"
#include "libavutil/timestamp.h"
#include "riff.h"
#include "audiointerleave.h"
@ -2765,9 +2766,7 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options)
if (tb_unreliable(st->codec) && st->info->duration_count > 15 && st->info->duration_gcd > FFMAX(1, st->time_base.den/(500LL*st->time_base.num)) && !st->r_frame_rate.num)
av_reduce(&st->r_frame_rate.num, &st->r_frame_rate.den, st->time_base.den, st->time_base.num * st->info->duration_gcd, INT_MAX);
if (st->info->duration_count && !st->r_frame_rate.num
&& tb_unreliable(st->codec) /*&&
//FIXME we should not special-case MPEG-2, but this needs testing with non-MPEG-2 ...
st->time_base.num*duration_sum[i]/st->info->duration_count*101LL > st->time_base.den*/){
&& tb_unreliable(st->codec)) {
int num = 0;
double best_error= 0.01;

@ -90,17 +90,17 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen
if(!append){
tag = avio_rl32(pb);
if (tag != MKTAG('w', 'v', 'p', 'k'))
return -1;
return AVERROR_INVALIDDATA;
size = avio_rl32(pb);
if(size < 24 || size > WV_BLOCK_LIMIT){
av_log(ctx, AV_LOG_ERROR, "Incorrect block size %i\n", size);
return -1;
return AVERROR_INVALIDDATA;
}
wc->blksize = size;
ver = avio_rl16(pb);
if(ver < 0x402 || ver > 0x410){
av_log(ctx, AV_LOG_ERROR, "Unsupported version %03X\n", ver);
return -1;
return AVERROR_PATCHWELCOME;
}
avio_r8(pb); // track no
avio_r8(pb); // track sub index
@ -128,7 +128,7 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen
int64_t block_end = avio_tell(pb) + wc->blksize - 24;
if(!pb->seekable){
av_log(ctx, AV_LOG_ERROR, "Cannot determine additional parameters\n");
return -1;
return AVERROR_INVALIDDATA;
}
while(avio_tell(pb) < block_end){
int id, size;
@ -141,7 +141,7 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen
case 0xD:
if(size <= 1){
av_log(ctx, AV_LOG_ERROR, "Insufficient channel information\n");
return -1;
return AVERROR_INVALIDDATA;
}
chan = avio_r8(pb);
switch(size - 2){
@ -164,7 +164,7 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen
break;
default:
av_log(ctx, AV_LOG_ERROR, "Invalid channel info size %d\n", size);
return -1;
return AVERROR_INVALIDDATA;
}
break;
case 0x27:
@ -178,7 +178,7 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen
}
if(rate == -1){
av_log(ctx, AV_LOG_ERROR, "Cannot determine custom sampling rate\n");
return -1;
return AVERROR_INVALIDDATA;
}
avio_seek(pb, block_end - wc->blksize + 24, SEEK_SET);
}
@ -189,15 +189,15 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen
if(wc->flags && bpp != wc->bpp){
av_log(ctx, AV_LOG_ERROR, "Bits per sample differ, this block: %i, header block: %i\n", bpp, wc->bpp);
return -1;
return AVERROR_INVALIDDATA;
}
if(wc->flags && !wc->multichannel && chan != wc->chan){
av_log(ctx, AV_LOG_ERROR, "Channels differ, this block: %i, header block: %i\n", chan, wc->chan);
return -1;
return AVERROR_INVALIDDATA;
}
if(wc->flags && rate != -1 && rate != wc->rate){
av_log(ctx, AV_LOG_ERROR, "Sampling rate differ, this block: %i, header block: %i\n", rate, wc->rate);
return -1;
return AVERROR_INVALIDDATA;
}
wc->blksize = size - 24;
return 0;
@ -208,11 +208,12 @@ static int wv_read_header(AVFormatContext *s)
AVIOContext *pb = s->pb;
WVContext *wc = s->priv_data;
AVStream *st;
int ret;
wc->block_parsed = 0;
for(;;){
if(wv_read_block_header(s, pb, 0) < 0)
return -1;
if ((ret = wv_read_block_header(s, pb, 0)) < 0)
return ret;
if(!AV_RN32(wc->extra))
avio_skip(pb, wc->blksize - 24);
else
@ -222,7 +223,7 @@ static int wv_read_header(AVFormatContext *s)
/* now we are ready: build format streams */
st = avformat_new_stream(s, NULL);
if (!st)
return -1;
return AVERROR(ENOMEM);
st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
st->codec->codec_id = CODEC_ID_WAVPACK;
st->codec->channels = wc->chan;
@ -254,10 +255,10 @@ static int wv_read_packet(AVFormatContext *s,
uint32_t block_samples;
if (url_feof(s->pb))
return AVERROR(EIO);
return AVERROR_EOF;
if(wc->block_parsed){
if(wv_read_block_header(s, s->pb, 0) < 0)
return -1;
if ((ret = wv_read_block_header(s, s->pb, 0)) < 0)
return ret;
}
pos = wc->pos;
@ -275,7 +276,7 @@ static int wv_read_packet(AVFormatContext *s,
while(!(wc->flags & WV_END_BLOCK)){
if(avio_rl32(s->pb) != MKTAG('w', 'v', 'p', 'k')){
av_free_packet(pkt);
return -1;
return AVERROR_INVALIDDATA;
}
if((ret = av_append_packet(s->pb, pkt, 4)) < 0){
av_free_packet(pkt);
@ -285,14 +286,14 @@ static int wv_read_packet(AVFormatContext *s,
if(size < 24 || size > WV_BLOCK_LIMIT){
av_free_packet(pkt);
av_log(s, AV_LOG_ERROR, "Incorrect block size %d\n", size);
return -1;
return AVERROR_INVALIDDATA;
}
wc->blksize = size;
ver = avio_rl16(s->pb);
if(ver < 0x402 || ver > 0x410){
av_free_packet(pkt);
av_log(s, AV_LOG_ERROR, "Unsupported version %03X\n", ver);
return -1;
return AVERROR_PATCHWELCOME;
}
avio_r8(s->pb); // track no
avio_r8(s->pb); // track sub index
@ -304,9 +305,9 @@ static int wv_read_packet(AVFormatContext *s,
}
memcpy(wc->extra, pkt->data + pkt->size - WV_EXTRA_SIZE, WV_EXTRA_SIZE);
if(wv_read_block_header(s, s->pb, 1) < 0){
if ((ret = wv_read_block_header(s, s->pb, 1)) < 0){
av_free_packet(pkt);
return -1;
return ret;
}
ret = av_append_packet(s->pb, pkt, wc->blksize);
if(ret < 0){
@ -345,14 +346,14 @@ static int wv_read_seek(AVFormatContext *s, int stream_index, int64_t timestamp,
}
/* if timestamp is out of bounds, return error */
if(timestamp < 0 || timestamp >= s->duration)
return -1;
return AVERROR(EINVAL);
pos = avio_tell(s->pb);
do{
ret = av_read_frame(s, pkt);
if (ret < 0){
avio_seek(s->pb, pos, SEEK_SET);
return -1;
return ret;
}
pts = pkt->pts;
av_free_packet(pkt);

@ -36,8 +36,8 @@
%define program_name ff
%define UNIX64 0
%define WIN64 0
%define UNIX64 0
%if ARCH_X86_64
%ifidn __OUTPUT_FORMAT__,win32
%define WIN64 1
@ -54,11 +54,6 @@
%define mangle(x) x
%endif
; FIXME: All of the 64bit asm functions that take a stride as an argument
; via register, assume that the high dword of that register is filled with 0.
; This is true in practice (since we never do any 64bit arithmetic on strides,
; and x264's strides are all positive), but is not guaranteed by the ABI.
; Name of the .rodata section.
%macro SECTION_RODATA 0-1 16
; Kludge: Something on OS X fails to align .rodata even given an align
@ -152,34 +147,38 @@ CPU amdnop
; registers:
; rN and rNq are the native-size register holding function argument N
; rNd, rNw, rNb are dword, word, and byte size
; rNh is the high 8 bits of the word size
; rNm is the original location of arg N (a register or on the stack), dword
; rNmp is native size
%macro DECLARE_REG 5-6
%macro DECLARE_REG 2-3
%define r%1q %2
%define r%1d %3
%define r%1w %4
%define r%1b %5
%if %0 == 5
%define r%1m %3
%define r%1d %2d
%define r%1w %2w
%define r%1b %2b
%define r%1h %2h
%if %0 == 2
%define r%1m %2d
%define r%1mp %2
%elif ARCH_X86_64 ; memory
%define r%1m [rsp + stack_offset + %6]
%define r%1m [rsp + stack_offset + %3]
%define r%1mp qword r %+ %1 %+ m
%else
%define r%1m [esp + stack_offset + %6]
%define r%1m [esp + stack_offset + %3]
%define r%1mp dword r %+ %1 %+ m
%endif
%define r%1 %2
%endmacro
%macro DECLARE_REG_SIZE 2
%macro DECLARE_REG_SIZE 3
%define r%1q r%1
%define e%1q r%1
%define r%1d e%1
%define e%1d e%1
%define r%1w %1
%define e%1w %1
%define r%1h %3
%define e%1h %3
%define r%1b %2
%define e%1b %2
%if ARCH_X86_64 == 0
@ -187,13 +186,13 @@ CPU amdnop
%endif
%endmacro
DECLARE_REG_SIZE ax, al
DECLARE_REG_SIZE bx, bl
DECLARE_REG_SIZE cx, cl
DECLARE_REG_SIZE dx, dl
DECLARE_REG_SIZE si, sil
DECLARE_REG_SIZE di, dil
DECLARE_REG_SIZE bp, bpl
DECLARE_REG_SIZE ax, al, ah
DECLARE_REG_SIZE bx, bl, bh
DECLARE_REG_SIZE cx, cl, ch
DECLARE_REG_SIZE dx, dl, dh
DECLARE_REG_SIZE si, sil, null
DECLARE_REG_SIZE di, dil, null
DECLARE_REG_SIZE bp, bpl, null
; t# defines for when per-arch register allocation is more complex than just function arguments
@ -211,6 +210,7 @@ DECLARE_REG_SIZE bp, bpl
%define t%1q t%1 %+ q
%define t%1d t%1 %+ d
%define t%1w t%1 %+ w
%define t%1h t%1 %+ h
%define t%1b t%1 %+ b
%rotate 1
%endrep
@ -300,6 +300,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
CAT_UNDEF arg_name %+ %%i, q
CAT_UNDEF arg_name %+ %%i, d
CAT_UNDEF arg_name %+ %%i, w
CAT_UNDEF arg_name %+ %%i, h
CAT_UNDEF arg_name %+ %%i, b
CAT_UNDEF arg_name %+ %%i, m
CAT_UNDEF arg_name %+ %%i, mp
@ -315,6 +316,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
%xdefine %1q r %+ %%i %+ q
%xdefine %1d r %+ %%i %+ d
%xdefine %1w r %+ %%i %+ w
%xdefine %1h r %+ %%i %+ h
%xdefine %1b r %+ %%i %+ b
%xdefine %1m r %+ %%i %+ m
%xdefine %1mp r %+ %%i %+ mp
@ -328,21 +330,21 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
%if WIN64 ; Windows x64 ;=================================================
DECLARE_REG 0, rcx, ecx, cx, cl
DECLARE_REG 1, rdx, edx, dx, dl
DECLARE_REG 2, R8, R8D, R8W, R8B
DECLARE_REG 3, R9, R9D, R9W, R9B
DECLARE_REG 4, R10, R10D, R10W, R10B, 40
DECLARE_REG 5, R11, R11D, R11W, R11B, 48
DECLARE_REG 6, rax, eax, ax, al, 56
DECLARE_REG 7, rdi, edi, di, dil, 64
DECLARE_REG 8, rsi, esi, si, sil, 72
DECLARE_REG 9, rbx, ebx, bx, bl, 80
DECLARE_REG 10, rbp, ebp, bp, bpl, 88
DECLARE_REG 11, R12, R12D, R12W, R12B, 96
DECLARE_REG 12, R13, R13D, R13W, R13B, 104
DECLARE_REG 13, R14, R14D, R14W, R14B, 112
DECLARE_REG 14, R15, R15D, R15W, R15B, 120
DECLARE_REG 0, rcx
DECLARE_REG 1, rdx
DECLARE_REG 2, R8
DECLARE_REG 3, R9
DECLARE_REG 4, R10, 40
DECLARE_REG 5, R11, 48
DECLARE_REG 6, rax, 56
DECLARE_REG 7, rdi, 64
DECLARE_REG 8, rsi, 72
DECLARE_REG 9, rbx, 80
DECLARE_REG 10, rbp, 88
DECLARE_REG 11, R12, 96
DECLARE_REG 12, R13, 104
DECLARE_REG 13, R14, 112
DECLARE_REG 14, R15, 120
%macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names...
%assign num_args %1
@ -389,6 +391,8 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 120
%assign xmm_regs_used 0
%endmacro
%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32
%macro RET 0
WIN64_RESTORE_XMM_INTERNAL rsp
POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
@ -398,31 +402,23 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 120
ret
%endmacro
%macro REP_RET 0
%if regs_used > 7 || xmm_regs_used > 6 || mmsize == 32
RET
%else
rep ret
%endif
%endmacro
%elif ARCH_X86_64 ; *nix x64 ;=============================================
DECLARE_REG 0, rdi, edi, di, dil
DECLARE_REG 1, rsi, esi, si, sil
DECLARE_REG 2, rdx, edx, dx, dl
DECLARE_REG 3, rcx, ecx, cx, cl
DECLARE_REG 4, R8, R8D, R8W, R8B
DECLARE_REG 5, R9, R9D, R9W, R9B
DECLARE_REG 6, rax, eax, ax, al, 8
DECLARE_REG 7, R10, R10D, R10W, R10B, 16
DECLARE_REG 8, R11, R11D, R11W, R11B, 24
DECLARE_REG 9, rbx, ebx, bx, bl, 32
DECLARE_REG 10, rbp, ebp, bp, bpl, 40
DECLARE_REG 11, R12, R12D, R12W, R12B, 48
DECLARE_REG 12, R13, R13D, R13W, R13B, 56
DECLARE_REG 13, R14, R14D, R14W, R14B, 64
DECLARE_REG 14, R15, R15D, R15W, R15B, 72
DECLARE_REG 0, rdi
DECLARE_REG 1, rsi
DECLARE_REG 2, rdx
DECLARE_REG 3, rcx
DECLARE_REG 4, R8
DECLARE_REG 5, R9
DECLARE_REG 6, rax, 8
DECLARE_REG 7, R10, 16
DECLARE_REG 8, R11, 24
DECLARE_REG 9, rbx, 32
DECLARE_REG 10, rbp, 40
DECLARE_REG 11, R12, 48
DECLARE_REG 12, R13, 56
DECLARE_REG 13, R14, 64
DECLARE_REG 14, R15, 72
%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
%assign num_args %1
@ -434,6 +430,8 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 72
DEFINE_ARGS %4
%endmacro
%define has_epilogue regs_used > 9 || mmsize == 32
%macro RET 0
POP_IF_USED 14, 13, 12, 11, 10, 9
%if mmsize == 32
@ -442,23 +440,15 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 72
ret
%endmacro
%macro REP_RET 0
%if regs_used > 9 || mmsize == 32
RET
%else
rep ret
%endif
%endmacro
%else ; X86_32 ;==============================================================
DECLARE_REG 0, eax, eax, ax, al, 4
DECLARE_REG 1, ecx, ecx, cx, cl, 8
DECLARE_REG 2, edx, edx, dx, dl, 12
DECLARE_REG 3, ebx, ebx, bx, bl, 16
DECLARE_REG 4, esi, esi, si, null, 20
DECLARE_REG 5, edi, edi, di, null, 24
DECLARE_REG 6, ebp, ebp, bp, null, 28
DECLARE_REG 0, eax, 4
DECLARE_REG 1, ecx, 8
DECLARE_REG 2, edx, 12
DECLARE_REG 3, ebx, 16
DECLARE_REG 4, esi, 20
DECLARE_REG 5, edi, 24
DECLARE_REG 6, ebp, 28
%define rsp esp
%macro DECLARE_ARG 1-*
@ -474,6 +464,9 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
%assign num_args %1
%assign regs_used %2
%if num_args > 7
%assign num_args 7
%endif
%if regs_used > 7
%assign regs_used 7
%endif
@ -483,6 +476,8 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
DEFINE_ARGS %4
%endmacro
%define has_epilogue regs_used > 3 || mmsize == 32
%macro RET 0
POP_IF_USED 6, 5, 4, 3
%if mmsize == 32
@ -491,14 +486,6 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
ret
%endmacro
%macro REP_RET 0
%if regs_used > 3 || mmsize == 32
RET
%else
rep ret
%endif
%endmacro
%endif ;======================================================================
%if WIN64 == 0
@ -508,6 +495,23 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
%endmacro
%endif
%macro REP_RET 0
%if has_epilogue
RET
%else
rep ret
%endif
%endmacro
%macro TAIL_CALL 2 ; callee, is_nonadjacent
%if has_epilogue
call %1
RET
%elif %2
jmp %1
%endif
%endmacro
;=============================================================================
; arch-independent part
;=============================================================================
@ -597,6 +601,8 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%assign cpuflags_avx (1<<11)| cpuflags_sse42
%assign cpuflags_xop (1<<12)| cpuflags_avx
%assign cpuflags_fma4 (1<<13)| cpuflags_avx
%assign cpuflags_avx2 (1<<14)| cpuflags_avx
%assign cpuflags_fma3 (1<<15)| cpuflags_avx
%assign cpuflags_cache32 (1<<16)
%assign cpuflags_cache64 (1<<17)
@ -605,6 +611,9 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
%assign cpuflags_misalign (1<<20)
%assign cpuflags_aligned (1<<21) ; not a cpu feature, but a function variant
%assign cpuflags_atom (1<<22)
%assign cpuflags_bmi1 (1<<23)
%assign cpuflags_bmi2 (1<<24)|cpuflags_bmi1
%assign cpuflags_tbm (1<<25)|cpuflags_bmi1
%define cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
%define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
@ -875,25 +884,38 @@ INIT_XMM
%endrep
%undef i
%macro CHECK_AVX_INSTR_EMU 3-*
%xdefine %%opcode %1
%xdefine %%dst %2
%rep %0-2
%ifidn %%dst, %3
%error non-avx emulation of ``%%opcode'' is not supported
%endif
%rotate 1
%endrep
%endmacro
;%1 == instruction
;%2 == 1 if float, 0 if int
;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm)
;%4 == number of operands given
;%5+: operands
%macro RUN_AVX_INSTR 6-7+
%ifid %5
%define %%size sizeof%5
%ifid %6
%define %%sizeofreg sizeof%6
%elifid %5
%define %%sizeofreg sizeof%5
%else
%define %%size mmsize
%define %%sizeofreg mmsize
%endif
%if %%size==32
%if %0 >= 7
%if %%sizeofreg==32
%if %4>=3
v%1 %5, %6, %7
%else
v%1 %5, %6
%endif
%else
%if %%size==8
%if %%sizeofreg==8
%define %%regmov movq
%elif %2
%define %%regmov movaps
@ -903,16 +925,17 @@ INIT_XMM
%if %4>=3+%3
%ifnidn %5, %6
%if avx_enabled && sizeof%5==16
%if avx_enabled && %%sizeofreg==16
v%1 %5, %6, %7
%else
CHECK_AVX_INSTR_EMU {%1 %5, %6, %7}, %5, %7
%%regmov %5, %6
%1 %5, %7
%endif
%else
%1 %5, %7
%endif
%elif %3
%elif %4>=3
%1 %5, %6, %7
%else
%1 %5, %6
@ -943,7 +966,7 @@ INIT_XMM
;%1 == instruction
;%2 == 1 if float, 0 if int
;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 3-operand (xmm, xmm, xmm)
;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm)
;%4 == 1 if symmetric (i.e. doesn't matter which src arg is which), 0 if not
%macro AVX_INSTR 4
%macro %1 2-9 fnord, fnord, fnord, %1, %2, %3, %4
@ -1008,6 +1031,9 @@ AVX_INSTR mulsd, 1, 0, 1
AVX_INSTR mulss, 1, 0, 1
AVX_INSTR orpd, 1, 0, 1
AVX_INSTR orps, 1, 0, 1
AVX_INSTR pabsb, 0, 0, 0
AVX_INSTR pabsw, 0, 0, 0
AVX_INSTR pabsd, 0, 0, 0
AVX_INSTR packsswb, 0, 0, 0
AVX_INSTR packssdw, 0, 0, 0
AVX_INSTR packuswb, 0, 0, 0
@ -1059,6 +1085,7 @@ AVX_INSTR pminsd, 0, 0, 1
AVX_INSTR pminub, 0, 0, 1
AVX_INSTR pminuw, 0, 0, 1
AVX_INSTR pminud, 0, 0, 1
AVX_INSTR pmovmskb, 0, 0, 0
AVX_INSTR pmulhuw, 0, 0, 1
AVX_INSTR pmulhrsw, 0, 0, 1
AVX_INSTR pmulhw, 0, 0, 1
@ -1069,6 +1096,9 @@ AVX_INSTR pmuldq, 0, 0, 1
AVX_INSTR por, 0, 0, 1
AVX_INSTR psadbw, 0, 0, 1
AVX_INSTR pshufb, 0, 0, 0
AVX_INSTR pshufd, 0, 1, 0
AVX_INSTR pshufhw, 0, 1, 0
AVX_INSTR pshuflw, 0, 1, 0
AVX_INSTR psignb, 0, 0, 0
AVX_INSTR psignw, 0, 0, 0
AVX_INSTR psignd, 0, 0, 0
@ -1090,6 +1120,7 @@ AVX_INSTR psubsb, 0, 0, 0
AVX_INSTR psubsw, 0, 0, 0
AVX_INSTR psubusb, 0, 0, 0
AVX_INSTR psubusw, 0, 0, 0
AVX_INSTR ptest, 0, 0, 0
AVX_INSTR punpckhbw, 0, 0, 0
AVX_INSTR punpckhwd, 0, 0, 0
AVX_INSTR punpckhdq, 0, 0, 0
@ -1154,3 +1185,7 @@ FMA_INSTR fmaddps, mulps, addps
FMA_INSTR pmacsdd, pmulld, paddd
FMA_INSTR pmacsww, pmullw, paddw
FMA_INSTR pmadcswd, pmaddwd, paddd
; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
; This lets us use tzcnt without bumping the yasm version requirement yet.
%define tzcnt rep bsf

@ -17,7 +17,7 @@ $(SUBDIR)%-test.o: $(SUBDIR)%.c
$(COMPILE_C)
$(SUBDIR)x86/%.o: $(SUBDIR)x86/%.asm
$(YASMDEP) $(YASMFLAGS) -I $(<D)/ -M -o $@ $< > $(@:.o=.d)
$(DEPYASM) $(YASMFLAGS) -I $(<D)/ -M -o $@ $< > $(@:.o=.d)
$(YASM) $(YASMFLAGS) -I $(<D)/ -o $@ $<
$(OBJS) $(OBJS:.o=.s) $(SUBDIR)%.ho $(TESTOBJS): CPPFLAGS += -DHAVE_AV_CONFIG_H

@ -144,8 +144,8 @@ int main(int argc, char **argv)
int nb_channels = 2;
char *ext;
if (argc < 2 || argc > 4) {
printf("usage: %s file [<sample rate> [<channels>]]\n"
if (argc < 2 || argc > 5) {
printf("usage: %s file [<sample rate> [<channels>] [<random seed>]]\n"
"generate a test raw 16 bit audio stream\n"
"If the file extension is .wav a WAVE header will be added.\n"
"default: 44100 Hz stereo\n", argv[0]);
@ -168,6 +168,9 @@ int main(int argc, char **argv)
}
}
if (argc > 4)
seed = atoi(argv[4]);
outfile = fopen(argv[1], "wb");
if (!outfile) {
perror(argv[1]);

@ -1,3 +1,25 @@
FATE_AMIX += fate-filter-amix-simple
fate-filter-amix-simple: CMD = ffmpeg -filter_complex amix -i $(SRC) -ss 3 -i $(SRC1) -f f32le -
fate-filter-amix-simple: REF = $(SAMPLES)/filter/amix_simple.pcm
FATE_AMIX += fate-filter-amix-first
fate-filter-amix-first: CMD = ffmpeg -filter_complex amix=duration=first -ss 4 -i $(SRC) -i $(SRC1) -f f32le -
fate-filter-amix-first: REF = $(SAMPLES)/filter/amix_first.pcm
FATE_AMIX += fate-filter-amix-transition
fate-filter-amix-transition: tests/data/asynth-44100-2-3.wav
fate-filter-amix-transition: SRC2 = $(TARGET_PATH)/tests/data/asynth-44100-2-3.wav
fate-filter-amix-transition: CMD = ffmpeg -filter_complex amix=inputs=3:dropout_transition=0.5 -i $(SRC) -ss 2 -i $(SRC1) -ss 4 -i $(SRC2) -f f32le -
fate-filter-amix-transition: REF = $(SAMPLES)/filter/amix_transition.pcm
$(FATE_AMIX): tests/data/asynth-44100-2.wav tests/data/asynth-44100-2-2.wav
$(FATE_AMIX): SRC = $(TARGET_PATH)/tests/data/asynth-44100-2.wav
$(FATE_AMIX): SRC1 = $(TARGET_PATH)/tests/data/asynth-44100-2-2.wav
$(FATE_AMIX): CMP = oneoff
FATE_FILTER += $(FATE_AMIX)
FATE_SAMPLES_AVCONV += $(FATE_AMIX)
FATE_ASYNCTS += fate-filter-asyncts
fate-filter-asyncts: SRC = $(SAMPLES)/nellymoser/nellymoser-discont.flv
fate-filter-asyncts: CMD = pcm -i $(SRC) -af aresample=min_comp=0.001:min_hard_comp=0.1
@ -7,4 +29,18 @@ fate-filter-asyncts: REF = $(SAMPLES)/nellymoser/nellymoser-discont.pcm
FATE_FILTER += $(FATE_ASYNCTS)
FATE_SAMPLES_AVCONV += $(FATE_ASYNCTS)
fate-filter-delogo: CMD = framecrc -i $(SAMPLES)/real/rv30.rm -vf delogo=show=0:x=290:y=25:w=26:h=16 -an
FATE_FILTER += fate-filter-delogo
FATE_SAMPLES_AVCONV += fate-filter-delogo
FATE_YADIF += fate-filter-yadif-mode0
fate-filter-yadif-mode0: CMD = framecrc -i $(SAMPLES)/mpeg2/mpeg2_field_encoding.ts -vf yadif=0
FATE_YADIF += fate-filter-yadif-mode1
fate-filter-yadif-mode1: CMD = framecrc -i $(SAMPLES)/mpeg2/mpeg2_field_encoding.ts -vf yadif=1
FATE_FILTER += $(FATE_YADIF)
FATE_SAMPLES_AVCONV += $(FATE_YADIF)
fate-filter: $(FATE_FILTER)

@ -0,0 +1,110 @@
#tb 0: 32768/982057
0, 0, 0, 1, 126720, 0x689de87e
0, 1, 1, 1, 126720, 0x3db9e91c
0, 2, 2, 1, 126720, 0x3db9e91c
0, 3, 3, 1, 126720, 0x3db9e91c
0, 4, 4, 1, 126720, 0xfa6ae95e
0, 5, 5, 1, 126720, 0x5bcbf0e6
0, 6, 6, 1, 126720, 0x94a0f126
0, 7, 7, 1, 126720, 0x0250f106
0, 8, 8, 1, 126720, 0xcf6ab4bc
0, 9, 9, 1, 126720, 0x44aeb57c
0, 10, 10, 1, 126720, 0x33b0b5bc
0, 11, 11, 1, 126720, 0xc4bab591
0, 12, 12, 1, 126720, 0xa492b5ec
0, 13, 13, 1, 126720, 0x1459b85c
0, 14, 14, 1, 126720, 0x806fb8dc
0, 15, 15, 1, 126720, 0xd241b871
0, 16, 16, 1, 126720, 0x698eb5cc
0, 17, 17, 1, 126720, 0x4719aa98
0, 18, 18, 1, 126720, 0x9ca1962c
0, 19, 19, 1, 126720, 0x18cda460
0, 20, 20, 1, 126720, 0xc230b716
0, 21, 21, 1, 126720, 0x8451a4e2
0, 22, 22, 1, 126720, 0x59e9a7ea
0, 23, 23, 1, 126720, 0xc77ca73d
0, 24, 24, 1, 126720, 0x725fb976
0, 25, 25, 1, 126720, 0xb30da3b3
0, 26, 26, 1, 126720, 0x7af2ea86
0, 27, 27, 1, 126720, 0x40d4b4eb
0, 28, 28, 1, 126720, 0x49d00307
0, 29, 29, 1, 126720, 0x44c8848e
0, 30, 30, 1, 126720, 0xc6990101
0, 31, 31, 1, 126720, 0x2e01b963
0, 32, 32, 1, 126720, 0xd0e903f0
0, 33, 33, 1, 126720, 0x3457d592
0, 34, 34, 1, 126720, 0x4f1ddb3c
0, 35, 35, 1, 126720, 0x3980ace5
0, 36, 36, 1, 126720, 0xb1e37954
0, 37, 37, 1, 126720, 0x619fc554
0, 38, 38, 1, 126720, 0x945fb39e
0, 39, 39, 1, 126720, 0xb1d5e0ce
0, 40, 40, 1, 126720, 0xf26e1dcc
0, 41, 41, 1, 126720, 0x04d5783e
0, 42, 42, 1, 126720, 0xbaa0479e
0, 43, 43, 1, 126720, 0x20d88b01
0, 44, 44, 1, 126720, 0x59d99901
0, 45, 45, 1, 126720, 0x1c6e09f6
0, 46, 46, 1, 126720, 0xeec50fc5
0, 47, 47, 1, 126720, 0xb3a92827
0, 48, 48, 1, 126720, 0xf62dd2b6
0, 49, 49, 1, 126720, 0x75b1e619
0, 50, 50, 1, 126720, 0x6bbce2c0
0, 51, 51, 1, 126720, 0xd93e023c
0, 52, 52, 1, 126720, 0xbbe8e7c2
0, 53, 53, 1, 126720, 0x2272ec17
0, 54, 54, 1, 126720, 0xf5e4ee6e
0, 55, 55, 1, 126720, 0x751d2607
0, 56, 56, 1, 126720, 0x44c499c9
0, 57, 57, 1, 126720, 0xddccd842
0, 58, 58, 1, 126720, 0x508dd214
0, 59, 59, 1, 126720, 0x8eb10272
0, 60, 60, 1, 126720, 0x7224b1c6
0, 61, 61, 1, 126720, 0x50ff456c
0, 62, 62, 1, 126720, 0xa81e2731
0, 63, 63, 1, 126720, 0x7e50456d
0, 64, 64, 1, 126720, 0x44802978
0, 65, 65, 1, 126720, 0x86e88743
0, 66, 66, 1, 126720, 0x0b1087d6
0, 67, 67, 1, 126720, 0xb0227d21
0, 68, 68, 1, 126720, 0x29d10bd2
0, 69, 69, 1, 126720, 0x04b43afa
0, 70, 70, 1, 126720, 0xb48e9698
0, 71, 71, 1, 126720, 0x75d760fb
0, 72, 72, 1, 126720, 0xa2ab1fdb
0, 73, 73, 1, 126720, 0xec30a5ee
0, 74, 74, 1, 126720, 0xbdab7c8c
0, 75, 75, 1, 126720, 0xac5c3f2c
0, 76, 76, 1, 126720, 0xce6350be
0, 77, 77, 1, 126720, 0xb109657a
0, 78, 78, 1, 126720, 0x723865a4
0, 79, 79, 1, 126720, 0xa9869124
0, 80, 80, 1, 126720, 0xc41af558
0, 81, 81, 1, 126720, 0xcbe6a402
0, 82, 82, 1, 126720, 0xb6735ecb
0, 83, 83, 1, 126720, 0xba3059f2
0, 84, 84, 1, 126720, 0xe7d63b8d
0, 85, 85, 1, 126720, 0x8f115906
0, 86, 86, 1, 126720, 0xaf6a8dcb
0, 87, 87, 1, 126720, 0xb73e846e
0, 88, 88, 1, 126720, 0xedd6380f
0, 89, 89, 1, 126720, 0xd9026acf
0, 90, 90, 1, 126720, 0xa03a650b
0, 91, 91, 1, 126720, 0x262765bc
0, 92, 92, 1, 126720, 0xaaa9ded1
0, 93, 93, 1, 126720, 0xe4f42665
0, 94, 94, 1, 126720, 0x78daf760
0, 95, 95, 1, 126720, 0x3b0c6ef8
0, 96, 96, 1, 126720, 0xb745df80
0, 97, 97, 1, 126720, 0x08e57b90
0, 98, 98, 1, 126720, 0x6f883ab0
0, 99, 99, 1, 126720, 0x934b4dd5
0, 100, 100, 1, 126720, 0x762f108f
0, 101, 101, 1, 126720, 0x91ee0f2b
0, 102, 102, 1, 126720, 0x9af6e5e8
0, 103, 103, 1, 126720, 0xdcd95e0a
0, 104, 104, 1, 126720, 0x22c33a6e
0, 105, 105, 1, 126720, 0x21c1b7f4
0, 106, 106, 1, 126720, 0x0a66a1ed
0, 107, 107, 1, 126720, 0x53fea81b
0, 108, 108, 1, 126720, 0x597f5567

@ -0,0 +1,32 @@
#tb 0: 1/25
0, 9, 9, 1, 622080, 0x1511cae9
0, 10, 10, 1, 622080, 0x6e77e746
0, 11, 11, 1, 622080, 0x89aac777
0, 12, 12, 1, 622080, 0x7e0a9335
0, 13, 13, 1, 622080, 0x5f34759b
0, 14, 14, 1, 622080, 0xfac498a6
0, 15, 15, 1, 622080, 0xe60e7a9e
0, 16, 16, 1, 622080, 0x44875bbd
0, 17, 17, 1, 622080, 0xfa761aab
0, 18, 18, 1, 622080, 0x59be119c
0, 19, 19, 1, 622080, 0x21316b36
0, 20, 20, 1, 622080, 0x929fde5b
0, 21, 21, 1, 622080, 0xfca8990c
0, 22, 22, 1, 622080, 0x1ec87d02
0, 23, 23, 1, 622080, 0x5768eea0
0, 24, 24, 1, 622080, 0x1a0894ab
0, 25, 25, 1, 622080, 0xb4e61323
0, 26, 26, 1, 622080, 0xb773341a
0, 27, 27, 1, 622080, 0x8a914cf7
0, 28, 28, 1, 622080, 0xf1cfbc7d
0, 29, 29, 1, 622080, 0xebaeb317
0, 30, 30, 1, 622080, 0xbae9adf4
0, 31, 31, 1, 622080, 0x593544fd
0, 32, 32, 1, 622080, 0x2cd8ec0b
0, 33, 33, 1, 622080, 0x8032d9d4
0, 34, 34, 1, 622080, 0x5c67ace7
0, 35, 35, 1, 622080, 0x95714528
0, 36, 36, 1, 622080, 0xa11cbed2
0, 37, 37, 1, 622080, 0x7389f8f1
0, 38, 38, 1, 622080, 0xa694f3f2
0, 39, 39, 1, 622080, 0xac3a3d09

@ -0,0 +1,34 @@
#tb 0: 1/25
0, 9, 9, 1, 622080, 0x1511cae9
0, 10, 10, 1, 622080, 0xb88ca855
0, 11, 11, 1, 622080, 0x6e77e746
0, 12, 12, 1, 622080, 0x5da19198
0, 13, 13, 1, 622080, 0xee31c8a8
0, 14, 14, 1, 622080, 0xcbb7aac5
0, 15, 15, 1, 622080, 0x19972f1a
0, 16, 16, 1, 622080, 0xac7d34b9
0, 17, 17, 1, 622080, 0x4adfe592
0, 18, 18, 1, 622080, 0x5d738330
0, 19, 19, 1, 622080, 0xb60b4447
0, 20, 20, 1, 622080, 0x1e11acf4
0, 21, 21, 1, 622080, 0x5ed635d0
0, 22, 22, 1, 622080, 0x939857af
0, 23, 23, 1, 622080, 0x530b28fd
0, 24, 24, 1, 622080, 0x3bc0d5d3
0, 25, 25, 1, 622080, 0x77e0fe99
0, 26, 26, 1, 622080, 0xd2151c1e
0, 27, 27, 1, 622080, 0xe021a815
0, 28, 28, 1, 622080, 0xceae4f12
0, 29, 29, 1, 622080, 0x4c2f3330
0, 30, 30, 1, 622080, 0xf534c392
0, 31, 31, 1, 622080, 0x88f01c11
0, 32, 32, 1, 622080, 0x654d5df2
0, 33, 33, 1, 622080, 0x89ef6f8a
0, 34, 34, 1, 622080, 0x78a7b5f1
0, 35, 35, 1, 622080, 0x8152d67f
0, 36, 36, 1, 622080, 0x6590ff5f
0, 37, 37, 1, 622080, 0x51d2be96
0, 38, 38, 1, 622080, 0x483f65f7
0, 39, 39, 1, 622080, 0x7a69143d
0, 40, 40, 1, 622080, 0xeccc58ff
0, 41, 41, 1, 622080, 0xc4d2c370
Loading…
Cancel
Save