Fix and enable horizontal >=SSE2 mbedge loopfilter.

Originally committed as revision 24409 to svn://svn.ffmpeg.org/ffmpeg/trunk
oldabi
Ronald S. Bultje 15 years ago
parent c7b1d9768c
commit 003243c3c2
  1. 12
      libavcodec/x86/vp8dsp-init.c
  2. 4
      libavcodec/x86/vp8dsp.asm

@@ -343,16 +343,16 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;
c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_mmxext;
c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_sse2;
c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_sse2;
}
if (mm_flags & FF_MM_SSE2) {
c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;
//c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2;
//c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2;
c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_sse2;
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_sse2;
}
if (mm_flags & FF_MM_SSSE3) {
@@ -372,9 +372,9 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;
c->vp8_v_loop_filter16y = ff_vp8_v_loop_filter16y_mbedge_ssse3;
//c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3;
c->vp8_h_loop_filter16y = ff_vp8_h_loop_filter16y_mbedge_ssse3;
c->vp8_v_loop_filter8uv = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
//c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
c->vp8_h_loop_filter8uv = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
}
if (mm_flags & FF_MM_SSE4) {

@@ -2513,8 +2513,8 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
%else ; sse2 (h)
lea dst8_reg, [dst8_reg+mstride_reg+1]
WRITE_4x4D 1, 2, 3, 4, dst_reg, dst2_reg, dst8_reg, mstride_reg, stride_reg, %4
add dst_reg, 4
add dst8_reg, 4
lea dst_reg, [dst2_reg+mstride_reg+4]
lea dst8_reg, [dst8_reg+mstride_reg+4]
WRITE_8W m5, m5, dst2_reg, dst_reg, mstride_reg, stride_reg
WRITE_8W m6, m6, dst2_reg, dst8_reg, mstride_reg, stride_reg
%endif

Loading…
Cancel
Save