diff --git a/libswscale/x86/input.asm b/libswscale/x86/input.asm
index fcdfe2fcd8..6de6733faa 100644
--- a/libswscale/x86/input.asm
+++ b/libswscale/x86/input.asm
@@ -133,23 +133,18 @@ SECTION .text
 ; %2 = rgb or bgr
 %macro RGB24_TO_Y_FN 2-3
 cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table
-%if mmsize == 8
-    mova           m5, [%2_Ycoeff_12x4]
-    mova           m6, [%2_Ycoeff_3x56]
-%define coeff1 m5
-%define coeff2 m6
-%elif ARCH_X86_64
+%if ARCH_X86_64
     mova           m8, [%2_Ycoeff_12x4]
     mova           m9, [%2_Ycoeff_3x56]
 %define coeff1 m8
 %define coeff2 m9
-%else ; x86-32 && mmsize == 16
+%else ; x86-32
 %define coeff1 [%2_Ycoeff_12x4]
 %define coeff2 [%2_Ycoeff_3x56]
-%endif ; x86-32/64 && mmsize == 8/16
-%if (ARCH_X86_64 || mmsize == 8) && %0 == 3
+%endif ; x86-32/64
+%if ARCH_X86_64 && %0 == 3
     jmp mangle(private_prefix %+ _ %+ %3 %+ 24ToY %+ SUFFIX).body
-%else ; (ARCH_X86_64 && %0 == 3) || mmsize == 8
+%else ; ARCH_X86_64 && %0 == 3
 .body:
 %if cpuflag(ssse3)
     mova           m7, [shuf_rgb_12x4]
@@ -184,7 +179,6 @@ cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table
     movd           m1, [srcq+2]           ; (byte) { R0, B1, G1, R1 }
     movd           m2, [srcq+6]           ; (byte) { B2, G2, R2, B3 }
     movd           m3, [srcq+8]           ; (byte) { R2, B3, G3, R3 }
-%if mmsize == 16 ; i.e. sse2
     punpckldq      m0, m2                 ; (byte) { B0, G0, R0, B1, B2, G2, R2, B3 }
     punpckldq      m1, m3                 ; (byte) { R0, B1, G1, R1, R2, B3, G3, R3 }
     movd           m2, [srcq+12]          ; (byte) { B4, G4, R4, B5 }
@@ -193,7 +187,6 @@ cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table
     movd           m6, [srcq+20]          ; (byte) { R6, B7, G7, R7 }
     punpckldq      m2, m5                 ; (byte) { B4, G4, R4, B5, B6, G6, R6, B7 }
     punpckldq      m3, m6                 ; (byte) { R4, B5, G5, R5, R6, B7, G7, R7 }
-%endif ; mmsize == 16
     punpcklbw      m0, m7                 ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 }
     punpcklbw      m1, m7                 ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 }
     punpcklbw      m2, m7                 ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 }
@@ -215,7 +208,7 @@ cglobal %2 %+ 24ToY, 6, 6, %1, dst, src, u1, u2, w, table
     add            wq, mmsize
     jl .loop
     REP_RET
-%endif ; (ARCH_X86_64 && %0 == 3) || mmsize == 8
+%endif ; ARCH_X86_64 && %0 == 3
 %endmacro
 
 ; %1 = nr. of XMM registers
@@ -275,12 +268,10 @@ cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
     movd           m1, [srcq+2]           ; (byte) { R0, B1, G1, R1 }
     movd           m4, [srcq+6]           ; (byte) { B2, G2, R2, B3 }
     movd           m5, [srcq+8]           ; (byte) { R2, B3, G3, R3 }
-%if mmsize == 16
     punpckldq      m0, m4                 ; (byte) { B0, G0, R0, B1, B2, G2, R2, B3 }
     punpckldq      m1, m5                 ; (byte) { R0, B1, G1, R1, R2, B3, G3, R3 }
     movd           m4, [srcq+12]          ; (byte) { B4, G4, R4, B5 }
     movd           m5, [srcq+14]          ; (byte) { R4, B5, G5, R5 }
-%endif ; mmsize == 16
     punpcklbw      m0, m7                 ; (word) { B0, G0, R0, B1, B2, G2, R2, B3 }
     punpcklbw      m1, m7                 ; (word) { R0, B1, G1, R1, R2, B3, G3, R3 }
 %endif ; cpuflag(ssse3)
@@ -294,12 +285,10 @@ cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
     pshufb         m5, m4, shuf_rgb2      ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 }
     pshufb         m4, shuf_rgb1          ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 }
 %else ; !cpuflag(ssse3)
-%if mmsize == 16
     movd           m1, [srcq+18]          ; (byte) { B6, G6, R6, B7 }
     movd           m3, [srcq+20]          ; (byte) { R6, B7, G7, R7 }
     punpckldq      m4, m1                 ; (byte) { B4, G4, R4, B5, B6, G6, R6, B7 }
     punpckldq      m5, m3                 ; (byte) { R4, B5, G5, R5, R6, B7, G7, R7 }
-%endif ; mmsize == 16 && !cpuflag(ssse3)
     punpcklbw      m4, m7                 ; (word) { B4, G4, R4, B5, B6, G6, R6, B7 }
     punpcklbw      m5, m7                 ; (word) { R4, B5, G5, R5, R6, B7, G7, R7 }
 %endif ; cpuflag(ssse3)
@@ -320,13 +309,8 @@ cglobal %2 %+ 24ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
     psrad          m4, 9
     packssdw       m0, m1                 ; (word) { U[0-7] }
     packssdw       m2, m4                 ; (word) { V[0-7] }
-%if mmsize == 8
     mova [dstUq+wq], m0
     mova [dstVq+wq], m2
-%else ; mmsize == 16
-    mova [dstUq+wq], m0
-    mova [dstVq+wq], m2
-%endif ; mmsize == 8/16
     add            wq, mmsize
     jl .loop
     REP_RET
@@ -342,11 +326,6 @@ RGB24_TO_UV_FN %2, rgb
 RGB24_TO_UV_FN %2, bgr, rgb
 %endmacro
 
-%if ARCH_X86_32
-INIT_MMX mmx
-RGB24_FUNCS 0, 0
-%endif
-
 INIT_XMM sse2
 RGB24_FUNCS 10, 12
 
@@ -483,13 +462,8 @@ cglobal %2%3%4%5 %+ ToUV, 7, 7, %1, dstU, dstV, u1, src, u2, w, table
     psrad          m1, 9
     packssdw       m0, m4                 ; (word) { U[0-7] }
     packssdw       m2, m1                 ; (word) { V[0-7] }
-%if mmsize == 8
     mova [dstUq+wq], m0
     mova [dstVq+wq], m2
-%else ; mmsize == 16
-    mova [dstUq+wq], m0
-    mova [dstVq+wq], m2
-%endif ; mmsize == 8/16
     add            wq, mmsize
     jl .loop
     sub            wq, mmsize - 1
@@ -535,11 +509,6 @@ RGB32_TO_UV_FN %2, a, r, g, b, rgba
 RGB32_TO_UV_FN %2, a, b, g, r, rgba
 %endmacro
 
-%if ARCH_X86_32
-INIT_MMX mmx
-RGB32_FUNCS 0, 0
-%endif
-
 INIT_XMM sse2
 RGB32_FUNCS 8, 12
 
@@ -588,25 +557,18 @@ cglobal %2ToY, 5, 5, %1, dst, unused0, unused1, src, w
     movsxd         wq, wd
 %endif
     add          dstq, wq
-%if mmsize == 16
     test         srcq, 15
-%endif
     lea          srcq, [srcq+wq*2]
 %ifidn %2, yuyv
     pcmpeqb        m2, m2                 ; (byte) { 0xff } x 16
     psrlw          m2, 8                  ; (word) { 0x00ff } x 8
 %endif ; yuyv
-%if mmsize == 16
     jnz .loop_u_start
     neg            wq
     LOOP_YUYV_TO_Y  a, %2
 .loop_u_start:
     neg            wq
     LOOP_YUYV_TO_Y  u, %2
-%else ; mmsize == 8
-    neg            wq
-    LOOP_YUYV_TO_Y  a, %2
-%endif ; mmsize == 8/16
 %endmacro
 
 ; %1 = a (aligned) or u (unaligned)
@@ -632,16 +594,9 @@ cglobal %2ToY, 5, 5, %1, dst, unused0, unused1, src, w
     packuswb       m0, m1                 ; (byte) { U0, V0, ..., U7, V7 }
     pand           m1, m0, m2             ; (word) { U0, U1, ..., U7 }
     psrlw          m0, 8                  ; (word) { V0, V1, ..., V7 }
-%if mmsize == 16
     packuswb       m1, m0                 ; (byte) { U0, ... U7, V1, ... V7 }
     movh    [dstUq+wq], m1
     movhps  [dstVq+wq], m1
-%else ; mmsize == 8
-    packuswb       m1, m1                 ; (byte) { U0, ... U3 }
-    packuswb       m0, m0                 ; (byte) { V0, ... V3 }
-    movh    [dstUq+wq], m1
-    movh    [dstVq+wq], m0
-%endif ; mmsize == 8/16
     add            wq, mmsize / 2
     jl .loop_%1
     REP_RET
@@ -661,24 +616,24 @@ cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
 %endif
     add         dstUq, wq
     add         dstVq, wq
-%if mmsize == 16 && %0 == 2
+%if %0 == 2
     test         srcq, 15
 %endif
     lea          srcq, [srcq+wq*4]
     pcmpeqb        m2, m2                 ; (byte) { 0xff } x 16
     psrlw          m2, 8                  ; (word) { 0x00ff } x 8
     ; NOTE: if uyvy+avx, u/a are identical
-%if mmsize == 16 && %0 == 2
+%if %0 == 2
     jnz .loop_u_start
     neg            wq
     LOOP_YUYV_TO_UV a, %2
 .loop_u_start:
     neg            wq
     LOOP_YUYV_TO_UV u, %2
-%else ; mmsize == 8
+%else
     neg            wq
     LOOP_YUYV_TO_UV a, %2
-%endif ; mmsize == 8/16
+%endif
 %endmacro
 
 ; %1 = a (aligned) or u (unaligned)
@@ -716,35 +671,18 @@ cglobal %2ToUV, 4, 5, %1, dstU, dstV, unused, src, w
 %endif
     add         dstUq, wq
     add         dstVq, wq
-%if mmsize == 16
     test         srcq, 15
-%endif
     lea          srcq, [srcq+wq*2]
     pcmpeqb        m5, m5                 ; (byte) { 0xff } x 16
     psrlw          m5, 8                  ; (word) { 0x00ff } x 8
-%if mmsize == 16
     jnz .loop_u_start
     neg            wq
     LOOP_NVXX_TO_UV a, %2
 .loop_u_start:
     neg            wq
     LOOP_NVXX_TO_UV u, %2
-%else ; mmsize == 8
-    neg            wq
-    LOOP_NVXX_TO_UV a, %2
-%endif ; mmsize == 8/16
 %endmacro
 
-%if ARCH_X86_32
-INIT_MMX mmx
-YUYV_TO_Y_FN  0, yuyv
-YUYV_TO_Y_FN  0, uyvy
-YUYV_TO_UV_FN 0, yuyv
-YUYV_TO_UV_FN 0, uyvy
-NVXX_TO_UV_FN 0, nv12
-NVXX_TO_UV_FN 0, nv21
-%endif
-
 INIT_XMM sse2
 YUYV_TO_Y_FN  3, yuyv
 YUYV_TO_Y_FN  2, uyvy
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index 1e498fddf6..84e94baaf6 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -312,11 +312,9 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
 %endif ; %1 == 8/9/10/16
 %endmacro
 
-%if ARCH_X86_32
+%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
 INIT_MMX mmxext
 yuv2planeX_fn  8,  0, 7
-yuv2planeX_fn  9,  0, 5
-yuv2planeX_fn 10,  0, 5
 %endif
 
 INIT_XMM sse2
@@ -407,19 +405,11 @@ cglobal yuv2plane1_%1, %3, %3, %2, src, dst, w, dither, offset
     movq           m3, [ditherq]          ; dither
     test      offsetd, offsetd
     jz .no_rot
-%if mmsize == 16
     punpcklqdq     m3, m3
-%endif ; mmsize == 16
     PALIGNR        m3, m3, 3, m2
 .no_rot:
-%if mmsize == 8
-    mova           m2, m3
-    punpckhbw      m3, m4                 ; byte->word
-    punpcklbw      m2, m4                 ; byte->word
-%else
     punpcklbw      m3, m4
     mova           m2, m3
-%endif
 %elif %1 == 9
     pxor           m4, m4
     mova           m3, [pw_512]
@@ -431,36 +421,22 @@ cglobal yuv2plane1_%1, %3, %3, %2, src, dst, w, dither, offset
 %else ; %1 == 16
 %if cpuflag(sse4) ; sse4/avx
     mova           m4, [pd_4]
-%else ; mmx/sse2
+%else ; sse2
     mova           m4, [pd_4min0x40000]
     mova           m5, [minshort]
-%endif ; mmx/sse2/sse4/avx
+%endif ; sse2/sse4/avx
 %endif ; %1 == ..
 
     ; actual pixel scaling
-%if mmsize == 8
-    yuv2plane1_mainloop %1, a
-%else ; mmsize == 16
     test         dstq, 15
     jnz .unaligned
     yuv2plane1_mainloop %1, a
     REP_RET
 .unaligned:
     yuv2plane1_mainloop %1, u
-%endif ; mmsize == 8/16
     REP_RET
 %endmacro
 
-%if ARCH_X86_32
-INIT_MMX mmx
-yuv2plane1_fn  8, 0, 5
-yuv2plane1_fn 16, 0, 3
-
-INIT_MMX mmxext
-yuv2plane1_fn  9, 0, 3
-yuv2plane1_fn 10, 0, 3
-%endif
-
 INIT_XMM sse2
 yuv2plane1_fn  8, 5, 5
 yuv2plane1_fn  9, 5, 3
diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm
index 83cabff722..c62ae3dcc2 100644
--- a/libswscale/x86/scale.asm
+++ b/libswscale/x86/scale.asm
@@ -61,13 +61,11 @@ cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsi
 %define mov32 mov
 %endif ; x86-64
 %if %2 == 19
-%if mmsize == 8 ; mmx
-    mova          m2, [max_19bit_int]
-%elif cpuflag(sse4)
+%if cpuflag(sse4)
     mova          m2, [max_19bit_int]
 %else ; ssse3/sse2
     mova          m2, [max_19bit_flt]
-%endif ; mmx/sse2/ssse3/sse4
+%endif ; sse2/ssse3/sse4
 %endif ; %2 == 19
 %if %1 == 16
     mova          m6, [minshort]
@@ -144,12 +142,7 @@ cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsi
     pmaddwd       m1, [filterq+wq*8+mmsize*1] ; *= filter[{8,9,..,14,15}]
 
     ; add up horizontally (4 srcpix * 4 coefficients -> 1 dstpix)
-%if mmsize == 8 ; mmx
-    movq          m4, m0
-    punpckldq     m0, m1
-    punpckhdq     m4, m1
-    paddd         m0, m4
-%elif notcpuflag(ssse3) ; sse2
+%if notcpuflag(ssse3) ; sse2
     mova          m4, m0
     shufps        m0, m1, 10001000b
    shufps        m4, m1, 11011101b
@@ -159,7 +152,7 @@ cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsi
                                           ; filter[{ 4, 5, 6, 7}]*src[filterPos[1]+{0,1,2,3}],
                                           ; filter[{ 8, 9,10,11}]*src[filterPos[2]+{0,1,2,3}],
                                           ; filter[{12,13,14,15}]*src[filterPos[3]+{0,1,2,3}]
-%endif ; mmx/sse2/ssse3/sse4
+%endif ; sse2/ssse3/sse4
 %else ; %3 == 8, i.e. filterSize == 8 scaling
     ; load 2x8 or 4x8 source pixels into m0, m1, m4 and m5
     mov32      pos0q, dword [fltposq+wq*2+0] ; filterPos[0]
@@ -197,14 +190,7 @@ cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsi
     pmaddwd       m5, [filterq+wq*8+mmsize*3] ; *= filter[{24,25,..,30,31}]
 
     ; add up horizontally (8 srcpix * 8 coefficients -> 1 dstpix)
-%if mmsize == 8
-    paddd         m0, m1
-    paddd         m4, m5
-    movq          m1, m0
-    punpckldq     m0, m4
-    punpckhdq     m1, m4
-    paddd         m0, m1
-%elif notcpuflag(ssse3) ; sse2
+%if notcpuflag(ssse3) ; sse2
 %if %1 == 8
 %define mex m6
 %else
@@ -233,7 +219,7 @@ cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsi
                                           ; filter[{ 8, 9,...,14,15}]*src[filterPos[1]+{0,1,...,6,7}],
                                           ; filter[{16,17,...,22,23}]*src[filterPos[2]+{0,1,...,6,7}],
                                           ; filter[{24,25,...,30,31}]*src[filterPos[3]+{0,1,...,6,7}]
-%endif ; mmx/sse2/ssse3/sse4
+%endif ; sse2/ssse3/sse4
 %endif ; %3 == 4/8
 %else ; %3 == X, i.e. any filterSize scaling
@@ -274,7 +260,7 @@ cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsi
     mov         srcq, srcmemmp
 
 .innerloop:
-    ; load 2x4 (mmx) or 2x8 (sse) source pixels into m0/m1 -> m4/m5
+    ; load 2x8 (sse) source pixels into m0/m1 -> m4/m5
     movbh         m0, [srcq+ pos0q     *srcmul] ; src[filterPos[0] + {0,1,2,3(,4,5,6,7)}]
     movbh         m1, [srcq+(pos1q+dlt)*srcmul] ; src[filterPos[1] + {0,1,2,3(,4,5,6,7)}]
 %if %1 == 8
@@ -319,12 +305,6 @@ cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsi
 
     lea      filterq, [filterq+(fltsizeq+dlt)*2]
 
-%if mmsize == 8 ; mmx
-    movq          m0, m4
-    punpckldq     m4, m5
-    punpckhdq     m0, m5
-    paddd         m0, m4
-%else ; mmsize == 16
 %if notcpuflag(ssse3) ; sse2
     mova          m1, m4
     punpcklqdq    m4, m5
@@ -344,7 +324,6 @@ cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsi
     phaddd        m4, m4
     SWAP           0, 4
 %endif ; sse2/ssse3/sse4
-%endif ; mmsize == 8/16
 %endif ; %3 ==/!= X
 %if %1 == 16 ; add 0x8000 * sum(coeffs), i.e. back from signed -> unsigned
@@ -372,7 +351,7 @@ cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsi
 %endif ; %3 ==/!= X
 %endif ; %2 == 15/19
 %ifnidn %3, X
-    add           wq, (mmsize<
[...]
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c
--- a/libswscale/x86/swscale.c
+++ b/libswscale/x86/swscale.c
[...]
     if (c->use_mmx_vfilter && !(c->flags & SWS_ACCURATE_RND)) {
-#if HAVE_MMX_EXTERNAL
-        if (EXTERNAL_MMX(cpu_flags))
-            c->yuv2planeX = yuv2yuvX_mmx;
-#endif
 #if HAVE_MMXEXT_EXTERNAL
         if (EXTERNAL_MMXEXT(cpu_flags))
             c->yuv2planeX = yuv2yuvX_mmxext;
@@ -496,6 +469,14 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c)
             c->yuv2planeX = yuv2yuvX_avx2;
 #endif
     }
+#if ARCH_X86_32 && !HAVE_ALIGNED_STACK
+    // The better yuv2planeX_8 functions need aligned stack on x86-32,
+    // so we use MMXEXT in this case if they are not available.
+    if (EXTERNAL_MMXEXT(cpu_flags)) {
+        if (c->dstBpc == 8 && !c->use_mmx_vfilter)
+            c->yuv2planeX = ff_yuv2planeX_8_mmxext;
+    }
+#endif /* ARCH_X86_32 && !HAVE_ALIGNED_STACK */
 
 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
     if (c->srcBpc == 8) { \
@@ -519,12 +500,6 @@ av_cold void ff_sws_init_swscale_x86(SwsContext *c)
                 ff_hscale16to19_ ## filtersize ## _ ## opt1; \
         } \
     } while (0)
-#define ASSIGN_MMX_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
-    switch (filtersize) { \
-    case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
-    case 8:  ASSIGN_SCALE_FUNC2(hscalefn, 8, opt1, opt2); break; \
-    default: ASSIGN_SCALE_FUNC2(hscalefn, X, opt1, opt2); break; \
-    }
 #define ASSIGN_VSCALEX_FUNC(vscalefn, opt, do_16_case, condition_8bit) \
 switch(c->dstBpc){ \
     case 16: do_16_case; break; \
@@ -546,46 +521,6 @@ switch(c->dstBpc){ \
         if (!c->chrSrcHSubSample) \
             c->chrToYV12 = ff_ ## x ## ToUV_ ## opt; \
         break
-#if ARCH_X86_32
-    if (EXTERNAL_MMX(cpu_flags)) {
-        ASSIGN_MMX_SCALE_FUNC(c->hyScale, c->hLumFilterSize, mmx, mmx);
-        ASSIGN_MMX_SCALE_FUNC(c->hcScale, c->hChrFilterSize, mmx, mmx);
-        ASSIGN_VSCALE_FUNC(c->yuv2plane1, mmx, mmxext, cpu_flags & AV_CPU_FLAG_MMXEXT);
-
-        switch (c->srcFormat) {
-        case AV_PIX_FMT_YA8:
-            c->lumToYV12 = ff_yuyvToY_mmx;
-            if (c->needAlpha)
-                c->alpToYV12 = ff_uyvyToY_mmx;
-            break;
-        case AV_PIX_FMT_YUYV422:
-            c->lumToYV12 = ff_yuyvToY_mmx;
-            c->chrToYV12 = ff_yuyvToUV_mmx;
-            break;
-        case AV_PIX_FMT_UYVY422:
-            c->lumToYV12 = ff_uyvyToY_mmx;
-            c->chrToYV12 = ff_uyvyToUV_mmx;
-            break;
-        case AV_PIX_FMT_NV12:
-            c->chrToYV12 = ff_nv12ToUV_mmx;
-            break;
-        case AV_PIX_FMT_NV21:
-            c->chrToYV12 = ff_nv21ToUV_mmx;
-            break;
-        case_rgb(rgb24, RGB24, mmx);
-        case_rgb(bgr24, BGR24, mmx);
-        case_rgb(bgra,  BGRA,  mmx);
-        case_rgb(rgba,  RGBA,  mmx);
-        case_rgb(abgr,  ABGR,  mmx);
-        case_rgb(argb,  ARGB,  mmx);
-        default:
-            break;
-        }
-    }
-    if (EXTERNAL_MMXEXT(cpu_flags)) {
-        ASSIGN_VSCALEX_FUNC(c->yuv2planeX, mmxext, , 1);
-    }
-#endif /* ARCH_X86_32 */
 #define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
     switch (filtersize) { \
     case 4:  ASSIGN_SCALE_FUNC2(hscalefn, 4, opt1, opt2); break; \
diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c
index 97d8cae613..6190fcb4fe 100644
--- a/libswscale/x86/swscale_template.c
+++ b/libswscale/x86/swscale_template.c
@@ -29,13 +29,8 @@
 
 #undef PREFETCH
 
-#if COMPILE_TEMPLATE_MMXEXT
 #define REAL_MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
 #define MOVNTQ2 "movntq "
-#else
-#define REAL_MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
-#define MOVNTQ2 "movq "
-#endif
 #define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
 
 #define YSCALEYUV2PACKEDX_UV \
@@ -600,13 +595,8 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
     "cmp "dstw", "#index" \n\t"\
     " jb 1b \n\t"
 
-#if COMPILE_TEMPLATE_MMXEXT
 #undef WRITEBGR24
 #define WRITEBGR24(dst, dstw, index) WRITEBGR24MMXEXT(dst, dstw, index)
-#else
-#undef WRITEBGR24
-#define WRITEBGR24(dst, dstw, index) WRITEBGR24MMX(dst, dstw, index)
-#endif
 
 #if HAVE_6REGS
 static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
@@ -1478,17 +1468,13 @@ static av_cold void RENAME(sws_init_swscale)(SwsContext *c)
     }
 
     if (c->srcBpc == 8 && c->dstBpc <= 14) {
-        // Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one).
-#if COMPILE_TEMPLATE_MMXEXT
-        if (c->flags & SWS_FAST_BILINEAR && c->canMMXEXTBeUsed) {
-            c->hyscale_fast = ff_hyscale_fast_mmxext;
-            c->hcscale_fast = ff_hcscale_fast_mmxext;
-        } else {
-#endif /* COMPILE_TEMPLATE_MMXEXT */
-            c->hyscale_fast = NULL;
-            c->hcscale_fast = NULL;
-#if COMPILE_TEMPLATE_MMXEXT
-        }
-#endif /* COMPILE_TEMPLATE_MMXEXT */
+        // Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one).
+        if (c->flags & SWS_FAST_BILINEAR && c->canMMXEXTBeUsed) {
+            c->hyscale_fast = ff_hyscale_fast_mmxext;
+            c->hcscale_fast = ff_hcscale_fast_mmxext;
+        } else {
+            c->hyscale_fast = NULL;
+            c->hcscale_fast = NULL;
+        }
     }
 }