postproc: Fix unprotected inline asm

Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
13 years ago · e20ac54f3e
parent e592fd0ed1
commit e20ac54f3e
2 changed files with 114 additions and 114 deletions
--- a/libpostproc/postprocess.c
+++ b/libpostproc/postprocess.c
@ -80,9 +80,9 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-//#undef HAVE_MMXEXT
-//#define HAVE_AMD3DNOW
-//#undef HAVE_MMX
+//#undef HAVE_MMXEXT_INLINE
+//#define HAVE_AMD3DNOW_INLINE
+//#undef HAVE_MMX_INLINE
 //#undef ARCH_X86
 //#define DEBUG_BRIGHTNESS
 #include "postprocess.h"
@ -116,7 +116,7 @@ const char *postproc_license(void)
 #define TEMP_STRIDE 8
 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet

-#if ARCH_X86
+#if ARCH_X86 && HAVE_INLINE_ASM
 DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
 DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
 DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
@ -165,7 +165,7 @@ static const char *replaceTable[]=
 };


-#if ARCH_X86
+#if ARCH_X86 && HAVE_INLINE_ASM
 static inline void prefetchnta(void *p)
 {
    __asm__ volatile(   "prefetchnta (%0)\n\t"
@ -544,27 +544,27 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
 #define COMPILE_ALTIVEC
 #endif //HAVE_ALTIVEC

-#if ARCH_X86
+#if ARCH_X86 && HAVE_INLINE_ASM

-#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMXEXT) || CONFIG_RUNTIME_CPUDETECT
+#if (HAVE_MMX_INLINE && !HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
 #define COMPILE_MMX
 #endif

-#if HAVE_MMXEXT || CONFIG_RUNTIME_CPUDETECT
+#if HAVE_MMXEXT_INLINE || CONFIG_RUNTIME_CPUDETECT
 #define COMPILE_MMX2
 #endif

-#if (HAVE_AMD3DNOW && !HAVE_MMXEXT) || CONFIG_RUNTIME_CPUDETECT
+#if (HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
 #define COMPILE_3DNOW
 #endif
 #endif /* ARCH_X86 */

-#undef HAVE_MMX
-#define HAVE_MMX 0
-#undef HAVE_MMXEXT
-#define HAVE_MMXEXT 0
-#undef HAVE_AMD3DNOW
-#define HAVE_AMD3DNOW 0
+#undef HAVE_MMX_INLINE
+#define HAVE_MMX_INLINE 0
+#undef HAVE_MMXEXT_INLINE
+#define HAVE_MMXEXT_INLINE 0
+#undef HAVE_AMD3DNOW_INLINE
+#define HAVE_AMD3DNOW_INLINE 0
 #undef HAVE_ALTIVEC
 #define HAVE_ALTIVEC 0

@ -585,8 +585,8 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
 //MMX versions
 #ifdef COMPILE_MMX
 #undef RENAME
-#undef HAVE_MMX
-#define HAVE_MMX 1
+#undef HAVE_MMX_INLINE
+#define HAVE_MMX_INLINE 1
 #define RENAME(a) a ## _MMX
 #include "postprocess_template.c"
 #endif
@ -594,10 +594,10 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
 //MMX2 versions
 #ifdef COMPILE_MMX2
 #undef RENAME
-#undef HAVE_MMX
-#undef HAVE_MMXEXT
-#define HAVE_MMX 1
-#define HAVE_MMXEXT 1
+#undef HAVE_MMX_INLINE
+#undef HAVE_MMXEXT_INLINE
+#define HAVE_MMX_INLINE 1
+#define HAVE_MMXEXT_INLINE 1
 #define RENAME(a) a ## _MMX2
 #include "postprocess_template.c"
 #endif
@ -605,12 +605,12 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
 //3DNOW versions
 #ifdef COMPILE_3DNOW
 #undef RENAME
-#undef HAVE_MMX
-#undef HAVE_MMXEXT
-#undef HAVE_AMD3DNOW
-#define HAVE_MMX 1
-#define HAVE_MMXEXT 0
-#define HAVE_AMD3DNOW 1
+#undef HAVE_MMX_INLINE
+#undef HAVE_MMXEXT_INLINE
+#undef HAVE_AMD3DNOW_INLINE
+#define HAVE_MMX_INLINE 1
+#define HAVE_MMXEXT_INLINE 0
+#define HAVE_AMD3DNOW_INLINE 1
 #define RENAME(a) a ## _3DNow
 #include "postprocess_template.c"
 #endif
@ -633,7 +633,7 @@ static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[]
    // difference would not be measurable here but it is much better because
    // someone might exchange the CPU whithout restarting MPlayer ;)
 #if CONFIG_RUNTIME_CPUDETECT
-#if ARCH_X86
+#if ARCH_X86 && HAVE_INLINE_ASM
    // ordered per speed fastest first
    if(c->cpuCaps & PP_CPU_CAPS_MMX2)
        postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
@ -652,11 +652,11 @@ static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[]
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 #endif
 #else /* CONFIG_RUNTIME_CPUDETECT */
-#if   HAVE_MMXEXT
+#if   HAVE_MMXEXT_INLINE
            postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-#elif HAVE_AMD3DNOW
+#elif HAVE_AMD3DNOW_INLINE
            postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-#elif HAVE_MMX
+#elif HAVE_MMX_INLINE
            postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 #elif HAVE_ALTIVEC
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
--- a/libpostproc/postprocess_template.c
+++ b/libpostproc/postprocess_template.c
@ -30,32 +30,32 @@
 #undef PMINUB
 #undef PMAXUB

-#if   HAVE_MMXEXT
+#if   HAVE_MMXEXT_INLINE
 #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
-#elif HAVE_AMD3DNOW
+#elif HAVE_AMD3DNOW_INLINE
 #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
 #endif
 #define PAVGB(a,b)  REAL_PAVGB(a,b)

-#if   HAVE_MMXEXT
+#if   HAVE_MMXEXT_INLINE
 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
-#elif HAVE_MMX
+#elif HAVE_MMX_INLINE
 #define PMINUB(b,a,t) \
    "movq " #a ", " #t " \n\t"\
    "psubusb " #b ", " #t " \n\t"\
    "psubb " #t ", " #a " \n\t"
 #endif

-#if   HAVE_MMXEXT
+#if   HAVE_MMXEXT_INLINE
 #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
-#elif HAVE_MMX
+#elif HAVE_MMX_INLINE
 #define PMAXUB(a,b) \
    "psubusb " #a ", " #b " \n\t"\
    "paddb " #a ", " #b " \n\t"
 #endif

 //FIXME? |255-0| = 1 (should not be a problem ...)
-#if HAVE_MMX
+#if HAVE_MMX_INLINE
 /**
 * Check if the middle 8x8 Block in the given 8x16 block is flat
 */
@ -135,7 +135,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
        "psubusb %%mm3, %%mm4                   \n\t"

        "                                       \n\t"
-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
        "pxor %%mm7, %%mm7                      \n\t"
        "psadbw %%mm7, %%mm0                    \n\t"
 #else
@ -169,7 +169,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
        return 2;
    }
 }
-#endif //HAVE_MMX
+#endif //HAVE_MMX_INLINE

 /**
 * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle)
@ -178,7 +178,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
 #if !HAVE_ALTIVEC
 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
 {
-#if HAVE_MMXEXT || HAVE_AMD3DNOW
+#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
    src+= stride*3;
    __asm__ volatile(        //"movv %0 %1 %2\n\t"
        "movq %2, %%mm0                         \n\t"  // QP,..., QP
@ -305,7 +305,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
        : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
        : "%"REG_a, "%"REG_c
    );
-#else //HAVE_MMXEXT || HAVE_AMD3DNOW
+#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
    const int l1= stride;
    const int l2= stride + l1;
    const int l3= stride + l2;
@ -344,7 +344,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)

        src++;
    }
-#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
+#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
 }
 #endif //HAVE_ALTIVEC

@ -357,7 +357,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
 */
 static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
 {
-#if HAVE_MMXEXT || HAVE_AMD3DNOW
+#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
    src+= stride*3;

    __asm__ volatile(
@ -443,7 +443,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
        : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb)
        : "%"REG_a, "%"REG_c
    );
-#else //HAVE_MMXEXT || HAVE_AMD3DNOW
+#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE

    const int l1= stride;
    const int l2= stride + l1;
@ -477,13 +477,13 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
        }
        src++;
    }
-#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
+#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
 }

 #if !HAVE_ALTIVEC
 static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
 {
-#if HAVE_MMXEXT || HAVE_AMD3DNOW
+#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
 /*
    uint8_t tmp[16];
    const int l1= stride;
@ -764,7 +764,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
    }
 }
 */
-#elif HAVE_MMX
+#elif HAVE_MMX_INLINE
    DECLARE_ALIGNED(8, uint64_t, tmp)[4]; // make space for 4 8-byte vars
    src+= stride*4;
    __asm__ volatile(
@ -872,7 +872,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
        "movq (%3), %%mm2                       \n\t" // 2L0 - 5L1 + 5L2 - 2L3
        "movq 8(%3), %%mm3                      \n\t" // 2H0 - 5H1 + 5H2 - 2H3

-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
        "movq %%mm7, %%mm6                      \n\t" // 0
        "psubw %%mm0, %%mm6                     \n\t"
        "pmaxsw %%mm6, %%mm0                    \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
@ -904,7 +904,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
        "psubw %%mm6, %%mm3                     \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
 #endif

-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
        "pminsw %%mm2, %%mm0                    \n\t"
        "pminsw %%mm3, %%mm1                    \n\t"
 #else
@ -968,7 +968,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
        "pand %%mm2, %%mm4                      \n\t"
        "pand %%mm3, %%mm5                      \n\t"

-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
        "pminsw %%mm0, %%mm4                    \n\t"
        "pminsw %%mm1, %%mm5                    \n\t"
 #else
@ -995,7 +995,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
        : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp)
        : "%"REG_a
    );
-#else //HAVE_MMXEXT || HAVE_AMD3DNOW
+#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
    const int l1= stride;
    const int l2= stride + l1;
    const int l3= stride + l2;
@ -1033,14 +1033,14 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
        }
        src++;
    }
-#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
+#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
 }
 #endif //HAVE_ALTIVEC

 #if !HAVE_ALTIVEC
 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
 {
-#if HAVE_MMXEXT || HAVE_AMD3DNOW
+#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
    DECLARE_ALIGNED(8, uint64_t, tmp)[3];
    __asm__ volatile(
        "pxor %%mm6, %%mm6                      \n\t"
@ -1060,7 +1060,7 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)

 #undef REAL_FIND_MIN_MAX
 #undef FIND_MIN_MAX
-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
 #define REAL_FIND_MIN_MAX(addr)\
        "movq " #addr ", %%mm0                  \n\t"\
        "pminub %%mm0, %%mm7                    \n\t"\
@ -1087,7 +1087,7 @@ FIND_MIN_MAX((%0, %1, 8))

        "movq %%mm7, %%mm4                      \n\t"
        "psrlq $8, %%mm7                        \n\t"
-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
        "pminub %%mm4, %%mm7                    \n\t" // min of pixels
        "pshufw $0xF9, %%mm7, %%mm4             \n\t"
        "pminub %%mm4, %%mm7                    \n\t" // min of pixels
@ -1112,7 +1112,7 @@ FIND_MIN_MAX((%0, %1, 8))

        "movq %%mm6, %%mm4                      \n\t"
        "psrlq $8, %%mm6                        \n\t"
-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
        "pmaxub %%mm4, %%mm6                    \n\t" // max of pixels
        "pshufw $0xF9, %%mm6, %%mm4             \n\t"
        "pmaxub %%mm4, %%mm6                    \n\t"
@ -1266,7 +1266,7 @@ DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
        : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp)
        : "%"REG_a, "%"REG_d
    );
-#else //HAVE_MMXEXT || HAVE_AMD3DNOW
+#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
    int y;
    int min=255;
    int max=0;
@ -1383,7 +1383,7 @@ DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
 //        src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255;
    }
 #endif
-#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
+#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
 }
 #endif //HAVE_ALTIVEC

@ -1395,7 +1395,7 @@ DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
 */
 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
 {
-#if HAVE_MMXEXT || HAVE_AMD3DNOW
+#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
    src+= 4*stride;
    __asm__ volatile(
        "lea (%0, %1), %%"REG_a"                \n\t"
@ -1448,7 +1448,7 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
 */
 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
 {
-#if HAVE_MMXEXT || HAVE_AMD3DNOW
+#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
    src+= stride*3;
    __asm__ volatile(
        "lea (%0, %1), %%"REG_a"                \n\t"
@ -1490,7 +1490,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc)    , (%%REGc,
        : : "r" (src), "r" ((x86_reg)stride)
        : "%"REG_a, "%"REG_d, "%"REG_c
    );
-#else //HAVE_MMXEXT || HAVE_AMD3DNOW
+#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
    int x;
    src+= stride*3;
    for(x=0; x<8; x++){
@ -1500,7 +1500,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc)    , (%%REGc,
        src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
        src++;
    }
-#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
+#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
 }

 /**
@ -1512,7 +1512,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc)    , (%%REGc,
 */
 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
 {
-#if HAVE_MMXEXT || HAVE_AMD3DNOW
+#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
    src+= stride*4;
    __asm__ volatile(
        "lea (%0, %1), %%"REG_a"                \n\t"
@ -1561,7 +1561,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
        : : "r" (src), "r" ((x86_reg)stride), "r"(tmp)
        : "%"REG_a, "%"REG_d
    );
-#else //HAVE_MMXEXT || HAVE_AMD3DNOW
+#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
    int x;
    src+= stride*4;
    for(x=0; x<8; x++){
@ -1579,7 +1579,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))

        src++;
    }
-#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
+#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
 }

 /**
@ -1591,7 +1591,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
 */
 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
 {
-#if HAVE_MMXEXT || HAVE_AMD3DNOW
+#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
    src+= stride*4;
    __asm__ volatile(
        "lea (%0, %1), %%"REG_a"                \n\t"
@ -1651,7 +1651,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8)    , (%%REGd, %1, 4))
        : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2)
        : "%"REG_a, "%"REG_d
    );
-#else //HAVE_MMXEXT || HAVE_AMD3DNOW
+#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
    int x;
    src+= stride*4;
    for(x=0; x<8; x++){
@ -1680,7 +1680,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8)    , (%%REGd, %1, 4))

        src++;
    }
-#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
+#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
 }

 /**
@ -1692,7 +1692,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8)    , (%%REGd, %1, 4))
 */
 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
 {
-#if HAVE_MMXEXT || HAVE_AMD3DNOW
+#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
    src+= 4*stride;
    __asm__ volatile(
        "lea (%0, %1), %%"REG_a"                \n\t"
@ -1739,7 +1739,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
        : : "r" (src), "r" ((x86_reg)stride), "r" (tmp)
        : "%"REG_a, "%"REG_d
    );
-#else //HAVE_MMXEXT || HAVE_AMD3DNOW
+#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
    int a, b, c, x;
    src+= 4*stride;

@ -1782,7 +1782,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
        src += 4;
        tmp += 4;
    }
-#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
+#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
 }

 /**
@ -1793,9 +1793,9 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
 */
 static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
 {
-#if HAVE_MMX
+#if HAVE_MMX_INLINE
    src+= 4*stride;
-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
    __asm__ volatile(
        "lea (%0, %1), %%"REG_a"                \n\t"
        "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
@ -1885,8 +1885,8 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
        : : "r" (src), "r" ((x86_reg)stride)
        : "%"REG_a, "%"REG_d
    );
-#endif //HAVE_MMXEXT
-#else //HAVE_MMX
+#endif //HAVE_MMXEXT_INLINE
+#else //HAVE_MMX_INLINE
    int x, y;
    src+= 4*stride;
    // FIXME - there should be a way to do a few columns in parallel like w/mmx
@ -1905,10 +1905,10 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
        }
        src++;
    }
-#endif //HAVE_MMX
+#endif //HAVE_MMX_INLINE
 }

-#if HAVE_MMX
+#if HAVE_MMX_INLINE
 /**
 * Transpose and shift the given 8x8 Block into dst1 and dst2.
 */
@ -2073,7 +2073,7 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src)
        : "%"REG_a, "%"REG_d
    );
 }
-#endif //HAVE_MMX
+#endif //HAVE_MMX_INLINE
 //static long test=0;

 #if !HAVE_ALTIVEC
@ -2087,7 +2087,7 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,

 #define FAST_L2_DIFF
 //#define L1_DIFF //u should change the thresholds too if u try that one
-#if HAVE_MMXEXT || HAVE_AMD3DNOW
+#if HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
    __asm__ volatile(
        "lea (%2, %2, 2), %%"REG_a"             \n\t" // 3*stride
        "lea (%2, %2, 4), %%"REG_d"             \n\t" // 5*stride
@ -2375,7 +2375,7 @@ L2_DIFF_CORE((%0, %%REGc)  , (%1, %%REGc))
        :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast)
        : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
    );
-#else //HAVE_MMXEXT || HAVE_AMD3DNOW
+#else //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
 {
    int y;
    int d=0;
@ -2458,11 +2458,11 @@ Switch between
        }
    }
 }
-#endif //HAVE_MMXEXT || HAVE_AMD3DNOW
+#endif //HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE
 }
 #endif //HAVE_ALTIVEC

-#if HAVE_MMX
+#if HAVE_MMX_INLINE
 /**
 * accurate deblock filter
 */
@ -2865,7 +2865,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
            "movq (%4), %%mm2                       \n\t" // 2L0 - 5L1 + 5L2 - 2L3
            "movq 8(%4), %%mm3                      \n\t" // 2H0 - 5H1 + 5H2 - 2H3

-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
            "movq %%mm7, %%mm6                      \n\t" // 0
            "psubw %%mm0, %%mm6                     \n\t"
            "pmaxsw %%mm6, %%mm0                    \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
@ -2897,7 +2897,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
            "psubw %%mm6, %%mm3                     \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
 #endif

-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
            "pminsw %%mm2, %%mm0                    \n\t"
            "pminsw %%mm3, %%mm1                    \n\t"
 #else
@ -2961,7 +2961,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
            "pand %%mm2, %%mm4                      \n\t"
            "pand %%mm3, %%mm5                      \n\t"

-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
            "pminsw %%mm0, %%mm4                    \n\t"
            "pminsw %%mm1, %%mm5                    \n\t"
 #else
@ -2998,7 +2998,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
 }
    } */
 }
-#endif //HAVE_MMX
+#endif //HAVE_MMX_INLINE

 static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
                                const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c);
@ -3013,18 +3013,18 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
 static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride,
                                     int levelFix, int64_t *packedOffsetAndScale)
 {
-#if !HAVE_MMX
+#if !HAVE_MMX_INLINE
    int i;
 #endif
    if(levelFix){
-#if HAVE_MMX
+#if HAVE_MMX_INLINE
    __asm__ volatile(
        "movq (%%"REG_a"), %%mm2        \n\t" // packedYOffset
        "movq 8(%%"REG_a"), %%mm3       \n\t" // packedYScale
        "lea (%2,%4), %%"REG_a"         \n\t"
        "lea (%3,%5), %%"REG_d"         \n\t"
        "pxor %%mm4, %%mm4              \n\t"
-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
 #define REAL_SCALED_CPY(src1, src2, dst1, dst2)                                                \
        "movq " #src1 ", %%mm0          \n\t"\
        "movq " #src1 ", %%mm5          \n\t"\
@ -3047,7 +3047,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t
        "movq %%mm0, " #dst1 "          \n\t"\
        "movq %%mm1, " #dst2 "          \n\t"\

-#else //HAVE_MMXEXT
+#else //HAVE_MMXEXT_INLINE
 #define REAL_SCALED_CPY(src1, src2, dst1, dst2)                                        \
        "movq " #src1 ", %%mm0          \n\t"\
        "movq " #src1 ", %%mm5          \n\t"\
@ -3074,7 +3074,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t
        "movq %%mm0, " #dst1 "          \n\t"\
        "movq %%mm1, " #dst2 "          \n\t"\

-#endif //HAVE_MMXEXT
+#endif //HAVE_MMXEXT_INLINE
 #define SCALED_CPY(src1, src2, dst1, dst2)\
   REAL_SCALED_CPY(src1, src2, dst1, dst2)

@ -3094,13 +3094,13 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
        "r" ((x86_reg)dstStride)
        : "%"REG_d
    );
-#else //HAVE_MMX
+#else //HAVE_MMX_INLINE
    for(i=0; i<8; i++)
        memcpy( &(dst[dstStride*i]),
                &(src[srcStride*i]), BLOCK_SIZE);
-#endif //HAVE_MMX
+#endif //HAVE_MMX_INLINE
    }else{
-#if HAVE_MMX
+#if HAVE_MMX_INLINE
    __asm__ volatile(
        "lea (%0,%2), %%"REG_a"                 \n\t"
        "lea (%1,%3), %%"REG_d"                 \n\t"
@ -3127,11 +3127,11 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
        "r" ((x86_reg)dstStride)
        : "%"REG_a, "%"REG_d
    );
-#else //HAVE_MMX
+#else //HAVE_MMX_INLINE
    for(i=0; i<8; i++)
        memcpy( &(dst[dstStride*i]),
                &(src[srcStride*i]), BLOCK_SIZE);
-#endif //HAVE_MMX
+#endif //HAVE_MMX_INLINE
    }
 }

@ -3140,7 +3140,7 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
 */
 static inline void RENAME(duplicate)(uint8_t src[], int stride)
 {
-#if HAVE_MMX
+#if HAVE_MMX_INLINE
    __asm__ volatile(
        "movq (%0), %%mm0               \n\t"
        "add %1, %0                     \n\t"
@ -3177,7 +3177,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
    int QPCorrecture= 256*256;

    int copyAhead;
-#if HAVE_MMX
+#if HAVE_MMX_INLINE
    int i;
 #endif

@ -3190,7 +3190,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
    uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride;
    //const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;

-#if HAVE_MMX
+#if HAVE_MMX_INLINE
    for(i=0; i<57; i++){
        int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
        int threshold= offset*2 + 1;
@ -3248,7 +3248,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[

        scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black);

-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
        c.packedYScale= (uint16_t)(scale*256.0 + 0.5);
        c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
 #else
@ -3281,7 +3281,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
        // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
        for(x=0; x<width; x+=BLOCK_SIZE){

-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
 /*
            prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
            prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
@ -3308,7 +3308,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
                : "%"REG_a, "%"REG_d
            );

-#elif HAVE_AMD3DNOW
+#elif HAVE_AMD3DNOW_INLINE
 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...
 /*          prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
            prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
@ -3354,7 +3354,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
        //1% speedup if these are here instead of the inner loop
        const uint8_t *srcBlock= &(src[y*srcStride]);
        uint8_t *dstBlock= &(dst[y*dstStride]);
-#if HAVE_MMX
+#if HAVE_MMX_INLINE
        uint8_t *tempBlock1= c.tempBlocks;
        uint8_t *tempBlock2= c.tempBlocks + 8;
 #endif
@ -3390,7 +3390,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
        // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
        for(x=0; x<width; x+=BLOCK_SIZE){
            const int stride= dstStride;
-#if HAVE_MMX
+#if HAVE_MMX_INLINE
            uint8_t *tmpXchg;
 #endif
            if(isColor){
@ -3404,7 +3404,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
                yHistogram[ srcBlock[srcStride*12 + 4] ]++;
            }
            c.QP= QP;
-#if HAVE_MMX
+#if HAVE_MMX_INLINE
            __asm__ volatile(
                "movd %1, %%mm7         \n\t"
                "packuswb %%mm7, %%mm7  \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
@ -3417,7 +3417,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
 #endif


-#if HAVE_MMXEXT
+#if HAVE_MMXEXT_INLINE
 /*
            prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
            prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
@ -3444,7 +3444,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
                : "%"REG_a, "%"REG_d
            );

-#elif HAVE_AMD3DNOW
+#elif HAVE_AMD3DNOW_INLINE
 //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...
 /*          prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
            prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
@ -3488,12 +3488,12 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
                }
            }

-#if HAVE_MMX
+#if HAVE_MMX_INLINE
            RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
 #endif
            /* check if we have a previous block to deblock it with dstBlock */
            if(x - 8 >= 0){
-#if HAVE_MMX
+#if HAVE_MMX_INLINE
                if(mode & H_X1_FILTER)
                        RENAME(vertX1Filter)(tempBlock1, 16, &c);
                else if(mode & H_DEBLOCK){
@ -3539,7 +3539,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
                }else if(mode & H_A_DEBLOCK){
                    RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c);
                }
-#endif //HAVE_MMX
+#endif //HAVE_MMX_INLINE
                if(mode & DERING){
                //FIXME filter first line
                    if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
@ -3557,7 +3557,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
            dstBlock+=8;
            srcBlock+=8;

-#if HAVE_MMX
+#if HAVE_MMX_INLINE
            tmpXchg= tempBlock1;
            tempBlock1= tempBlock2;
            tempBlock2 = tmpXchg;
@ -3597,9 +3597,9 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
                + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride];
        }*/
    }
-#if   HAVE_AMD3DNOW
+#if   HAVE_AMD3DNOW_INLINE
    __asm__ volatile("femms");
-#elif HAVE_MMX
+#elif HAVE_MMX_INLINE
    __asm__ volatile("emms");
 #endif