FFmpeg/libavcodec/x86/mathops.h

/*
 * simple math operations
 * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef AVCODEC_X86_MATHOPS_H
#define AVCODEC_X86_MATHOPS_H

#include "config.h"

#include "libavutil/common.h"
#include "libavutil/x86/asm.h"

#if HAVE_INLINE_ASM

#if ARCH_X86_32

#define MULL MULL
/**
 * Fixed-point multiply: signed 32x32 -> 64-bit product, shifted right.
 *
 * One-operand imull leaves the 64-bit product in edx:eax; shrdl then shifts
 * eax right by the count while pulling the vacated bits in from edx.
 *
 * @param a     first factor (loaded into eax)
 * @param b     second factor (register or memory operand)
 * @param shift right-shift amount; passed as an 8-bit immediate when known
 *              at compile time, otherwise in %cl
 * @return the low 32 bits of the shifted product
 */
static av_always_inline av_const int MULL(int a, int b, unsigned shift)
{
    int low, high_scratch;
    __asm__ (
        "imull %[rhs]                 \n\t"
        "shrdl %[count], %%edx, %%eax \n\t"
        : "=a"(low), "=d"(high_scratch)
        : "a"(a), [rhs] "rm"(b), [count] "ci"((uint8_t)shift)
    );
    return low;
}

#define MULH MULH
/**
 * Upper half of a signed 32x32 -> 64-bit multiplication.
 *
 * One-operand imull writes the product to edx:eax; only edx is returned,
 * eax is bound to a scratch output so the compiler knows it is overwritten.
 *
 * @param a first factor (loaded into eax)
 * @param b second factor (register or memory operand)
 * @return the high 32 bits of a * b
 */
static av_always_inline av_const int MULH(int a, int b)
{
    int high, low_scratch;
    __asm__ (
        "imull %[rhs]"
        : "=d"(high), "=a"(low_scratch)
        : "a"(a), [rhs] "rm"(b)
    );
    return high;
}

#define MUL64 MUL64
/**
 * Full signed 32x32 -> 64-bit multiplication using a single imull.
 *
 * The "A" constraint binds the 64-bit result to the edx:eax register pair,
 * which is exactly where one-operand imull leaves the product.
 *
 * @param a first factor (loaded into eax)
 * @param b second factor (register or memory operand; never an immediate)
 * @return the 64-bit product a * b
 */
static av_always_inline av_const int64_t MUL64(int a, int b)
{
    int64_t product;
    __asm__ (
        "imull %[rhs]"
        : "=A"(product)
        : "a"(a), [rhs] "rm"(b)
    );
    return product;
}

#endif /* ARCH_X86_32 */

#if HAVE_I686
/* median of 3: returns the middle value of a, b and c, computed without
 * branches by a short compare / conditional-move sequence */
#define mid_pred mid_pred
static inline av_const int mid_pred(int a, int b, int c)
{
    /* i starts as b and ends up holding the result */
    int i=b;
    __asm__ (
        "cmp    %2, %1 \n\t" /* compare a with b                          */
        "cmovg  %1, %0 \n\t" /* if a > b: i = a     => i = max(a, b)      */
        "cmovg  %2, %1 \n\t" /* if a > b: a = b     => a = min(a, b)      */
        "cmp    %3, %1 \n\t" /* compare min(a, b) with c                  */
        "cmovl  %3, %1 \n\t" /* if a < c: a = c     => a = max(min, c)    */
        "cmp    %1, %0 \n\t" /* compare max(a, b) with max(min(a, b), c)  */
        "cmovg  %1, %0 \n\t" /* if i > a: i = a     => i = the median     */
        /* %0 (i) and %1 (a) are read-write and early-clobbered: both are
         * modified before the plain inputs %2 (b) and %3 (c) are last read,
         * so they must not share a register with either input. */
        :"+&r"(i), "+&r"(a)
        :"r"(b), "r"(c)
    );
    return i;
}

#if HAVE_6REGS
/*
 * Conditional triple assignment without branches:
 *     if (y < x) { x = y; a = b; c = d; }
 * x, a and c must be modifiable 32-bit integer lvalues (the instructions
 * carry the 'l' suffix); y, b and d are only read. One compare sets the
 * flags and three cmovl instructions consume them.
 * Note that the expansion already ends with a semicolon.
 */
#define COPY3_IF_LT(x, y, a, b, c, d)\
__asm__ volatile(\
    "cmpl  %[best], %[cand]     \n\t"\
    "cmovl %[cand], %[best]     \n\t"\
    "cmovl %[src_a], %[dst_a]   \n\t"\
    "cmovl %[src_c], %[dst_c]   \n\t"\
    : [best] "+&r" (x), [dst_a] "+&r" (a), [dst_c] "+r" (c)\
    : [cand] "r" (y), [src_a] "r" (b), [src_c] "r" (d)\
);
#endif /* HAVE_6REGS */

#endif /* HAVE_I686 */

/*
 * Branch-free absolute value with sign mask.
 * cdq sign-extends eax (level) into edx (mask), so mask becomes 0 for a
 * non-negative level and -1 for a negative one; (level ^ mask) - mask then
 * yields the absolute value, which is left in level.
 * Both arguments must be 32-bit integer lvalues. The expansion carries no
 * trailing semicolon.
 */
#define MASK_ABS(mask, level)                           \
    __asm__ ("cdq                            \n\t"      \
             "xorl %[sign], %[value]         \n\t"      \
             "subl %[sign], %[value]         \n\t"      \
             : [value] "+a"(level), [sign] "=&d"(mask))

// avoid +32 for shift optimization (gcc should do that ...)
#define NEG_SSR32 NEG_SSR32
/**
 * Arithmetic right shift by a negated count.
 *
 * x86 only looks at the low 5 bits of a 32-bit shift count, so shifting by
 * -s is the same as shifting by 32 - s for s in 1..31 (and by 0 for s == 32),
 * without having to compute 32 - s explicitly.
 *
 * @param a value to shift; sign bits are shifted in from the left
 * @param s negated shift amount
 * @return a shifted right arithmetically by (-s & 31) bits
 */
static inline  int32_t NEG_SSR32( int32_t a, int8_t s){
    if (__builtin_constant_p(s)) {
        /* Compile-time count: reduce it to 0..31 before handing it to the
         * assembler. The raw (uint8_t)(-s) value is outside that range and
         * is rejected as a shift immediate by newer assemblers
         * (binutils >= 2.41). */
        __asm__ ("sarl %1, %0\n\t"
             : "+r" (a)
             : "i" (-s & 0x1F)
        );
    } else {
        /* Run-time count: has to live in %cl; the CPU masks it to 5 bits. */
        __asm__ ("sarl %1, %0\n\t"
             : "+r" (a)
             : "c" ((uint8_t)(-s))
        );
    }
    return a;
}

#define NEG_USR32 NEG_USR32
/**
 * Logical right shift by a negated count.
 *
 * x86 only looks at the low 5 bits of a 32-bit shift count, so shifting by
 * -s is the same as shifting by 32 - s for s in 1..31 (and by 0 for s == 32),
 * without having to compute 32 - s explicitly.
 *
 * @param a value to shift; zeros are shifted in from the left
 * @param s negated shift amount
 * @return a shifted right logically by (-s & 31) bits
 */
static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
    if (__builtin_constant_p(s)) {
        /* Compile-time count: reduce it to 0..31 before handing it to the
         * assembler. The raw (uint8_t)(-s) value is outside that range and
         * is rejected as a shift immediate by newer assemblers
         * (binutils >= 2.41). */
        __asm__ ("shrl %1, %0\n\t"
             : "+r" (a)
             : "i" (-s & 0x1F)
        );
    } else {
        /* Run-time count: has to live in %cl; the CPU masks it to 5 bits. */
        __asm__ ("shrl %1, %0\n\t"
             : "+r" (a)
             : "c" ((uint8_t)(-s))
        );
    }
    return a;
}

#endif /* HAVE_INLINE_ASM */
#endif /* AVCODEC_X86_MATHOPS_H */
New single instruction math operation header Originally committed as revision 6291 to svn://svn.ffmpeg.org/ffmpeg/trunk 18 years ago			`/*`
			`* simple math operations`
			`* Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al`
			`*`
Change license headers to say 'FFmpeg' instead of 'this program/this library' and fix GPL/LGPL version mismatches. Originally committed as revision 6577 to svn://svn.ffmpeg.org/ffmpeg/trunk 18 years ago			`* This file is part of FFmpeg.`
			`*`
			`* FFmpeg is free software; you can redistribute it and/or`
New single instruction math operation header Originally committed as revision 6291 to svn://svn.ffmpeg.org/ffmpeg/trunk 18 years ago			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
Change license headers to say 'FFmpeg' instead of 'this program/this library' and fix GPL/LGPL version mismatches. Originally committed as revision 6577 to svn://svn.ffmpeg.org/ffmpeg/trunk 18 years ago			`* version 2.1 of the License, or (at your option) any later version.`
New single instruction math operation header Originally committed as revision 6291 to svn://svn.ffmpeg.org/ffmpeg/trunk 18 years ago			`*`
Change license headers to say 'FFmpeg' instead of 'this program/this library' and fix GPL/LGPL version mismatches. Originally committed as revision 6577 to svn://svn.ffmpeg.org/ffmpeg/trunk 18 years ago			`* FFmpeg is distributed in the hope that it will be useful,`
New single instruction math operation header Originally committed as revision 6291 to svn://svn.ffmpeg.org/ffmpeg/trunk 18 years ago			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
Change license headers to say 'FFmpeg' instead of 'this program/this library' and fix GPL/LGPL version mismatches. Originally committed as revision 6577 to svn://svn.ffmpeg.org/ffmpeg/trunk 18 years ago			`* License along with FFmpeg; if not, write to the Free Software`
New single instruction math operation header Originally committed as revision 6291 to svn://svn.ffmpeg.org/ffmpeg/trunk 18 years ago			`* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`*/`

Rename libavcodec/i386/ --> libavcodec/x86/. It contains optimizations that are not specific to i386 and libavutil uses this naming scheme already. Originally committed as revision 16270 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`#ifndef AVCODEC_X86_MATHOPS_H`
			`#define AVCODEC_X86_MATHOPS_H`
add multiple inclusion guards to headers Originally committed as revision 9345 to svn://svn.ffmpeg.org/ffmpeg/trunk 18 years ago
moves mid_pred() into mathops.h (with arch specific code split by directory) Originally committed as revision 16681 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`#include "config.h"`
x86: Put COPY3_IF_LT under HAVE_6REGS It uses 6 registers, unbreaks building on hardened x86 system. Bug-Id: gentoo/541930 CC: libav-stable@libav.org 10 years ago
moves mid_pred() into mathops.h (with arch specific code split by directory) Originally committed as revision 16681 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`#include "libavutil/common.h"`
Fix compilation with !HAVE_6REGS. Can be tested with: $ ./configure --cc='cc -m32' --disable-optimizations --enable-pic 11 years ago			`#include "libavutil/x86/asm.h"`
moves mid_pred() into mathops.h (with arch specific code split by directory) Originally committed as revision 16681 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago
x86: place some inline asm under #if HAVE_INLINE_ASM Signed-off-by: Mans Rullgard <mans@mansr.com> 13 years ago			`#if HAVE_INLINE_ASM`

re-enable mid_pred asm on x86_64. (broke in r16681) Originally committed as revision 17058 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`#if ARCH_X86_32`
mathops: convert MULL/MULH/MUL64 to inline functions rather than macros. This fixes unexpected name collisions that were occurring with variables declared within the macros. It also fixes the fate-acodec-ac3_fixed regression test on x86-32. 14 years ago
			`#define MULL MULL`
			`static av_always_inline av_const int MULL(int a, int b, unsigned shift)`
			`{`
			`int rt, dummy;`
			`__asm__ (`
			`"imull %3 \n\t"`
			`"shrdl %4, %%edx, %%eax \n\t"`
			`:"=a"(rt), "=d"(dummy)`
mathops: fix MULL() when the compiler does not inline the function. If the function is not inlined, an immediate cannot be used for the shift parameter, so the %cl register must be used instead in that case. This fixes compilation for x86-32 using gcc with --disable-optimizations. 14 years ago			`:"a"(a), "rm"(b), "ci"((uint8_t)shift)`
mathops: convert MULL/MULH/MUL64 to inline functions rather than macros. This fixes unexpected name collisions that were occurring with variables declared within the macros. It also fixes the fate-acodec-ac3_fixed regression test on x86-32. 14 years ago			`);`
			`return rt;`
			`}`

			`#define MULH MULH`
			`static av_always_inline av_const int MULH(int a, int b)`
			`{`
			`int rt, dummy;`
			`__asm__ (`
			`"imull %3"`
			`:"=d"(rt), "=a"(dummy)`
			`:"a"(a), "rm"(b)`
			`);`
			`return rt;`
			`}`

			`#define MUL64 MUL64`
			`static av_always_inline av_const int64_t MUL64(int a, int b)`
			`{`
			`int64_t rt;`
			`__asm__ (`
			`"imull %2"`
			`:"=A"(rt)`
mathops: change "g" constraint to "rm" in x86-32 version of MUL64(). The 1-arg imul instruction cannot take an immediate argument, only a register or memory argument. 14 years ago			`:"a"(a), "rm"(b)`
mathops: convert MULL/MULH/MUL64 to inline functions rather than macros. This fixes unexpected name collisions that were occurring with variables declared within the macros. It also fixes the fate-acodec-ac3_fixed regression test on x86-32. 14 years ago			`);`
			`return rt;`
			`}`

			`#endif /* ARCH_X86_32 */`
New single instruction math operation header Originally committed as revision 6291 to svn://svn.ffmpeg.org/ffmpeg/trunk 18 years ago
configure: Rename cmov processor capability to i686 The goal is to make the capability slightly more general and have it cover the availability of the nopl instruction in addition to cmov. 12 years ago			`#if HAVE_I686`
moves mid_pred() into mathops.h (with arch specific code split by directory) Originally committed as revision 16681 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`/* median of 3 */`
			`#define mid_pred mid_pred`
			`static inline av_const int mid_pred(int a, int b, int c)`
			`{`
			`int i=b;`
mathops/x86: work around inline asm miscompilation with GCC 4.8.1 The volatile is not required here, and prevents a miscompilation with GCC 4.8.1 when building on x86 with --cpu=i686 Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 12 years ago			`__asm__ (`
moves mid_pred() into mathops.h (with arch specific code split by directory) Originally committed as revision 16681 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`"cmp %2, %1 \n\t"`
			`"cmovg %1, %0 \n\t"`
			`"cmovg %2, %1 \n\t"`
			`"cmp %3, %1 \n\t"`
			`"cmovl %3, %1 \n\t"`
			`"cmp %1, %0 \n\t"`
			`"cmovg %1, %0 \n\t"`
			`:"+&r"(i), "+&r"(a)`
			`:"r"(b), "r"(c)`
			`);`
			`return i;`
			`}`

Fix compilation with !HAVE_6REGS. Can be tested with: $ ./configure --cc='cc -m32' --disable-optimizations --enable-pic 11 years ago			`#if HAVE_6REGS`
Move COPY3_IF_LT to lavc/mathops.h This obscure macro is only used in motion_est.c so having it in lavc makes more sense. See discussion here: http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/2008-November/056561.html Originally committed as revision 21346 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`#define COPY3_IF_LT(x, y, a, b, c, d)\`
			`__asm__ volatile(\`
			`"cmpl %0, %3 \n\t"\`
			`"cmovl %3, %0 \n\t"\`
			`"cmovl %4, %1 \n\t"\`
			`"cmovl %5, %2 \n\t"\`
			`: "+&r" (x), "+&r" (a), "+r" (c)\`
			`: "r" (y), "r" (b), "r" (d)\`
			`);`
Fix compilation with !HAVE_6REGS. Can be tested with: $ ./configure --cc='cc -m32' --disable-optimizations --enable-pic 11 years ago			`#endif /* HAVE_6REGS */`
x86: Put COPY3_IF_LT under HAVE_6REGS It uses 6 registers, unbreaks building on hardened x86 system. Bug-Id: gentoo/541930 CC: libav-stable@libav.org 10 years ago
configure: Rename cmov processor capability to i686 The goal is to make the capability slightly more general and have it cover the availability of the nopl instruction in addition to cmov. 12 years ago			`#endif /* HAVE_I686 */`
Move COPY3_IF_LT to lavc/mathops.h This obscure macro is only used in motion_est.c so having it in lavc makes more sense. See discussion here: http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/2008-November/056561.html Originally committed as revision 21346 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago
Move MASK_ABS macro to libavcodec/mathops.h This macro is only used in two places, both in libavcodec, so this is a more sensible place for it. Two small tweaks to the macro are made: - removing the trailing semicolon - dropping unnecessary 'volatile' from the x86 asm Signed-off-by: Mans Rullgard <mans@mansr.com> 12 years ago			`#define MASK_ABS(mask, level) \`
Use intel compliant CDQ instead of CLTD in inline asm. Signed-off-by: Michael Niedermayer <michaelni@gmx.at> 11 years ago			`__asm__ ("cdq \n\t" \`
Move MASK_ABS macro to libavcodec/mathops.h This macro is only used in two places, both in libavcodec, so this is a more sensible place for it. Two small tweaks to the macro are made: - removing the trailing semicolon - dropping unnecessary 'volatile' from the x86 asm Signed-off-by: Mans Rullgard <mans@mansr.com> 12 years ago			`"xorl %1, %0 \n\t" \`
			`"subl %1, %0 \n\t" \`
			`: "+a"(level), "=&d"(mask))`

Move NEG_[US]SR32 macros to mathops.h Originally committed as revision 21873 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`// avoid +32 for shift optimization (gcc should do that ...)`
			`#define NEG_SSR32 NEG_SSR32`
			`static inline int32_t NEG_SSR32( int32_t a, int8_t s){`
			`__asm__ ("sarl %1, %0\n\t"`
			`: "+r" (a)`
			`: "ic" ((uint8_t)(-s))`
			`);`
			`return a;`
			`}`

			`#define NEG_USR32 NEG_USR32`
			`static inline uint32_t NEG_USR32(uint32_t a, int8_t s){`
			`__asm__ ("shrl %1, %0\n\t"`
			`: "+r" (a)`
			`: "ic" ((uint8_t)(-s))`
			`);`
			`return a;`
			`}`

x86: place some inline asm under #if HAVE_INLINE_ASM Signed-off-by: Mans Rullgard <mans@mansr.com> 13 years ago			`#endif /* HAVE_INLINE_ASM */`
Rename libavcodec/i386/ --> libavcodec/x86/. It contains optimizations that are not specific to i386 and libavutil uses this naming scheme already. Originally committed as revision 16270 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`#endif /* AVCODEC_X86_MATHOPS_H */`