FFmpeg/libavcodec/x86/idct_sse2_xvid.c

/*
 * XVID MPEG-4 VIDEO CODEC
 * - SSE2 inverse discrete cosine transform -
 *
 * Copyright(C) 2003 Pascal Massimino <skal@planet-d.net>
 *
 * Conversion to gcc syntax with modifications
 * by Alexander Strange <astrange@ithinksw.com>
 *
 * Originally from dct/x86_asm/fdct_sse2_skal.asm in Xvid.
 *
 * This file is part of FFmpeg.
 *
 * Vertical pass is an implementation of the scheme:
 *  Loeffler C., Ligtenberg A., and Moschytz C.S.:
 *  Practical Fast 1D DCT Algorithm with Eleven Multiplications,
 *  Proc. ICASSP 1989, 988-991.
 *
 * Horizontal pass is a double 4x4 vector/matrix multiplication,
 * (see also Intel's Application Note 922:
 *  http://developer.intel.com/vtune/cbts/strmsimd/922down.htm
 *  Copyright (C) 1999 Intel Corporation)
 *
 * More details at http://skal.planet-d.net/coding/dct.html
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with FFmpeg; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavcodec/dsputil.h"
#include "libavutil/x86_cpu.h"
#include "idct_xvid.h"
#include "dsputil_mmx.h"

/*!
 * @file
 * @brief SSE2 idct compatible with xvidmmx
 */

/// Expands to eight comma-separated copies of x; used below to fill the
/// 8-element constant vectors.
#define X8(x)     x,x,x,x,x,x,x,x

/*
 * Fixed-point shift amounts of the row and column passes.
 * NOTE(review): neither macro is expanded anywhere in the code visible in
 * this file; the row pass hard-codes "psrad $11" and the column passes
 * hard-code "psraw $6", so these values and those literals have to be kept
 * in sync by hand.
 */
#define ROW_SHIFT 11
#define COL_SHIFT 6

/*
 * Constants replicated across a whole vector and referenced from the inline
 * asm through MANGLE().
 *  - tan1 and tan3 are loaded into the TAN1/TAN3 registers by iLLM_HEAD.
 *  - tan2 and sqrt2 are loaded directly inside the column passes.
 *  All four are used as pmulhw multipliers in iLLM_PASS / iLLM_PASS_SPARSE.
 *  - m127 is loaded into %mm0 at the start of ff_idct_xvid_sse2() and is the
 *    per-byte addend of the all-zero row tests (TEST_ONE_ROW/TEST_TWO_ROWS).
 * NOTE(review): 43790 does not fit in int16_t; per the trailing comment the
 * stored (wrapped) value is meant to represent tan(3pi/16)-1 -- confirm the
 * conversion behaves as intended on every supported compiler.
 * The first DECLARE_ASM_CONST argument is presumably the alignment in bytes
 * (the macro is defined outside this file).
 */
DECLARE_ASM_CONST(16, int16_t, tan1)[] = {X8(13036)}; // tan( pi/16)
DECLARE_ASM_CONST(16, int16_t, tan2)[] = {X8(27146)}; // tan(2pi/16) = sqrt(2)-1
DECLARE_ASM_CONST(16, int16_t, tan3)[] = {X8(43790)}; // tan(3pi/16)-1
DECLARE_ASM_CONST(16, int16_t, sqrt2)[]= {X8(23170)}; // 0.5/sqrt(2)
DECLARE_ASM_CONST(8,  uint8_t, m127)[] = {X8(127)};   // byte bias for the zero-row test

/*
 * Coefficient tables for the row pass (iMTX_MULT).
 *
 * Each table holds 32 16-bit values, which iMTX_MULT reads as four 16-byte
 * groups (byte offsets 0, 16, 32 and 48) as pmaddwd operands.
 *
 * Row usage in ff_idct_xvid_sse2():
 *   iTab1 -> rows 0 and 4
 *   iTab2 -> rows 1 and 7
 *   iTab3 -> rows 2 and 6
 *   iTab4 -> rows 3 and 5
 *
 * Entries written as 0x8000 and above exceed INT16_MAX; they are presumably
 * meant as negative coefficients once converted to int16_t (pmaddwd
 * multiplies signed words).
 */
DECLARE_ASM_CONST(16, int16_t, iTab1)[] = {
 0x4000, 0x539f, 0xc000, 0xac61, 0x4000, 0xdd5d, 0x4000, 0xdd5d,
 0x4000, 0x22a3, 0x4000, 0x22a3, 0xc000, 0x539f, 0x4000, 0xac61,
 0x3249, 0x11a8, 0x4b42, 0xee58, 0x11a8, 0x4b42, 0x11a8, 0xcdb7,
 0x58c5, 0x4b42, 0xa73b, 0xcdb7, 0x3249, 0xa73b, 0x4b42, 0xa73b
};

DECLARE_ASM_CONST(16, int16_t, iTab2)[] = {
 0x58c5, 0x73fc, 0xa73b, 0x8c04, 0x58c5, 0xcff5, 0x58c5, 0xcff5,
 0x58c5, 0x300b, 0x58c5, 0x300b, 0xa73b, 0x73fc, 0x58c5, 0x8c04,
 0x45bf, 0x187e, 0x6862, 0xe782, 0x187e, 0x6862, 0x187e, 0xba41,
 0x7b21, 0x6862, 0x84df, 0xba41, 0x45bf, 0x84df, 0x6862, 0x84df
};

DECLARE_ASM_CONST(16, int16_t, iTab3)[] = {
 0x539f, 0x6d41, 0xac61, 0x92bf, 0x539f, 0xd2bf, 0x539f, 0xd2bf,
 0x539f, 0x2d41, 0x539f, 0x2d41, 0xac61, 0x6d41, 0x539f, 0x92bf,
 0x41b3, 0x1712, 0x6254, 0xe8ee, 0x1712, 0x6254, 0x1712, 0xbe4d,
 0x73fc, 0x6254, 0x8c04, 0xbe4d, 0x41b3, 0x8c04, 0x6254, 0x8c04
};

DECLARE_ASM_CONST(16, int16_t, iTab4)[] = {
 0x4b42, 0x6254, 0xb4be, 0x9dac, 0x4b42, 0xd746, 0x4b42, 0xd746,
 0x4b42, 0x28ba, 0x4b42, 0x28ba, 0xb4be, 0x6254, 0x4b42, 0x9dac,
 0x3b21, 0x14c3, 0x587e, 0xeb3d, 0x14c3, 0x587e, 0x14c3, 0xc4df,
 0x6862, 0x587e, 0x979e, 0xc4df, 0x3b21, 0x979e, 0x587e, 0x979e
};

/*
 * Per-row rounding terms for the row pass: six groups of four identical
 * 32-bit values, addressed from the asm as walkenIdctRounders + k*16 bytes.
 * The selected group is added (paddd, via ROUND()) to the partial sums in
 * iMTX_MULT before the arithmetic right shift by 11.
 * Row 4 uses no rounder at all: ff_idct_xvid_sse2() passes "#" instead, which
 * turns the paddd line into an assembler comment.
 */
DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders)[] = {
 65536, 65536, 65536, 65536, /* group 0: row 0 */
  3597,  3597,  3597,  3597, /* group 1: row 1 */
  2260,  2260,  2260,  2260, /* group 2: row 2 */
  1203,  1203,  1203,  1203, /* group 3: row 3 */
   120,   120,   120,   120, /* group 4: row 5 */
   512,   512,   512,   512  /* group 5: rows 6 and 7 */
};

// Temporary storage before the column pass
// The odd rows stay in these registers between the row pass and the column
// pass. iLLM_PASS and iLLM_PASS_SPARSE refer to %%xmm4..%%xmm7 by their
// literal names rather than through ROWn, so this mapping cannot be changed
// without editing those macros as well.
#define ROW1 "%%xmm6"
#define ROW3 "%%xmm4"
#define ROW5 "%%xmm5"
#define ROW7 "%%xmm7"

// CLEAR_ODD(r): zero register r (used for a row whose transform may be skipped).
#define CLEAR_ODD(r) "pxor  "r","r" \n\t"
// PUT_ODD(dst): copy the row result that iMTX_MULT leaves in %%xmm2 into dst,
// reversing the order of its four high words (pshufhw selector 0x1B).
#define PUT_ODD(dst) "pshufhw  $0x1B, %%xmm2, "dst"   \n\t"

/*
 * Architecture-dependent placement of the even rows and of the scratch
 * registers used by the column pass.
 *
 *  ROWn        where even row n is kept between the two passes
 *  REGn        register that holds even row n inside the column pass
 *  SREG2       register that holds row 2 inside iLLM_PASS_SPARSE
 *  XMMS        scratch register of the column-pass butterflies
 *  TAN1/TAN3   registers preloaded with tan1/tan3 by iLLM_HEAD
 *  MOV_32_ONLY instruction prefix for moves only needed on 32-bit builds
 */
#if ARCH_X86_64

/* x86-64: enough registers, so even rows live in %xmm8-%xmm11 and REGn is
 * simply ROWn. MOV_32_ONLY is "#", which makes every line it prefixes an
 * assembler comment. */
# define ROW0 "%%xmm8"
# define REG0 ROW0
# define ROW2 "%%xmm9"
# define REG2 ROW2
# define ROW4 "%%xmm10"
# define REG4 ROW4
# define ROW6 "%%xmm11"
# define REG6 ROW6
# define CLEAR_EVEN(r) CLEAR_ODD(r)
# define PUT_EVEN(dst) PUT_ODD(dst)
# define XMMS "%%xmm12"
# define MOV_32_ONLY "#"
# define SREG2 REG2
# define TAN3 "%%xmm13"
# define TAN1 "%%xmm14"

#else

/* 32-bit x86: only %xmm0-%xmm7 exist, so even rows are written back into the
 * block itself and reloaded into REGn by the MOV_32_ONLY moves.
 * - REG0/REG2 share %xmm4 and REG4/REG6 share %xmm6; iLLM_PASS consumes rows
 *   2 and 6 before it loads rows 0 and 4.
 * - CLEAR_EVEN expands to nothing; presumably a row that is skipped because
 *   it tested as all-zero already holds zeros in memory.
 * - TAN3/TAN1 are %xmm0/%xmm2, which iMTX_MULT overwrites, and TAN1 is the
 *   same register as XMMS, hence the TAN1 spill/reload in the column passes
 *   and the second iLLM_HEAD in ff_idct_xvid_sse2(). */
# define ROW0 "(%0)"
# define REG0 "%%xmm4"
# define ROW2 "2*16(%0)"
# define REG2 "%%xmm4"
# define ROW4 "4*16(%0)"
# define REG4 "%%xmm6"
# define ROW6 "6*16(%0)"
# define REG6 "%%xmm6"
# define CLEAR_EVEN(r)
/* Reverse the four high words of %xmm2 in place, then store it to memory. */
# define PUT_EVEN(dst) \
    "pshufhw  $0x1B, %%xmm2, %%xmm2   \n\t" \
    "movdqa          %%xmm2, "dst"    \n\t"
# define XMMS "%%xmm2"
# define MOV_32_ONLY "movdqa "
# define SREG2 "%%xmm7"
# define TAN3 "%%xmm0"
# define TAN1 "%%xmm2"

#endif

// ROUND(x): first half of a "paddd <x>, ..." instruction with x as memory
// operand; iMTX_MULT appends the destination (", %%xmm0") itself.
#define ROUND(x) "paddd   "MANGLE(x)

// JZ(reg, to): jump to label `to` when the 32-bit register `reg` is zero.
#define JZ(reg, to)                         \
    "testl     "reg","reg"            \n\t" \
    "jz        "to"                   \n\t"

// JNZ(reg, to): jump to label `to` when the 32-bit register `reg` is non-zero.
#define JNZ(reg, to)                        \
    "testl     "reg","reg"            \n\t" \
    "jnz       "to"                   \n\t"

/*
 * TEST_ONE_ROW(src, reg, clear): test whether the 16-byte row at `src` is
 * all zero.
 *
 * Emits `clear` first (an instruction string, possibly empty), then ORs the
 * two 8-byte halves of the row into %mm1, adds %mm0 to every byte with
 * unsigned saturation and collects the byte sign bits into `reg` with
 * pmovmskb. %mm0 is expected to hold 127 in every byte (m127, loaded at the
 * start of ff_idct_xvid_sse2()), so a sign bit ends up set exactly for the
 * non-zero bytes: `reg` is 0 if and only if the whole row is zero.
 * Clobbers %mm1.
 */
#define TEST_ONE_ROW(src, reg, clear)       \
    clear                                   \
    "movq     "src", %%mm1            \n\t" \
    "por    8+"src", %%mm1            \n\t" \
    "paddusb  %%mm0, %%mm1            \n\t" \
    "pmovmskb %%mm1, "reg"            \n\t"

/*
 * TEST_TWO_ROWS(row1, row2, reg1, reg2, clear1, clear2): the same all-zero
 * test as TEST_ONE_ROW, applied to two rows with the instructions
 * interleaved.
 *
 * Emits `clear1` and `clear2`, then leaves in reg1/reg2 a value that is 0 if
 * and only if row1/row2 respectively contains only zero bytes. Relies on %mm0
 * holding 127 in every byte. Clobbers %mm1 and %mm2.
 */
#define TEST_TWO_ROWS(row1, row2, reg1, reg2, clear1, clear2) \
    clear1                                  \
    clear2                                  \
    "movq     "row1", %%mm1           \n\t" \
    "por    8+"row1", %%mm1           \n\t" \
    "movq     "row2", %%mm2           \n\t" \
    "por    8+"row2", %%mm2           \n\t" \
    "paddusb   %%mm0, %%mm1           \n\t" \
    "paddusb   %%mm0, %%mm2           \n\t" \
    "pmovmskb  %%mm1, "reg1"          \n\t" \
    "pmovmskb  %%mm2, "reg2"          \n\t"

/**
 * IDCT pass on rows.
 *
 * Transforms the one 16-byte row at `src`:
 *  - the row is loaded into %xmm3 and rearranged into four operand vectors
 *    (%xmm0..%xmm3), each multiplied (pmaddwd) by one 16-byte group of
 *    `table` at byte offsets 0, 16, 32 and 48;
 *  - the products are summed pairwise, `rounder` is applied to %xmm0, and the
 *    sum/difference of the two halves is shifted right arithmetically by 11
 *    (the literal matches ROW_SHIFT) and packed with signed saturation into
 *    %xmm2;
 *  - `put` then moves %xmm2 to the row's temporary storage (PUT_ODD/PUT_EVEN).
 *
 * @param src     memory operand of the input row, e.g. "3*16(%0)"
 * @param table   memory operand of the 64-byte coefficient table
 * @param rounder start of an instruction to which ", %%xmm0" is appended,
 *                normally ROUND(...); passing "#" comments the line out so
 *                that no rounding term is added
 * @param put     instruction string storing the result from %xmm2
 *
 * The expansion ends with the local label "1:", which is the target of a
 * preceding JZ(..., "1f") that skips this row. Clobbers %xmm0-%xmm3.
 */
#define iMTX_MULT(src, table, rounder, put) \
    "movdqa        "src", %%xmm3      \n\t" \
    "movdqa       %%xmm3, %%xmm0      \n\t" \
    "pshufd   $0x11, %%xmm3, %%xmm1   \n\t" /* 4602 */ \
    "punpcklqdq   %%xmm0, %%xmm0      \n\t" /* 0246 */ \
    "pmaddwd     "table", %%xmm0      \n\t" \
    "pmaddwd  16+"table", %%xmm1      \n\t" \
    "pshufd   $0xBB, %%xmm3, %%xmm2   \n\t" /* 5713 */ \
    "punpckhqdq   %%xmm3, %%xmm3      \n\t" /* 1357 */ \
    "pmaddwd  32+"table", %%xmm2      \n\t" \
    "pmaddwd  48+"table", %%xmm3      \n\t" \
    "paddd        %%xmm1, %%xmm0      \n\t" \
    "paddd        %%xmm3, %%xmm2      \n\t" \
    rounder",     %%xmm0              \n\t" \
    "movdqa       %%xmm2, %%xmm3      \n\t" \
    "paddd        %%xmm0, %%xmm2      \n\t" \
    "psubd        %%xmm3, %%xmm0      \n\t" \
    "psrad           $11, %%xmm2      \n\t" \
    "psrad           $11, %%xmm0      \n\t" \
    "packssdw     %%xmm0, %%xmm2      \n\t" \
    put                                     \
    "1:                               \n\t"

/**
 * Column-pass prologue: loads the tan3 and tan1 constant vectors into the
 * TAN3 and TAN1 registers, which iLLM_PASS and iLLM_PASS_SPARSE expect to be
 * set up on entry.
 *
 * The final line deliberately carries no line continuation, so the macro can
 * never absorb whatever line happens to follow its definition.
 */
#define iLLM_HEAD                           \
    "movdqa   "MANGLE(tan3)", "TAN3"  \n\t" \
    "movdqa   "MANGLE(tan1)", "TAN1"  \n\t"

/**
 * IDCT pass on columns.
 *
 * Expected on entry:
 *  - odd rows in %xmm6 (row 1), %xmm4 (row 3), %xmm5 (row 5), %xmm7 (row 7),
 *    i.e. the ROW1/ROW3/ROW5/ROW7 registers, referenced here by literal name;
 *  - even rows in ROW0/ROW2/ROW4/ROW6 (registers on x86-64; on 32-bit they
 *    are reloaded from the block into REGn by the MOV_32_ONLY moves);
 *  - TAN3/TAN1 loaded by iLLM_HEAD.
 *
 * The odd rows are processed first, then rows 2 and 6, then rows 0 and 4.
 * On 32-bit builds REG2/REG0 and REG6/REG4 are the same registers, so that
 * order matters. All eight result rows are shifted right arithmetically by 6
 * (the literal matches COL_SHIFT) and stored as 16-byte rows at
 * `dct` + 0 .. 7*16.
 *
 * 32-bit only: TAN1 is the same register as the XMMS scratch, so it is
 * spilled to (%0) after row 0 has been loaded from there and reloaded before
 * the last butterflies. Note these two moves address (%0) directly, not
 * `dct`.
 *
 * @param dct base register operand of the output block, e.g. "%0"
 */
#define iLLM_PASS(dct)                      \
    "movdqa   "TAN3", %%xmm1          \n\t" \
    "movdqa   "TAN1", %%xmm3          \n\t" \
    "pmulhw   %%xmm4, "TAN3"          \n\t" \
    "pmulhw   %%xmm5, %%xmm1          \n\t" \
    "paddsw   %%xmm4, "TAN3"          \n\t" \
    "paddsw   %%xmm5, %%xmm1          \n\t" \
    "psubsw   %%xmm5, "TAN3"          \n\t" \
    "paddsw   %%xmm4, %%xmm1          \n\t" \
    "pmulhw   %%xmm7, %%xmm3          \n\t" \
    "pmulhw   %%xmm6, "TAN1"          \n\t" \
    "paddsw   %%xmm6, %%xmm3          \n\t" \
    "psubsw   %%xmm7, "TAN1"          \n\t" \
    "movdqa   %%xmm3, %%xmm7          \n\t" \
    "movdqa   "TAN1", %%xmm6          \n\t" \
    "psubsw   %%xmm1, %%xmm3          \n\t" \
    "psubsw   "TAN3", "TAN1"          \n\t" \
    "paddsw   %%xmm7, %%xmm1          \n\t" \
    "paddsw   %%xmm6, "TAN3"          \n\t" \
    "movdqa   %%xmm3, %%xmm6          \n\t" \
    "psubsw   "TAN3", %%xmm3          \n\t" \
    "paddsw   %%xmm6, "TAN3"          \n\t" \
    "movdqa   "MANGLE(sqrt2)", %%xmm4 \n\t" \
    "pmulhw   %%xmm4, %%xmm3          \n\t" \
    "pmulhw   %%xmm4, "TAN3"          \n\t" \
    "paddsw   "TAN3", "TAN3"          \n\t" \
    "paddsw   %%xmm3, %%xmm3          \n\t" \
    "movdqa   "MANGLE(tan2)", %%xmm7  \n\t" \
    MOV_32_ONLY ROW2", "REG2"         \n\t" \
    MOV_32_ONLY ROW6", "REG6"         \n\t" \
    "movdqa   %%xmm7, %%xmm5          \n\t" \
    "pmulhw   "REG6", %%xmm7          \n\t" \
    "pmulhw   "REG2", %%xmm5          \n\t" \
    "paddsw   "REG2", %%xmm7          \n\t" \
    "psubsw   "REG6", %%xmm5          \n\t" \
    MOV_32_ONLY ROW0", "REG0"         \n\t" \
    MOV_32_ONLY ROW4", "REG4"         \n\t" \
    MOV_32_ONLY"  "TAN1", (%0)        \n\t" \
    "movdqa   "REG0", "XMMS"          \n\t" \
    "psubsw   "REG4", "REG0"          \n\t" \
    "paddsw   "XMMS", "REG4"          \n\t" \
    "movdqa   "REG4", "XMMS"          \n\t" \
    "psubsw   %%xmm7, "REG4"          \n\t" \
    "paddsw   "XMMS", %%xmm7          \n\t" \
    "movdqa   "REG0", "XMMS"          \n\t" \
    "psubsw   %%xmm5, "REG0"          \n\t" \
    "paddsw   "XMMS", %%xmm5          \n\t" \
    "movdqa   %%xmm5, "XMMS"          \n\t" \
    "psubsw   "TAN3", %%xmm5          \n\t" \
    "paddsw   "XMMS", "TAN3"          \n\t" \
    "movdqa   "REG0", "XMMS"          \n\t" \
    "psubsw   %%xmm3, "REG0"          \n\t" \
    "paddsw   "XMMS", %%xmm3          \n\t" \
    MOV_32_ONLY"  (%0), "TAN1"        \n\t" \
    "psraw        $6, %%xmm5          \n\t" \
    "psraw        $6, "REG0"          \n\t" \
    "psraw        $6, "TAN3"          \n\t" \
    "psraw        $6, %%xmm3          \n\t" \
    "movdqa   "TAN3", 1*16("dct")     \n\t" \
    "movdqa   %%xmm3, 2*16("dct")     \n\t" \
    "movdqa   "REG0", 5*16("dct")     \n\t" \
    "movdqa   %%xmm5, 6*16("dct")     \n\t" \
    "movdqa   %%xmm7, %%xmm0          \n\t" \
    "movdqa   "REG4", %%xmm4          \n\t" \
    "psubsw   %%xmm1, %%xmm7          \n\t" \
    "psubsw   "TAN1", "REG4"          \n\t" \
    "paddsw   %%xmm0, %%xmm1          \n\t" \
    "paddsw   %%xmm4, "TAN1"          \n\t" \
    "psraw        $6, %%xmm1          \n\t" \
    "psraw        $6, %%xmm7          \n\t" \
    "psraw        $6, "TAN1"          \n\t" \
    "psraw        $6, "REG4"          \n\t" \
    "movdqa   %%xmm1, ("dct")         \n\t" \
    "movdqa   "TAN1", 3*16("dct")     \n\t" \
    "movdqa   "REG4", 4*16("dct")     \n\t" \
    "movdqa   %%xmm7, 7*16("dct")     \n\t"

/**
 * IDCT pass on columns, assuming rows 4-7 are zero.
 *
 * Reduced variant of iLLM_PASS that only reads rows 0-3:
 *  - row 1 in %xmm6 and row 3 in %xmm4 (the ROW1/ROW3 registers, referenced
 *    by literal name);
 *  - row 2 through SREG2 and row 0 through REG0 (reloaded from the block by
 *    the MOV_32_ONLY moves on 32-bit builds);
 *  - TAN3/TAN1 loaded by iLLM_HEAD.
 *
 * It still produces all eight output rows, each shifted right arithmetically
 * by 6 and stored at `dct` + 0 .. 7*16.
 *
 * 32-bit only: TAN1 shares its register with the XMMS scratch, so it is
 * spilled to (%0) after row 0 has been loaded and reloaded later. These two
 * moves address (%0) directly, not `dct`.
 *
 * @param dct base register operand of the output block, e.g. "%0"
 */
#define iLLM_PASS_SPARSE(dct)               \
    "pmulhw   %%xmm4, "TAN3"          \n\t" \
    "paddsw   %%xmm4, "TAN3"          \n\t" \
    "movdqa   %%xmm6, %%xmm3          \n\t" \
    "pmulhw   %%xmm6, "TAN1"          \n\t" \
    "movdqa   %%xmm4, %%xmm1          \n\t" \
    "psubsw   %%xmm1, %%xmm3          \n\t" \
    "paddsw   %%xmm6, %%xmm1          \n\t" \
    "movdqa   "TAN1", %%xmm6          \n\t" \
    "psubsw   "TAN3", "TAN1"          \n\t" \
    "paddsw   %%xmm6, "TAN3"          \n\t" \
    "movdqa   %%xmm3, %%xmm6          \n\t" \
    "psubsw   "TAN3", %%xmm3          \n\t" \
    "paddsw   %%xmm6, "TAN3"          \n\t" \
    "movdqa   "MANGLE(sqrt2)", %%xmm4 \n\t" \
    "pmulhw   %%xmm4, %%xmm3          \n\t" \
    "pmulhw   %%xmm4, "TAN3"          \n\t" \
    "paddsw   "TAN3", "TAN3"          \n\t" \
    "paddsw   %%xmm3, %%xmm3          \n\t" \
    "movdqa   "MANGLE(tan2)", %%xmm5  \n\t" \
    MOV_32_ONLY ROW2", "SREG2"        \n\t" \
    "pmulhw   "SREG2", %%xmm5         \n\t" \
    MOV_32_ONLY ROW0", "REG0"         \n\t" \
    "movdqa   "REG0", %%xmm6          \n\t" \
    "psubsw   "SREG2", %%xmm6         \n\t" \
    "paddsw   "REG0", "SREG2"         \n\t" \
    MOV_32_ONLY"  "TAN1", (%0)        \n\t" \
    "movdqa   "REG0", "XMMS"          \n\t" \
    "psubsw   %%xmm5, "REG0"          \n\t" \
    "paddsw   "XMMS", %%xmm5          \n\t" \
    "movdqa   %%xmm5, "XMMS"          \n\t" \
    "psubsw   "TAN3", %%xmm5          \n\t" \
    "paddsw   "XMMS", "TAN3"          \n\t" \
    "movdqa   "REG0", "XMMS"          \n\t" \
    "psubsw   %%xmm3, "REG0"          \n\t" \
    "paddsw   "XMMS", %%xmm3          \n\t" \
    MOV_32_ONLY"  (%0), "TAN1"        \n\t" \
    "psraw        $6, %%xmm5          \n\t" \
    "psraw        $6, "REG0"          \n\t" \
    "psraw        $6, "TAN3"          \n\t" \
    "psraw        $6, %%xmm3          \n\t" \
    "movdqa   "TAN3", 1*16("dct")     \n\t" \
    "movdqa   %%xmm3, 2*16("dct")     \n\t" \
    "movdqa   "REG0", 5*16("dct")     \n\t" \
    "movdqa   %%xmm5, 6*16("dct")     \n\t" \
    "movdqa   "SREG2", %%xmm0         \n\t" \
    "movdqa   %%xmm6, %%xmm4          \n\t" \
    "psubsw   %%xmm1, "SREG2"         \n\t" \
    "psubsw   "TAN1", %%xmm6          \n\t" \
    "paddsw   %%xmm0, %%xmm1          \n\t" \
    "paddsw   %%xmm4, "TAN1"          \n\t" \
    "psraw        $6, %%xmm1          \n\t" \
    "psraw        $6, "SREG2"         \n\t" \
    "psraw        $6, "TAN1"          \n\t" \
    "psraw        $6, %%xmm6          \n\t" \
    "movdqa   %%xmm1, ("dct")         \n\t" \
    "movdqa   "TAN1", 3*16("dct")     \n\t" \
    "movdqa   %%xmm6, 4*16("dct")     \n\t" \
    "movdqa   "SREG2", 7*16("dct")    \n\t"

/**
 * In-place 8x8 inverse DCT: a row pass (iMTX_MULT) followed by a column pass
 * (iLLM_PASS or iLLM_PASS_SPARSE), with all-zero rows skipped.
 *
 * Control flow of the asm statement:
 *  1. Rows 0-2 are always transformed.
 *  2. Rows 3 and 4 are tested; row 3 is transformed unless it is all zero.
 *  3. Rows 5, 6 and 7 are tested (%eax is reused for row 5).
 *  4. If rows 4-7 are all zero, the sparse column pass runs and execution
 *     jumps to label 6.
 *  5. Otherwise the row pass is entered at label 2, 3, 4 or 5, i.e. at the
 *     first non-zero row among 4..7, and falls through the following rows;
 *     rows 6 and 7 are individually skipped when zero. The full column pass
 *     follows.
 *
 * @param block coefficient block, read as eight 16-byte rows at
 *              block + 0 .. 7*16 bytes and overwritten with the result.
 *              The rows are accessed with movdqa, which requires 16-byte
 *              alignment.
 *
 * NOTE(review): the zero tests use %mm0-%mm2, which are neither in the
 * clobber list nor followed by emms here; presumably MMX state is handled by
 * the callers -- confirm.
 */
inline void ff_idct_xvid_sse2(short *block)
{
    __asm__ volatile(
    /* %mm0 = 127 in every byte, required by TEST_ONE_ROW/TEST_TWO_ROWS */
    "movq     "MANGLE(m127)", %%mm0                              \n\t"
    /* rows 0-2: transformed unconditionally */
    iMTX_MULT("(%0)",     MANGLE(iTab1), ROUND(walkenIdctRounders),      PUT_EVEN(ROW0))
    iMTX_MULT("1*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+1*16), PUT_ODD(ROW1))
    iMTX_MULT("2*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+2*16), PUT_EVEN(ROW2))

    /* %eax = row 3 non-zero, %ecx = row 4 non-zero; the JZ skips row 3 by
     * jumping to the "1:" label that ends the following iMTX_MULT */
    TEST_TWO_ROWS("3*16(%0)", "4*16(%0)", "%%eax", "%%ecx", CLEAR_ODD(ROW3), CLEAR_EVEN(ROW4))
    JZ("%%eax", "1f")
    iMTX_MULT("3*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+3*16), PUT_ODD(ROW3))

    /* %eax = row 5 non-zero, %edx = row 6 non-zero, %esi = row 7 non-zero */
    TEST_TWO_ROWS("5*16(%0)", "6*16(%0)", "%%eax", "%%edx", CLEAR_ODD(ROW5), CLEAR_EVEN(ROW6))
    TEST_ONE_ROW("7*16(%0)", "%%esi", CLEAR_ODD(ROW7))
    iLLM_HEAD
    ASMALIGN(4)
    /* dispatch to the first non-zero row among 4..7 */
    JNZ("%%ecx", "2f")
    JNZ("%%eax", "3f")
    JNZ("%%edx", "4f")
    JNZ("%%esi", "5f")
    /* rows 4-7 are all zero: reduced column pass, then done */
    iLLM_PASS_SPARSE("%0")
    "jmp 6f                                                      \n\t"
    "2:                                                          \n\t"
    /* row 4: "#" comments out the rounder line, so no rounding term is added */
    iMTX_MULT("4*16(%0)", MANGLE(iTab1), "#", PUT_EVEN(ROW4))
    "3:                                                          \n\t"
    iMTX_MULT("5*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+4*16), PUT_ODD(ROW5))
    JZ("%%edx", "1f")
    "4:                                                          \n\t"
    iMTX_MULT("6*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+5*16), PUT_EVEN(ROW6))
    JZ("%%esi", "1f")
    "5:                                                          \n\t"
    iMTX_MULT("7*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+5*16), PUT_ODD(ROW7))
    /* on 32-bit TAN3/TAN1 are %xmm0/%xmm2, which the iMTX_MULTs above have
     * overwritten, so the constants must be loaded again */
#if !ARCH_X86_64
    iLLM_HEAD
#endif
    iLLM_PASS("%0")
    "6:                                                          \n\t"
    : "+r"(block)
    :
    : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" ,
                   "%xmm4" , "%xmm5" , "%xmm6" , "%xmm7" ,)
#if ARCH_X86_64
      XMM_CLOBBERS("%xmm8" , "%xmm9" , "%xmm10", "%xmm11",
                   "%xmm12", "%xmm13", "%xmm14",)
#endif
      "%eax", "%ecx", "%edx", "%esi", "memory"
    );
}

/**
 * Runs ff_idct_xvid_sse2() on block in place, then passes the result to
 * ff_put_pixels_clamped_mmx().
 *
 * @param dest      destination pixel buffer
 * @param line_size forwarded unchanged to ff_put_pixels_clamped_mmx();
 *                  presumably the byte stride between rows of dest -- confirm
 * @param block     coefficient block; overwritten by the inverse transform
 *
 * NOTE(review): ff_put_pixels_clamped_mmx() is defined outside this file; by
 * its name it presumably stores the clamped samples to dest -- confirm there.
 */
void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block)
{
    ff_idct_xvid_sse2(block);
    ff_put_pixels_clamped_mmx(block, dest, line_size);
}

/**
 * Runs ff_idct_xvid_sse2() on block in place, then passes the result to
 * ff_add_pixels_clamped_mmx().
 *
 * @param dest      destination pixel buffer
 * @param line_size forwarded unchanged to ff_add_pixels_clamped_mmx();
 *                  presumably the byte stride between rows of dest -- confirm
 * @param block     coefficient block; overwritten by the inverse transform
 *
 * NOTE(review): ff_add_pixels_clamped_mmx() is defined outside this file; by
 * its name it presumably adds the samples to dest with clamping -- confirm
 * there.
 */
void ff_idct_xvid_sse2_add(uint8_t *dest, int line_size, short *block)
{
    ff_idct_xvid_sse2(block);
    ff_add_pixels_clamped_mmx(block, dest, line_size);
}
Add a new xvid-style IDCT using SSE2. Originally committed as revision 12843 to svn://svn.ffmpeg.org/ffmpeg/trunk 17 years ago			`/*`
			`* XVID MPEG-4 VIDEO CODEC`
			`* - SSE2 inverse discrete cosine transform -`
			`*`
			`* Copyright(C) 2003 Pascal Massimino <skal@planet-d.net>`
			`*`
			`* Conversion to gcc syntax with modifications`
			`* by Alexander Strange <astrange@ithinksw.com>`
			`*`
			`* Originally from dct/x86_asm/fdct_sse2_skal.asm in Xvid.`
			`*`
			`* This file is part of FFmpeg.`
			`*`
			`* Vertical pass is an implementation of the scheme:`
			`* Loeffler C., Ligtenberg A., and Moschytz C.S.:`
			`* Practical Fast 1D DCT Algorithm with Eleven Multiplications,`
			`* Proc. ICASSP 1989, 988-991.`
			`*`
			`* Horizontal pass is a double 4x4 vector/matrix multiplication,`
			`* (see also Intel's Application Note 922:`
			`* http://developer.intel.com/vtune/cbts/strmsimd/922down.htm`
			`* Copyright (C) 1999 Intel Corporation)`
			`*`
			`* More details at http://skal.planet-d.net/coding/dct.html`
			`*`
			`* FFmpeg is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2.1 of the License, or (at your option) any later version.`
			`*`
			`* FFmpeg is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public License`
			`* along with FFmpeg; if not, write to the Free Software Foundation,`
			`* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA`
			`*/`

Use full path for #includes from another directory. Originally committed as revision 13098 to svn://svn.ffmpeg.org/ffmpeg/trunk 17 years ago			`#include "libavcodec/dsputil.h"`
idct_sse2_xvid: add xmm registers to clobber list Originally committed as revision 25608 to svn://svn.ffmpeg.org/ffmpeg/trunk 14 years ago			`#include "libavutil/x86_cpu.h"`
Do not use full include path for headers in the same directory. Originally committed as revision 16260 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`#include "idct_xvid.h"`
Move declarations of some mmx functions to dsputil_mmx.h Originally committed as revision 19739 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`#include "dsputil_mmx.h"`
Add a new xvid-style IDCT using SSE2. Originally committed as revision 12843 to svn://svn.ffmpeg.org/ffmpeg/trunk 17 years ago
			`/*!`
Remove explicit filename from Doxygen @file commands. Passing an explicit filename to this command is only necessary if the documentation in the @file block refers to a file different from the one the block resides in. Originally committed as revision 22921 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`* @file`
Add a new xvid-style IDCT using SSE2. Originally committed as revision 12843 to svn://svn.ffmpeg.org/ffmpeg/trunk 17 years ago			`* @brief SSE2 idct compatible with xvidmmx`
			`*/`

			`#define X8(x) x,x,x,x,x,x,x,x`

			`#define ROW_SHIFT 11`
			`#define COL_SHIFT 6`

Move array specifiers outside DECLARE_ALIGNED() invocations Originally committed as revision 21377 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`DECLARE_ASM_CONST(16, int16_t, tan1)[] = {X8(13036)}; // tan( pi/16)`
			`DECLARE_ASM_CONST(16, int16_t, tan2)[] = {X8(27146)}; // tan(2pi/16) = sqrt(2)-1`
			`DECLARE_ASM_CONST(16, int16_t, tan3)[] = {X8(43790)}; // tan(3pi/16)-1`
			`DECLARE_ASM_CONST(16, int16_t, sqrt2)[]= {X8(23170)}; // 0.5/sqrt(2)`
			`DECLARE_ASM_CONST(8, uint8_t, m127)[] = {X8(127)};`
Add a new xvid-style IDCT using SSE2. Originally committed as revision 12843 to svn://svn.ffmpeg.org/ffmpeg/trunk 17 years ago
Move array specifiers outside DECLARE_ALIGNED() invocations Originally committed as revision 21377 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`DECLARE_ASM_CONST(16, int16_t, iTab1)[] = {`
Add a new xvid-style IDCT using SSE2. Originally committed as revision 12843 to svn://svn.ffmpeg.org/ffmpeg/trunk 17 years ago			`0x4000, 0x539f, 0xc000, 0xac61, 0x4000, 0xdd5d, 0x4000, 0xdd5d,`
			`0x4000, 0x22a3, 0x4000, 0x22a3, 0xc000, 0x539f, 0x4000, 0xac61,`
			`0x3249, 0x11a8, 0x4b42, 0xee58, 0x11a8, 0x4b42, 0x11a8, 0xcdb7,`
			`0x58c5, 0x4b42, 0xa73b, 0xcdb7, 0x3249, 0xa73b, 0x4b42, 0xa73b`
			`};`

Move array specifiers outside DECLARE_ALIGNED() invocations Originally committed as revision 21377 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`DECLARE_ASM_CONST(16, int16_t, iTab2)[] = {`
Add a new xvid-style IDCT using SSE2. Originally committed as revision 12843 to svn://svn.ffmpeg.org/ffmpeg/trunk 17 years ago			`0x58c5, 0x73fc, 0xa73b, 0x8c04, 0x58c5, 0xcff5, 0x58c5, 0xcff5,`
			`0x58c5, 0x300b, 0x58c5, 0x300b, 0xa73b, 0x73fc, 0x58c5, 0x8c04,`
			`0x45bf, 0x187e, 0x6862, 0xe782, 0x187e, 0x6862, 0x187e, 0xba41,`
			`0x7b21, 0x6862, 0x84df, 0xba41, 0x45bf, 0x84df, 0x6862, 0x84df`
			`};`

Move array specifiers outside DECLARE_ALIGNED() invocations Originally committed as revision 21377 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`DECLARE_ASM_CONST(16, int16_t, iTab3)[] = {`
Add a new xvid-style IDCT using SSE2. Originally committed as revision 12843 to svn://svn.ffmpeg.org/ffmpeg/trunk 17 years ago			`0x539f, 0x6d41, 0xac61, 0x92bf, 0x539f, 0xd2bf, 0x539f, 0xd2bf,`
			`0x539f, 0x2d41, 0x539f, 0x2d41, 0xac61, 0x6d41, 0x539f, 0x92bf,`
			`0x41b3, 0x1712, 0x6254, 0xe8ee, 0x1712, 0x6254, 0x1712, 0xbe4d,`
			`0x73fc, 0x6254, 0x8c04, 0xbe4d, 0x41b3, 0x8c04, 0x6254, 0x8c04`
			`};`

Move array specifiers outside DECLARE_ALIGNED() invocations Originally committed as revision 21377 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`DECLARE_ASM_CONST(16, int16_t, iTab4)[] = {`
Add a new xvid-style IDCT using SSE2. Originally committed as revision 12843 to svn://svn.ffmpeg.org/ffmpeg/trunk 17 years ago			`0x4b42, 0x6254, 0xb4be, 0x9dac, 0x4b42, 0xd746, 0x4b42, 0xd746,`
			`0x4b42, 0x28ba, 0x4b42, 0x28ba, 0xb4be, 0x6254, 0x4b42, 0x9dac,`
			`0x3b21, 0x14c3, 0x587e, 0xeb3d, 0x14c3, 0x587e, 0x14c3, 0xc4df,`
			`0x6862, 0x587e, 0x979e, 0xc4df, 0x3b21, 0x979e, 0x587e, 0x979e`
			`};`

Move array specifiers outside DECLARE_ALIGNED() invocations Originally committed as revision 21377 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders)[] = {`
Add a new xvid-style IDCT using SSE2. Originally committed as revision 12843 to svn://svn.ffmpeg.org/ffmpeg/trunk 17 years ago			`65536, 65536, 65536, 65536,`
			`3597, 3597, 3597, 3597,`
			`2260, 2260, 2260, 2260,`
			`1203, 1203, 1203, 1203,`
			`120, 120, 120, 120,`
			`512, 512, 512, 512`
			`};`

			`// Temporary storage before the column pass`
			`#define ROW1 "%%xmm6"`
			`#define ROW3 "%%xmm4"`
			`#define ROW5 "%%xmm5"`
			`#define ROW7 "%%xmm7"`

			`#define CLEAR_ODD(r) "pxor "r","r" \n\t"`
			`#define PUT_ODD(dst) "pshufhw $0x1B, %%xmm2, "dst" \n\t"`

Change semantic of CONFIG_, HAVE_ and ARCH_*. They are now always defined to either 0 or 1. Originally committed as revision 16590 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`#if ARCH_X86_64`
Add a new xvid-style IDCT using SSE2. Originally committed as revision 12843 to svn://svn.ffmpeg.org/ffmpeg/trunk 17 years ago
			`# define ROW0 "%%xmm8"`
			`# define REG0 ROW0`
			`# define ROW2 "%%xmm9"`
			`# define REG2 ROW2`
			`# define ROW4 "%%xmm10"`
			`# define REG4 ROW4`
			`# define ROW6 "%%xmm11"`
			`# define REG6 ROW6`
			`# define CLEAR_EVEN(r) CLEAR_ODD(r)`
			`# define PUT_EVEN(dst) PUT_ODD(dst)`
			`# define XMMS "%%xmm12"`
			`# define MOV_32_ONLY "#"`
			`# define SREG2 REG2`
			`# define TAN3 "%%xmm13"`
			`# define TAN1 "%%xmm14"`

			`#else`

			`# define ROW0 "(%0)"`
			`# define REG0 "%%xmm4"`
			`# define ROW2 "2*16(%0)"`
			`# define REG2 "%%xmm4"`
			`# define ROW4 "4*16(%0)"`
			`# define REG4 "%%xmm6"`
			`# define ROW6 "6*16(%0)"`
			`# define REG6 "%%xmm6"`
			`# define CLEAR_EVEN(r)`
			`# define PUT_EVEN(dst) \`
			`"pshufhw $0x1B, %%xmm2, %%xmm2 \n\t" \`
			`"movdqa %%xmm2, "dst" \n\t"`
			`# define XMMS "%%xmm2"`
			`# define MOV_32_ONLY "movdqa "`
			`# define SREG2 "%%xmm7"`
			`# define TAN3 "%%xmm0"`
			`# define TAN1 "%%xmm2"`

			`#endif`

			`#define ROUND(x) "paddd "MANGLE(x)`

			`#define JZ(reg, to) \`
			`"testl "reg","reg" \n\t" \`
			`"jz "to" \n\t"`

			`#define JNZ(reg, to) \`
			`"testl "reg","reg" \n\t" \`
			`"jnz "to" \n\t"`

			`#define TEST_ONE_ROW(src, reg, clear) \`
			`clear \`
			`"movq "src", %%mm1 \n\t" \`
			`"por 8+"src", %%mm1 \n\t" \`
			`"paddusb %%mm0, %%mm1 \n\t" \`
			`"pmovmskb %%mm1, "reg" \n\t"`

			`#define TEST_TWO_ROWS(row1, row2, reg1, reg2, clear1, clear2) \`
			`clear1 \`
			`clear2 \`
			`"movq "row1", %%mm1 \n\t" \`
			`"por 8+"row1", %%mm1 \n\t" \`
			`"movq "row2", %%mm2 \n\t" \`
			`"por 8+"row2", %%mm2 \n\t" \`
			`"paddusb %%mm0, %%mm1 \n\t" \`
			`"paddusb %%mm0, %%mm2 \n\t" \`
			`"pmovmskb %%mm1, "reg1" \n\t" \`
			`"pmovmskb %%mm2, "reg2" \n\t"`

			`///IDCT pass on rows.`
			`#define iMTX_MULT(src, table, rounder, put) \`
			`"movdqa "src", %%xmm3 \n\t" \`
			`"movdqa %%xmm3, %%xmm0 \n\t" \`
			`"pshufd $0x11, %%xmm3, %%xmm1 \n\t" /* 4602 */ \`
			`"punpcklqdq %%xmm0, %%xmm0 \n\t" /* 0246 */ \`
			`"pmaddwd "table", %%xmm0 \n\t" \`
			`"pmaddwd 16+"table", %%xmm1 \n\t" \`
			`"pshufd $0xBB, %%xmm3, %%xmm2 \n\t" /* 5713 */ \`
			`"punpckhqdq %%xmm3, %%xmm3 \n\t" /* 1357 */ \`
			`"pmaddwd 32+"table", %%xmm2 \n\t" \`
			`"pmaddwd 48+"table", %%xmm3 \n\t" \`
			`"paddd %%xmm1, %%xmm0 \n\t" \`
			`"paddd %%xmm3, %%xmm2 \n\t" \`
			`rounder", %%xmm0 \n\t" \`
			`"movdqa %%xmm2, %%xmm3 \n\t" \`
			`"paddd %%xmm0, %%xmm2 \n\t" \`
			`"psubd %%xmm3, %%xmm0 \n\t" \`
			`"psrad $11, %%xmm2 \n\t" \`
			`"psrad $11, %%xmm0 \n\t" \`
			`"packssdw %%xmm0, %%xmm2 \n\t" \`
			`put \`
			`"1: \n\t"`

			`#define iLLM_HEAD \`
			`"movdqa "MANGLE(tan3)", "TAN3" \n\t" \`
			`"movdqa "MANGLE(tan1)", "TAN1" \n\t" \`

			`///IDCT pass on columns.`
			`#define iLLM_PASS(dct) \`
			`"movdqa "TAN3", %%xmm1 \n\t" \`
			`"movdqa "TAN1", %%xmm3 \n\t" \`
			`"pmulhw %%xmm4, "TAN3" \n\t" \`
			`"pmulhw %%xmm5, %%xmm1 \n\t" \`
			`"paddsw %%xmm4, "TAN3" \n\t" \`
			`"paddsw %%xmm5, %%xmm1 \n\t" \`
			`"psubsw %%xmm5, "TAN3" \n\t" \`
			`"paddsw %%xmm4, %%xmm1 \n\t" \`
			`"pmulhw %%xmm7, %%xmm3 \n\t" \`
			`"pmulhw %%xmm6, "TAN1" \n\t" \`
			`"paddsw %%xmm6, %%xmm3 \n\t" \`
			`"psubsw %%xmm7, "TAN1" \n\t" \`
			`"movdqa %%xmm3, %%xmm7 \n\t" \`
			`"movdqa "TAN1", %%xmm6 \n\t" \`
			`"psubsw %%xmm1, %%xmm3 \n\t" \`
			`"psubsw "TAN3", "TAN1" \n\t" \`
			`"paddsw %%xmm7, %%xmm1 \n\t" \`
			`"paddsw %%xmm6, "TAN3" \n\t" \`
			`"movdqa %%xmm3, %%xmm6 \n\t" \`
			`"psubsw "TAN3", %%xmm3 \n\t" \`
			`"paddsw %%xmm6, "TAN3" \n\t" \`
			`"movdqa "MANGLE(sqrt2)", %%xmm4 \n\t" \`
			`"pmulhw %%xmm4, %%xmm3 \n\t" \`
			`"pmulhw %%xmm4, "TAN3" \n\t" \`
			`"paddsw "TAN3", "TAN3" \n\t" \`
			`"paddsw %%xmm3, %%xmm3 \n\t" \`
			`"movdqa "MANGLE(tan2)", %%xmm7 \n\t" \`
			`MOV_32_ONLY ROW2", "REG2" \n\t" \`
			`MOV_32_ONLY ROW6", "REG6" \n\t" \`
			`"movdqa %%xmm7, %%xmm5 \n\t" \`
			`"pmulhw "REG6", %%xmm7 \n\t" \`
			`"pmulhw "REG2", %%xmm5 \n\t" \`
			`"paddsw "REG2", %%xmm7 \n\t" \`
			`"psubsw "REG6", %%xmm5 \n\t" \`
			`MOV_32_ONLY ROW0", "REG0" \n\t" \`
			`MOV_32_ONLY ROW4", "REG4" \n\t" \`
			`MOV_32_ONLY" "TAN1", (%0) \n\t" \`
			`"movdqa "REG0", "XMMS" \n\t" \`
			`"psubsw "REG4", "REG0" \n\t" \`
			`"paddsw "XMMS", "REG4" \n\t" \`
			`"movdqa "REG4", "XMMS" \n\t" \`
			`"psubsw %%xmm7, "REG4" \n\t" \`
			`"paddsw "XMMS", %%xmm7 \n\t" \`
			`"movdqa "REG0", "XMMS" \n\t" \`
			`"psubsw %%xmm5, "REG0" \n\t" \`
			`"paddsw "XMMS", %%xmm5 \n\t" \`
			`"movdqa %%xmm5, "XMMS" \n\t" \`
			`"psubsw "TAN3", %%xmm5 \n\t" \`
			`"paddsw "XMMS", "TAN3" \n\t" \`
			`"movdqa "REG0", "XMMS" \n\t" \`
			`"psubsw %%xmm3, "REG0" \n\t" \`
			`"paddsw "XMMS", %%xmm3 \n\t" \`
			`MOV_32_ONLY" (%0), "TAN1" \n\t" \`
			`"psraw $6, %%xmm5 \n\t" \`
			`"psraw $6, "REG0" \n\t" \`
			`"psraw $6, "TAN3" \n\t" \`
			`"psraw $6, %%xmm3 \n\t" \`
			`"movdqa "TAN3", 1*16("dct") \n\t" \`
			`"movdqa %%xmm3, 2*16("dct") \n\t" \`
			`"movdqa "REG0", 5*16("dct") \n\t" \`
			`"movdqa %%xmm5, 6*16("dct") \n\t" \`
			`"movdqa %%xmm7, %%xmm0 \n\t" \`
			`"movdqa "REG4", %%xmm4 \n\t" \`
			`"psubsw %%xmm1, %%xmm7 \n\t" \`
			`"psubsw "TAN1", "REG4" \n\t" \`
			`"paddsw %%xmm0, %%xmm1 \n\t" \`
			`"paddsw %%xmm4, "TAN1" \n\t" \`
			`"psraw $6, %%xmm1 \n\t" \`
			`"psraw $6, %%xmm7 \n\t" \`
			`"psraw $6, "TAN1" \n\t" \`
			`"psraw $6, "REG4" \n\t" \`
			`"movdqa %%xmm1, ("dct") \n\t" \`
			`"movdqa "TAN1", 3*16("dct") \n\t" \`
			`"movdqa "REG4", 4*16("dct") \n\t" \`
			`"movdqa %%xmm7, 7*16("dct") \n\t"`

			`///IDCT pass on columns, assuming rows 4-7 are zero.`
			`#define iLLM_PASS_SPARSE(dct) \`
			`"pmulhw %%xmm4, "TAN3" \n\t" \`
			`"paddsw %%xmm4, "TAN3" \n\t" \`
			`"movdqa %%xmm6, %%xmm3 \n\t" \`
			`"pmulhw %%xmm6, "TAN1" \n\t" \`
			`"movdqa %%xmm4, %%xmm1 \n\t" \`
			`"psubsw %%xmm1, %%xmm3 \n\t" \`
			`"paddsw %%xmm6, %%xmm1 \n\t" \`
			`"movdqa "TAN1", %%xmm6 \n\t" \`
			`"psubsw "TAN3", "TAN1" \n\t" \`
			`"paddsw %%xmm6, "TAN3" \n\t" \`
			`"movdqa %%xmm3, %%xmm6 \n\t" \`
			`"psubsw "TAN3", %%xmm3 \n\t" \`
			`"paddsw %%xmm6, "TAN3" \n\t" \`
			`"movdqa "MANGLE(sqrt2)", %%xmm4 \n\t" \`
			`"pmulhw %%xmm4, %%xmm3 \n\t" \`
			`"pmulhw %%xmm4, "TAN3" \n\t" \`
			`"paddsw "TAN3", "TAN3" \n\t" \`
			`"paddsw %%xmm3, %%xmm3 \n\t" \`
			`"movdqa "MANGLE(tan2)", %%xmm5 \n\t" \`
			`MOV_32_ONLY ROW2", "SREG2" \n\t" \`
			`"pmulhw "SREG2", %%xmm5 \n\t" \`
			`MOV_32_ONLY ROW0", "REG0" \n\t" \`
			`"movdqa "REG0", %%xmm6 \n\t" \`
			`"psubsw "SREG2", %%xmm6 \n\t" \`
			`"paddsw "REG0", "SREG2" \n\t" \`
			`MOV_32_ONLY" "TAN1", (%0) \n\t" \`
			`"movdqa "REG0", "XMMS" \n\t" \`
			`"psubsw %%xmm5, "REG0" \n\t" \`
			`"paddsw "XMMS", %%xmm5 \n\t" \`
			`"movdqa %%xmm5, "XMMS" \n\t" \`
			`"psubsw "TAN3", %%xmm5 \n\t" \`
			`"paddsw "XMMS", "TAN3" \n\t" \`
			`"movdqa "REG0", "XMMS" \n\t" \`
			`"psubsw %%xmm3, "REG0" \n\t" \`
			`"paddsw "XMMS", %%xmm3 \n\t" \`
			`MOV_32_ONLY" (%0), "TAN1" \n\t" \`
			`"psraw $6, %%xmm5 \n\t" \`
			`"psraw $6, "REG0" \n\t" \`
			`"psraw $6, "TAN3" \n\t" \`
			`"psraw $6, %%xmm3 \n\t" \`
			`"movdqa "TAN3", 1*16("dct") \n\t" \`
			`"movdqa %%xmm3, 2*16("dct") \n\t" \`
			`"movdqa "REG0", 5*16("dct") \n\t" \`
			`"movdqa %%xmm5, 6*16("dct") \n\t" \`
			`"movdqa "SREG2", %%xmm0 \n\t" \`
			`"movdqa %%xmm6, %%xmm4 \n\t" \`
			`"psubsw %%xmm1, "SREG2" \n\t" \`
			`"psubsw "TAN1", %%xmm6 \n\t" \`
			`"paddsw %%xmm0, %%xmm1 \n\t" \`
			`"paddsw %%xmm4, "TAN1" \n\t" \`
			`"psraw $6, %%xmm1 \n\t" \`
			`"psraw $6, "SREG2" \n\t" \`
			`"psraw $6, "TAN1" \n\t" \`
			`"psraw $6, %%xmm6 \n\t" \`
			`"movdqa %%xmm1, ("dct") \n\t" \`
			`"movdqa "TAN1", 3*16("dct") \n\t" \`
			`"movdqa %%xmm6, 4*16("dct") \n\t" \`
			`"movdqa "SREG2", 7*16("dct") \n\t"`

			`inline void ff_idct_xvid_sse2(short *block)`
			`{`
Convert asm keyword into __asm__. Neither the asm() nor the __asm__() keyword is part of the C99 standard, but while GCC accepts the former in C89 syntax, it is not accepted in C99 unless GNU extensions are turned on (with -fasm). The latter form is accepted in any syntax as an extension (without requiring further command-line options). Sun Studio C99 compiler also does not accept asm() while accepting __asm__(), albeit reporting warnings that it's not valid C99 syntax. Originally committed as revision 15627 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`__asm__ volatile(`
Add a new xvid-style IDCT using SSE2. Originally committed as revision 12843 to svn://svn.ffmpeg.org/ffmpeg/trunk 17 years ago			`"movq "MANGLE(m127)", %%mm0 \n\t"`
			`iMTX_MULT("(%0)", MANGLE(iTab1), ROUND(walkenIdctRounders), PUT_EVEN(ROW0))`
			`iMTX_MULT("1*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+1*16), PUT_ODD(ROW1))`
			`iMTX_MULT("2*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+2*16), PUT_EVEN(ROW2))`

			`TEST_TWO_ROWS("3*16(%0)", "4*16(%0)", "%%eax", "%%ecx", CLEAR_ODD(ROW3), CLEAR_EVEN(ROW4))`
			`JZ("%%eax", "1f")`
			`iMTX_MULT("3*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+3*16), PUT_ODD(ROW3))`

			`TEST_TWO_ROWS("5*16(%0)", "6*16(%0)", "%%eax", "%%edx", CLEAR_ODD(ROW5), CLEAR_EVEN(ROW6))`
			`TEST_ONE_ROW("7*16(%0)", "%%esi", CLEAR_ODD(ROW7))`
			`iLLM_HEAD`
			`ASMALIGN(4)`
			`JNZ("%%ecx", "2f")`
			`JNZ("%%eax", "3f")`
			`JNZ("%%edx", "4f")`
			`JNZ("%%esi", "5f")`
			`iLLM_PASS_SPARSE("%0")`
			`"jmp 6f \n\t"`
			`"2: \n\t"`
			`iMTX_MULT("4*16(%0)", MANGLE(iTab1), "#", PUT_EVEN(ROW4))`
			`"3: \n\t"`
			`iMTX_MULT("5*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+4*16), PUT_ODD(ROW5))`
			`JZ("%%edx", "1f")`
			`"4: \n\t"`
			`iMTX_MULT("6*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+5*16), PUT_EVEN(ROW6))`
			`JZ("%%esi", "1f")`
			`"5: \n\t"`
			`iMTX_MULT("7*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+5*16), PUT_ODD(ROW7))`
Change semantic of CONFIG_, HAVE_ and ARCH_*. They are now always defined to either 0 or 1. Originally committed as revision 16590 to svn://svn.ffmpeg.org/ffmpeg/trunk 16 years ago			`#if !ARCH_X86_64`
Add a new xvid-style IDCT using SSE2. Originally committed as revision 12843 to svn://svn.ffmpeg.org/ffmpeg/trunk 17 years ago			`iLLM_HEAD`
			`#endif`
			`iLLM_PASS("%0")`
			`"6: \n\t"`
			`: "+r"(block)`
			`:`
xmm_clobbers: list xmm registers first in clobber list suncc does not like the leading commas inside the macro, but it has no problem with trailing commas. Originally committed as revision 25615 to svn://svn.ffmpeg.org/ffmpeg/trunk 14 years ago			`: XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" ,`
			`"%xmm4" , "%xmm5" , "%xmm6" , "%xmm7" ,)`
idct_sse2_xvid: only mark xmm>=8 as clobbered on x86_64 Originally committed as revision 25614 to svn://svn.ffmpeg.org/ffmpeg/trunk 14 years ago			`#if ARCH_X86_64`
xmm_clobbers: list xmm registers first in clobber list suncc does not like the leading commas inside the macro, but it has no problem with trailing commas. Originally committed as revision 25615 to svn://svn.ffmpeg.org/ffmpeg/trunk 14 years ago			`XMM_CLOBBERS("%xmm8" , "%xmm9" , "%xmm10", "%xmm11",`
			`"%xmm12", "%xmm13", "%xmm14",)`
idct_sse2_xvid: only mark xmm>=8 as clobbered on x86_64 Originally committed as revision 25614 to svn://svn.ffmpeg.org/ffmpeg/trunk 14 years ago			`#endif`
xmm_clobbers: list xmm registers first in clobber list suncc does not like the leading commas inside the macro, but it has no problem with trailing commas. Originally committed as revision 25615 to svn://svn.ffmpeg.org/ffmpeg/trunk 14 years ago			`"%eax", "%ecx", "%edx", "%esi", "memory"`
idct_sse2_xvid: add xmm registers to clobber list Originally committed as revision 25608 to svn://svn.ffmpeg.org/ffmpeg/trunk 14 years ago			`);`
Add a new xvid-style IDCT using SSE2. Originally committed as revision 12843 to svn://svn.ffmpeg.org/ffmpeg/trunk 17 years ago			`}`

			`void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block)`
			`{`
			`ff_idct_xvid_sse2(block);`
Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx() functions. Originally committed as revision 24987 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`ff_put_pixels_clamped_mmx(block, dest, line_size);`
Add a new xvid-style IDCT using SSE2. Originally committed as revision 12843 to svn://svn.ffmpeg.org/ffmpeg/trunk 17 years ago			`}`

			`void ff_idct_xvid_sse2_add(uint8_t *dest, int line_size, short *block)`
			`{`
			`ff_idct_xvid_sse2(block);`
Put ff_ prefix on non-static {put_signed,put,add}_pixels_clamped_mmx() functions. Originally committed as revision 24987 to svn://svn.ffmpeg.org/ffmpeg/trunk 15 years ago			`ff_add_pixels_clamped_mmx(block, dest, line_size);`
Add a new xvid-style IDCT using SSE2. Originally committed as revision 12843 to svn://svn.ffmpeg.org/ffmpeg/trunk 17 years ago			`}`