mirror of https://github.com/FFmpeg/FFmpeg.git
parent
869fc416f7
commit
e74433a8e6
53 changed files with 677 additions and 361 deletions
@ -0,0 +1,26 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef AVCODEC_ARM_BLOCKDSP_ARM_H |
||||||
|
#define AVCODEC_ARM_BLOCKDSP_ARM_H |
||||||
|
|
||||||
|
#include "libavcodec/blockdsp.h" |
||||||
|
|
||||||
|
void ff_blockdsp_init_neon(BlockDSPContext *c, unsigned high_bit_depth); |
||||||
|
|
||||||
|
#endif /* AVCODEC_ARM_BLOCKDSP_ARM_H */ |
@ -0,0 +1,33 @@ |
|||||||
|
/*
|
||||||
|
* ARM optimized block operations |
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "libavutil/attributes.h" |
||||||
|
#include "libavutil/cpu.h" |
||||||
|
#include "libavutil/arm/cpu.h" |
||||||
|
#include "libavcodec/blockdsp.h" |
||||||
|
#include "blockdsp_arm.h" |
||||||
|
|
||||||
|
/**
 * ARM entry point for BlockDSP initialisation.
 *
 * Queries the CPU flags and, when they report NEON, hands the context to
 * ff_blockdsp_init_neon(). Otherwise the context is left untouched.
 *
 * @param c              context whose function pointers may be replaced
 * @param high_bit_depth passed through unchanged to the NEON initialiser
 */
av_cold void ff_blockdsp_init_arm(BlockDSPContext *c, unsigned high_bit_depth)
{
    const int flags = av_get_cpu_flags();

    /* Nothing to install unless NEON is reported. */
    if (!have_neon(flags))
        return;

    ff_blockdsp_init_neon(c, high_bit_depth);
}
@ -0,0 +1,37 @@ |
|||||||
|
/*
|
||||||
|
* ARM NEON optimised block operations |
||||||
|
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com> |
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
#include "libavutil/attributes.h" |
||||||
|
#include "libavcodec/blockdsp.h" |
||||||
|
#include "blockdsp_arm.h" |
||||||
|
|
||||||
|
void ff_clear_block_neon(int16_t *block); |
||||||
|
void ff_clear_blocks_neon(int16_t *blocks); |
||||||
|
|
||||||
|
/**
 * Point the block-clearing callbacks at the NEON implementations.
 *
 * The NEON routines are only installed when high_bit_depth is zero; for a
 * non-zero value the context is left exactly as it was passed in.
 *
 * @param c              context to update
 * @param high_bit_depth non-zero to skip the NEON routines
 */
av_cold void ff_blockdsp_init_neon(BlockDSPContext *c, unsigned high_bit_depth)
{
    if (high_bit_depth)
        return;

    c->clear_block  = ff_clear_block_neon;
    c->clear_blocks = ff_clear_blocks_neon;
}
@ -0,0 +1,38 @@ |
|||||||
|
/* |
||||||
|
* ARM NEON optimised block functions |
||||||
|
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
|
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "libavutil/arm/asm.S" |
||||||
|
|
||||||
|
/*
 * void ff_clear_block_neon(int16_t *block)
 *
 * r0 is used as the destination address. q0 is zeroed once and then stored
 * eight times, 16 bytes per store, with r0 post-incremented after each store
 * (8 * 16 = 128 bytes in total). The :128 qualifier tells the CPU that r0 is
 * 128-bit aligned. {d0-d1} names the same 128-bit register as q0.
 */
function ff_clear_block_neon, export=1
        vmov.i16        q0,  #0
.rept 8
        vst1.16         {d0-d1}, [r0,:128]!
.endr
        bx              lr
endfunc
||||||
|
|
||||||
|
/*
 * void ff_clear_blocks_neon(int16_t *blocks)
 *
 * Same store pattern as the single-block routine, repeated 8 * 6 = 48 times:
 * 48 post-incremented 16-byte stores of a zeroed q0 through r0, i.e. 768
 * bytes. The :128 qualifier tells the CPU that r0 is 128-bit aligned.
 * {d0-d1} names the same 128-bit register as q0.
 */
function ff_clear_blocks_neon, export=1
        vmov.i16        q0,  #0
.rept 48
        vst1.16         {d0-d1}, [r0,:128]!
.endr
        bx              lr
endfunc
@ -0,0 +1,78 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
#include <string.h> |
||||||
|
|
||||||
|
#include "config.h" |
||||||
|
#include "libavutil/attributes.h" |
||||||
|
#include "avcodec.h" |
||||||
|
#include "blockdsp.h" |
||||||
|
#include "version.h" |
||||||
|
|
||||||
|
/**
 * Portable C fallback: set one block of 64 int16_t coefficients to zero.
 *
 * @param block start of the 64-element block
 */
static void clear_block_8_c(int16_t *block)
{
    memset(block, 0, 64 * sizeof(*block));
}
||||||
|
|
||||||
|
/**
 * Portable C fallback: set six consecutive blocks of 64 int16_t coefficients
 * (384 elements in total) to zero.
 *
 * @param blocks start of the first block
 */
static void clear_blocks_8_c(int16_t *blocks)
{
    memset(blocks, 0, 6 * 64 * sizeof(*blocks));
}
||||||
|
|
||||||
|
/**
 * Fill h rows of 16 bytes each with a constant byte.
 *
 * @param block     first byte of the first row
 * @param value     byte written to every position of each row
 * @param line_size byte offset from the start of one row to the next
 * @param h         number of rows; nothing is written when h <= 0
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int y;

    for (y = 0; y < h; y++, row += line_size)
        memset(row, value, 16);
}
||||||
|
|
||||||
|
/**
 * Fill h rows of 8 bytes each with a constant byte.
 *
 * @param block     first byte of the first row
 * @param value     byte written to every position of each row
 * @param line_size byte offset from the start of one row to the next
 * @param h         number of rows; nothing is written when h <= 0
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int y;

    for (y = 0; y < h; y++, row += line_size)
        memset(row, value, 8);
}
||||||
|
|
||||||
|
/**
 * Populate a BlockDSPContext.
 *
 * The portable C routines are installed first; the architecture-specific
 * initialisers selected by the ARCH_* configuration constants then get a
 * chance to replace them.
 *
 * @param c     context to fill in
 * @param avctx codec context; bits_per_raw_sample > 8 is reported to the
 *              architecture initialisers as high_bit_depth, and avctx itself
 *              is forwarded to the x86 initialiser while FF_API_XVMC is set
 */
av_cold void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx)
{
    const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;

    /* Portable defaults. */
    c->clear_block       = clear_block_8_c;
    c->clear_blocks      = clear_blocks_8_c;
    c->fill_block_tab[0] = fill_block16_c;
    c->fill_block_tab[1] = fill_block8_c;

    /* Architecture-specific overrides. */
    if (ARCH_ARM) {
        ff_blockdsp_init_arm(c, high_bit_depth);
    }
    if (ARCH_PPC) {
        ff_blockdsp_init_ppc(c, high_bit_depth);
    }
    if (ARCH_X86) {
#if FF_API_XVMC
        ff_blockdsp_init_x86(c, high_bit_depth, avctx);
#else
        ff_blockdsp_init_x86(c, high_bit_depth);
#endif /* FF_API_XVMC */
    }
}
@ -0,0 +1,52 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#ifndef AVCODEC_BLOCKDSP_H |
||||||
|
#define AVCODEC_BLOCKDSP_H |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
#include "avcodec.h" |
||||||
|
#include "version.h" |
||||||
|
|
||||||
|
/**
 * Callback that fills a block of pixels with a single byte value.
 *
 * @param block     destination; expected to be aligned to the block width
 *                  (8 or 16)
 * @param value     byte to write
 * @param line_size byte offset from the start of one row to the next
 * @param h         number of rows to fill
 */
typedef void (*op_fill_func)(uint8_t *block, uint8_t value,
                             int line_size, int h);
||||||
|
|
||||||
|
typedef struct BlockDSPContext { |
||||||
|
void (*clear_block)(int16_t *block /* align 16 */); |
||||||
|
void (*clear_blocks)(int16_t *blocks /* align 16 */); |
||||||
|
|
||||||
|
op_fill_func fill_block_tab[2]; |
||||||
|
} BlockDSPContext; |
||||||
|
|
||||||
|
void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx); |
||||||
|
|
||||||
|
void ff_blockdsp_init_arm(BlockDSPContext *c, unsigned high_bit_depth); |
||||||
|
void ff_blockdsp_init_ppc(BlockDSPContext *c, unsigned high_bit_depth); |
||||||
|
#if FF_API_XVMC |
||||||
|
void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth, |
||||||
|
AVCodecContext *avctx); |
||||||
|
#else |
||||||
|
void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth); |
||||||
|
#endif /* FF_API_XVMC */ |
||||||
|
|
||||||
|
#endif /* AVCODEC_BLOCKDSP_H */ |
@ -0,0 +1,169 @@ |
|||||||
|
/*
|
||||||
|
* Copyright (c) 2002 Brian Foley |
||||||
|
* Copyright (c) 2002 Dieter Shirley |
||||||
|
* Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> |
||||||
|
* |
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include "config.h" |
||||||
|
#if HAVE_ALTIVEC_H |
||||||
|
#include <altivec.h> |
||||||
|
#endif |
||||||
|
#include <string.h> |
||||||
|
|
||||||
|
#include "libavutil/attributes.h" |
||||||
|
#include "libavutil/cpu.h" |
||||||
|
#include "libavutil/mem.h" |
||||||
|
#include "libavutil/ppc/cpu.h" |
||||||
|
#include "libavutil/ppc/types_altivec.h" |
||||||
|
#include "libavcodec/blockdsp.h" |
||||||
|
|
||||||
|
/* ***** WARNING ***** WARNING ***** WARNING ***** */ |
||||||
|
/*
|
||||||
|
* clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with |
||||||
|
* a cache line size not equal to 32 bytes. Fortunately all processors used |
||||||
|
* by Apple up to at least the 7450 (AKA second generation G4) use 32-byte |
||||||
|
* cache lines. This is due to the use of the 'dcbz' instruction. It simply |
||||||
|
* clears a single cache line to zero, so you need to know the cache line |
||||||
|
* size to use it! It's absurd, but it's fast... |
||||||
|
* |
||||||
|
* update 24/06/2003: Apple released the G5 yesterday, with a PPC970. |
||||||
|
* cache line size: 128 bytes. Oups. |
||||||
|
* The semantics of dcbz was changed, it always clears 32 bytes. So the function |
||||||
|
* below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl, |
||||||
|
* which is defined to clear a cache line (as dcbz before). So we can still |
||||||
|
* distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required. |
||||||
|
* |
||||||
|
* see <http://developer.apple.com/technotes/tn/tn2087.html>
|
||||||
|
* and <http://developer.apple.com/technotes/tn/tn2086.html>
|
||||||
|
*/ |
||||||
|
static void clear_blocks_dcbz32_ppc(int16_t *blocks)
{
    /*
     * Clears 6 * 64 int16_t coefficients (768 bytes), issuing one dcbz per
     * 32 bytes. When bit 4 of the address is set the buffer does not start on
     * a 32-byte boundary (assuming the 16-byte alignment callers are expected
     * to provide, it starts and ends half way through a 32-byte line), so the
     * first and last 16 bytes are cleared with memset() and dcbz only covers
     * the whole lines in between.
     *
     * The edges were previously written through an unsigned long pointer,
     * which covers 16 bytes only when unsigned long is 4 bytes wide. With an
     * 8-byte unsigned long the tail stores landed past the end of the buffer
     * and the real last 16 bytes stayed uncleared. Explicit byte counts make
     * the edge handling independent of the width of long.
     */
    uint8_t *const bytes = (uint8_t *) blocks;
    const int misal      = (unsigned long) blocks & 0x00000010;
    int i                = 0;

    if (misal) {
        memset(bytes, 0, 16);
        i += 16;
    }
    for (; i < sizeof(int16_t) * 6 * 64 - 31; i += 32)
        __asm__ volatile ("dcbz %0,%1" :: "b" (blocks), "r" (i) : "memory");
    if (misal)
        memset(bytes + sizeof(int16_t) * 6 * 64 - 16, 0, 16);
}
||||||
|
|
||||||
|
/**
 * Clear 6 * 64 int16_t coefficients (768 bytes) for CPUs where dcbzl clears
 * a whole 128-byte cache line, i.e. the PPC970 AKA G5.
 *
 * One dcbzl is issued per 128 bytes when the buffer is 128-byte aligned.
 * Unaligned buffers, and builds without HAVE_DCBZL, fall back to memset().
 */
static void clear_blocks_dcbz128_ppc(int16_t *blocks)
{
#if HAVE_DCBZL
    int offset;

    if ((unsigned long) blocks & 0x0000007f) {
        /* We could probably also optimize this case,
         * but there's not much point as the machines
         * aren't available yet (2003-06-26). */
        memset(blocks, 0, sizeof(int16_t) * 6 * 64);
        return;
    }

    for (offset = 0; offset < sizeof(int16_t) * 6 * 64; offset += 128)
        __asm__ volatile ("dcbzl %0,%1" :: "b" (blocks), "r" (offset) : "memory");
#else
    memset(blocks, 0, sizeof(int16_t) * 6 * 64);
#endif
}
||||||
|
|
||||||
|
/**
 * Report how many bytes a single dcbzl instruction sets to zero.
 *
 * A 1024-byte scratch buffer is filled with 0xFF, one dcbzl is issued on its
 * middle, and the zero bytes are counted afterwards.
 *
 * update 24/06/2003: dcbzl is used instead of dcbz to get the intended effect
 * (Apple "fixed" dcbz). Unfortunately this cannot be used unless the
 * assembler knows about dcbzl ...
 *
 * @return the number of zeroed bytes, or 0 when HAVE_DCBZL is not set or the
 *         scratch buffer cannot be allocated
 */
static long check_dcbzl_effect(void)
{
    long count = 0;
#if HAVE_DCBZL
    char *fakedata = av_malloc(1024);
    char *fakedata_middle;
    long zero = 0, i;

    if (!fakedata)
        return 0L;

    fakedata_middle = fakedata + 512;

    memset(fakedata, 0xFF, 1024);

    /* Below the constraint "b" seems to mean "address base register"
     * in gcc-3.3 / RS/6000 speaks. Seems to avoid using r0, so....
     *
     * The "memory" clobber is required: the instruction rewrites part of
     * fakedata behind the compiler's back, and without it the compiler may
     * assume every byte still holds the 0xFF written by memset() above. */
    __asm__ volatile ("dcbzl %0, %1"
                      :: "b" (fakedata_middle), "r" (zero)
                      : "memory");

    for (i = 0; i < 1024; i++)
        if (fakedata[i] == (char) 0)
            count++;

    av_free(fakedata);
#endif

    return count;
}
||||||
|
|
||||||
|
#if HAVE_ALTIVEC
/**
 * Zero one block with AltiVec stores: eight vec_st() calls of the zero
 * vector at byte offsets 0, 16, ..., 112 from block.
 *
 * @param block start of the block to clear
 */
static void clear_block_altivec(int16_t *block)
{
    int offset;
    LOAD_ZERO;

    for (offset = 0; offset < 128; offset += 16)
        vec_st(zero_s16v, offset, block);
}
#endif /* HAVE_ALTIVEC */
||||||
|
|
||||||
|
/**
 * Install the PowerPC block-clearing routines.
 *
 * @param c              context whose function pointers may be replaced
 * @param high_bit_depth non-zero to leave the context untouched
 */
av_cold void ff_blockdsp_init_ppc(BlockDSPContext *c, unsigned high_bit_depth)
{
    // common optimizations whether AltiVec is available or not
    if (!high_bit_depth) {
        /* Pick the clear_blocks variant matching the number of bytes that
         * one dcbzl was measured to clear; any other result keeps the
         * routine already installed. */
        const long cleared = check_dcbzl_effect();

        if (cleared == 32)
            c->clear_blocks = clear_blocks_dcbz32_ppc;
        else if (cleared == 128)
            c->clear_blocks = clear_blocks_dcbz128_ppc;
    }

#if HAVE_ALTIVEC
    if (PPC_ALTIVEC(av_get_cpu_flags()) && !high_bit_depth)
        c->clear_block = clear_block_altivec;
#endif /* HAVE_ALTIVEC */
}
@ -0,0 +1,120 @@ |
|||||||
|
/*
|
||||||
|
* This file is part of Libav. |
||||||
|
* |
||||||
|
* Libav is free software; you can redistribute it and/or |
||||||
|
* modify it under the terms of the GNU Lesser General Public |
||||||
|
* License as published by the Free Software Foundation; either |
||||||
|
* version 2.1 of the License, or (at your option) any later version. |
||||||
|
* |
||||||
|
* Libav is distributed in the hope that it will be useful, |
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||||
|
* Lesser General Public License for more details. |
||||||
|
* |
||||||
|
* You should have received a copy of the GNU Lesser General Public |
||||||
|
* License along with Libav; if not, write to the Free Software |
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||||||
|
*/ |
||||||
|
|
||||||
|
#include <stdint.h> |
||||||
|
|
||||||
|
#include "config.h" |
||||||
|
#include "libavutil/attributes.h" |
||||||
|
#include "libavutil/internal.h" |
||||||
|
#include "libavutil/cpu.h" |
||||||
|
#include "libavutil/x86/asm.h" |
||||||
|
#include "libavutil/x86/cpu.h" |
||||||
|
#include "libavcodec/blockdsp.h" |
||||||
|
#include "libavcodec/version.h" |
||||||
|
|
||||||
|
#if HAVE_INLINE_ASM |
||||||
|
|
||||||
|
#define CLEAR_BLOCKS(name, n) \ |
||||||
|
static void name(int16_t *blocks) \
|
||||||
|
{ \
|
||||||
|
__asm__ volatile ( \
|
||||||
|
"pxor %%mm7, %%mm7 \n\t" \
|
||||||
|
"mov %1, %%"REG_a" \n\t" \
|
||||||
|
"1: \n\t" \
|
||||||
|
"movq %%mm7, (%0, %%"REG_a") \n\t" \
|
||||||
|
"movq %%mm7, 8(%0, %%"REG_a") \n\t" \
|
||||||
|
"movq %%mm7, 16(%0, %%"REG_a") \n\t" \
|
||||||
|
"movq %%mm7, 24(%0, %%"REG_a") \n\t" \
|
||||||
|
"add $32, %%"REG_a" \n\t" \
|
||||||
|
"js 1b \n\t" \
|
||||||
|
:: "r"(((uint8_t *) blocks) + 128 * n), \
|
||||||
|
"i"(-128 * n) \
|
||||||
|
: "%"REG_a); \
|
||||||
|
} |
||||||
|
CLEAR_BLOCKS(clear_blocks_mmx, 6) |
||||||
|
CLEAR_BLOCKS(clear_block_mmx, 1) |
||||||
|
|
||||||
|
/**
 * Zero one 128-byte block with eight 16-byte SSE stores.
 *
 * movaps is the aligned store form, so block must be 16-byte aligned.
 *
 * @param block start of the block to clear
 */
static void clear_block_sse(int16_t *block)
{
    __asm__ volatile (
        "xorps  %%xmm0, %%xmm0          \n"
        "movaps %%xmm0,    (%[dst])     \n"
        "movaps %%xmm0,  16(%[dst])     \n"
        "movaps %%xmm0,  32(%[dst])     \n"
        "movaps %%xmm0,  48(%[dst])     \n"
        "movaps %%xmm0,  64(%[dst])     \n"
        "movaps %%xmm0,  80(%[dst])     \n"
        "movaps %%xmm0,  96(%[dst])     \n"
        "movaps %%xmm0, 112(%[dst])     \n"
        :
        : [dst] "r" (block)
        : "memory");
}
||||||
|
|
||||||
|
static void clear_blocks_sse(int16_t *blocks) |
||||||
|
{ |
||||||
|
__asm__ volatile ( |
||||||
|
"xorps %%xmm0, %%xmm0 \n" |
||||||
|
"mov %1, %%"REG_a" \n" |
||||||
|
"1: \n" |
||||||
|
"movaps %%xmm0, (%0, %%"REG_a") \n" |
||||||
|
"movaps %%xmm0, 16(%0, %%"REG_a") \n" |
||||||
|
"movaps %%xmm0, 32(%0, %%"REG_a") \n" |
||||||
|
"movaps %%xmm0, 48(%0, %%"REG_a") \n" |
||||||
|
"movaps %%xmm0, 64(%0, %%"REG_a") \n" |
||||||
|
"movaps %%xmm0, 80(%0, %%"REG_a") \n" |
||||||
|
"movaps %%xmm0, 96(%0, %%"REG_a") \n" |
||||||
|
"movaps %%xmm0, 112(%0, %%"REG_a") \n" |
||||||
|
"add $128, %%"REG_a" \n" |
||||||
|
"js 1b \n" |
||||||
|
:: "r"(((uint8_t *) blocks) + 128 * 6), "i"(-128 * 6) |
||||||
|
: "%"REG_a); |
||||||
|
} |
||||||
|
|
||||||
|
#endif /* HAVE_INLINE_ASM */ |
||||||
|
|
||||||
|
#if FF_API_XVMC |
||||||
|
av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth, |
||||||
|
AVCodecContext *avctx) |
||||||
|
#else |
||||||
|
av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth) |
||||||
|
#endif /* FF_API_XVMC */ |
||||||
|
{ |
||||||
|
#if HAVE_INLINE_ASM |
||||||
|
int cpu_flags = av_get_cpu_flags(); |
||||||
|
|
||||||
|
if (!high_bit_depth) { |
||||||
|
if (INLINE_MMX(cpu_flags)) { |
||||||
|
c->clear_block = clear_block_mmx; |
||||||
|
c->clear_blocks = clear_blocks_mmx; |
||||||
|
} |
||||||
|
|
||||||
|
#if FF_API_XVMC |
||||||
|
FF_DISABLE_DEPRECATION_WARNINGS |
||||||
|
/* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */ |
||||||
|
if (CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1) |
||||||
|
return; |
||||||
|
FF_ENABLE_DEPRECATION_WARNINGS |
||||||
|
#endif /* FF_API_XVMC */ |
||||||
|
|
||||||
|
if (INLINE_SSE(cpu_flags)) { |
||||||
|
c->clear_block = clear_block_sse; |
||||||
|
c->clear_blocks = clear_blocks_sse; |
||||||
|
} |
||||||
|
} |
||||||
|
#endif /* HAVE_INLINE_ASM */ |
||||||
|
} |
Loading…
Reference in new issue