dsputil: Split clear_block*/fill_block* off into a separate context

pull/76/merge
Diego Biurrun 11 years ago
parent 869fc416f7
commit e74433a8e6
  1. 35
      configure
  2. 5
      libavcodec/4xm.c
  3. 1
      libavcodec/Makefile
  4. 3
      libavcodec/arm/Makefile
  5. 26
      libavcodec/arm/blockdsp_arm.h
  6. 33
      libavcodec/arm/blockdsp_init_arm.c
  7. 37
      libavcodec/arm/blockdsp_init_neon.c
  8. 38
      libavcodec/arm/blockdsp_neon.S
  9. 8
      libavcodec/arm/dsputil_init_neon.c
  10. 16
      libavcodec/arm/dsputil_neon.S
  11. 2
      libavcodec/asv.h
  12. 4
      libavcodec/asvdec.c
  13. 16
      libavcodec/bink.c
  14. 78
      libavcodec/blockdsp.c
  15. 52
      libavcodec/blockdsp.h
  16. 1
      libavcodec/cavs.c
  17. 2
      libavcodec/cavs.h
  18. 2
      libavcodec/cavsdec.c
  19. 9
      libavcodec/dnxhddec.c
  20. 10
      libavcodec/dnxhdenc.c
  21. 1
      libavcodec/dnxhdenc.h
  22. 36
      libavcodec/dsputil.c
  23. 25
      libavcodec/dsputil.h
  24. 4
      libavcodec/eamad.c
  25. 4
      libavcodec/eatqi.c
  26. 7
      libavcodec/g2meet.c
  27. 2
      libavcodec/h261dec.c
  28. 2
      libavcodec/h263.h
  29. 2
      libavcodec/intrax8.c
  30. 8
      libavcodec/ituh263dec.c
  31. 12
      libavcodec/jvdec.c
  32. 5
      libavcodec/mdec.c
  33. 5
      libavcodec/mimic.c
  34. 6
      libavcodec/mjpegdec.c
  35. 2
      libavcodec/mjpegdec.h
  36. 8
      libavcodec/mpeg12dec.c
  37. 8
      libavcodec/mpeg4videodec.c
  38. 2
      libavcodec/mpeg4videoenc.c
  39. 4
      libavcodec/mpegvideo.c
  40. 2
      libavcodec/mpegvideo.h
  41. 4
      libavcodec/msmpeg4dec.c
  42. 1
      libavcodec/ppc/Makefile
  43. 169
      libavcodec/ppc/blockdsp.c
  44. 14
      libavcodec/ppc/dsputil_altivec.c
  45. 110
      libavcodec/ppc/dsputil_ppc.c
  46. 9
      libavcodec/vc1dec.c
  47. 5
      libavcodec/wmv2.c
  48. 4
      libavcodec/wmv2dec.c
  49. 1
      libavcodec/x86/Makefile
  50. 120
      libavcodec/x86/blockdsp_mmx.c
  51. 17
      libavcodec/x86/dsputil_init.c
  52. 56
      libavcodec/x86/dsputil_mmx.c
  53. 5
      libavcodec/x86/dsputil_x86.h

35
configure vendored

@ -1530,6 +1530,7 @@ CONFIG_EXTRA="
aandcttables aandcttables
ac3dsp ac3dsp
audio_frame_queue audio_frame_queue
blockdsp
cabac cabac
dsputil dsputil
gcrypt gcrypt
@ -1705,7 +1706,7 @@ mdct_select="fft"
rdft_select="fft" rdft_select="fft"
mpegaudio_select="mpegaudiodsp" mpegaudio_select="mpegaudiodsp"
mpegaudiodsp_select="dct" mpegaudiodsp_select="dct"
mpegvideo_select="dsputil hpeldsp videodsp" mpegvideo_select="blockdsp dsputil hpeldsp videodsp"
mpegvideoenc_select="dsputil mpegvideo qpeldsp" mpegvideoenc_select="dsputil mpegvideo qpeldsp"
# decoders / encoders # decoders / encoders
@ -1722,33 +1723,33 @@ amrnb_decoder_select="lsp"
amrwb_decoder_select="lsp" amrwb_decoder_select="lsp"
amv_decoder_select="sp5x_decoder" amv_decoder_select="sp5x_decoder"
ape_decoder_select="dsputil" ape_decoder_select="dsputil"
asv1_decoder_select="dsputil" asv1_decoder_select="blockdsp dsputil"
asv1_encoder_select="dsputil" asv1_encoder_select="dsputil"
asv2_decoder_select="dsputil" asv2_decoder_select="blockdsp dsputil"
asv2_encoder_select="dsputil" asv2_encoder_select="dsputil"
atrac1_decoder_select="mdct sinewin" atrac1_decoder_select="mdct sinewin"
atrac3_decoder_select="mdct" atrac3_decoder_select="mdct"
atrac3p_decoder_select="mdct sinewin" atrac3p_decoder_select="mdct sinewin"
bink_decoder_select="dsputil hpeldsp" bink_decoder_select="blockdsp hpeldsp"
binkaudio_dct_decoder_select="mdct rdft dct sinewin" binkaudio_dct_decoder_select="mdct rdft dct sinewin"
binkaudio_rdft_decoder_select="mdct rdft sinewin" binkaudio_rdft_decoder_select="mdct rdft sinewin"
cavs_decoder_select="dsputil golomb h264chroma qpeldsp videodsp" cavs_decoder_select="blockdsp dsputil golomb h264chroma qpeldsp videodsp"
cllc_decoder_select="dsputil" cllc_decoder_select="dsputil"
comfortnoise_encoder_select="lpc" comfortnoise_encoder_select="lpc"
cook_decoder_select="dsputil mdct sinewin" cook_decoder_select="dsputil mdct sinewin"
cscd_decoder_select="lzo" cscd_decoder_select="lzo"
cscd_decoder_suggest="zlib" cscd_decoder_suggest="zlib"
dca_decoder_select="mdct" dca_decoder_select="mdct"
dnxhd_decoder_select="dsputil" dnxhd_decoder_select="blockdsp dsputil"
dnxhd_encoder_select="aandcttables dsputil mpegvideoenc" dnxhd_encoder_select="aandcttables blockdsp dsputil mpegvideoenc"
dvvideo_decoder_select="dsputil" dvvideo_decoder_select="dsputil"
dvvideo_encoder_select="dsputil" dvvideo_encoder_select="dsputil"
dxa_decoder_deps="zlib" dxa_decoder_deps="zlib"
eac3_decoder_select="ac3_decoder" eac3_decoder_select="ac3_decoder"
eac3_encoder_select="ac3_encoder" eac3_encoder_select="ac3_encoder"
eamad_decoder_select="aandcttables dsputil mpegvideo" eamad_decoder_select="aandcttables blockdsp dsputil mpegvideo"
eatgq_decoder_select="aandcttables dsputil" eatgq_decoder_select="aandcttables dsputil"
eatqi_decoder_select="aandcttables dsputil error_resilience mpegvideo" eatqi_decoder_select="aandcttables blockdsp dsputil error_resilience mpegvideo"
exr_decoder_deps="zlib" exr_decoder_deps="zlib"
ffv1_decoder_select="golomb rangecoder" ffv1_decoder_select="golomb rangecoder"
ffv1_encoder_select="rangecoder" ffv1_encoder_select="rangecoder"
@ -1762,10 +1763,10 @@ flashsv_encoder_deps="zlib"
flashsv2_decoder_deps="zlib" flashsv2_decoder_deps="zlib"
flv_decoder_select="h263_decoder" flv_decoder_select="h263_decoder"
flv_encoder_select="h263_encoder" flv_encoder_select="h263_encoder"
fourxm_decoder_select="dsputil" fourxm_decoder_select="blockdsp dsputil"
fraps_decoder_select="dsputil huffman" fraps_decoder_select="dsputil huffman"
g2m_decoder_deps="zlib" g2m_decoder_deps="zlib"
g2m_decoder_select="dsputil" g2m_decoder_select="blockdsp dsputil"
h261_decoder_select="error_resilience mpegvideo" h261_decoder_select="error_resilience mpegvideo"
h261_encoder_select="aandcttables mpegvideoenc" h261_encoder_select="aandcttables mpegvideoenc"
h263_decoder_select="error_resilience h263_parser h263dsp mpegvideo qpeldsp" h263_decoder_select="error_resilience h263_parser h263dsp mpegvideo qpeldsp"
@ -1783,14 +1784,14 @@ indeo3_decoder_select="hpeldsp"
interplay_video_decoder_select="hpeldsp" interplay_video_decoder_select="hpeldsp"
jpegls_decoder_select="golomb mjpeg_decoder" jpegls_decoder_select="golomb mjpeg_decoder"
jpegls_encoder_select="golomb" jpegls_encoder_select="golomb"
jv_decoder_select="dsputil" jv_decoder_select="blockdsp"
lagarith_decoder_select="huffyuvdsp" lagarith_decoder_select="huffyuvdsp"
ljpeg_encoder_select="aandcttables mpegvideoenc" ljpeg_encoder_select="aandcttables mpegvideoenc"
loco_decoder_select="golomb" loco_decoder_select="golomb"
mdec_decoder_select="dsputil error_resilience mpegvideo" mdec_decoder_select="blockdsp dsputil error_resilience mpegvideo"
metasound_decoder_select="lsp mdct sinewin" metasound_decoder_select="lsp mdct sinewin"
mimic_decoder_select="dsputil hpeldsp" mimic_decoder_select="blockdsp dsputil hpeldsp"
mjpeg_decoder_select="dsputil hpeldsp" mjpeg_decoder_select="blockdsp dsputil hpeldsp"
mjpeg_encoder_select="aandcttables mpegvideoenc" mjpeg_encoder_select="aandcttables mpegvideoenc"
mjpegb_decoder_select="mjpeg_decoder" mjpegb_decoder_select="mjpeg_decoder"
mlp_decoder_select="mlp_parser" mlp_decoder_select="mlp_parser"
@ -1862,7 +1863,7 @@ twinvq_decoder_select="mdct lsp sinewin"
utvideo_decoder_select="dsputil" utvideo_decoder_select="dsputil"
utvideo_encoder_select="dsputil huffman huffyuvencdsp" utvideo_encoder_select="dsputil huffman huffyuvencdsp"
vble_decoder_select="huffyuvdsp" vble_decoder_select="huffyuvdsp"
vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel intrax8 qpeldsp" vc1_decoder_select="blockdsp error_resilience h263_decoder h264chroma h264qpel intrax8 qpeldsp"
vc1image_decoder_select="vc1_decoder" vc1image_decoder_select="vc1_decoder"
vorbis_decoder_select="mdct" vorbis_decoder_select="mdct"
vorbis_encoder_select="mdct" vorbis_encoder_select="mdct"
@ -1883,7 +1884,7 @@ wmav2_encoder_select="mdct sinewin"
wmavoice_decoder_select="lsp rdft dct mdct sinewin" wmavoice_decoder_select="lsp rdft dct mdct sinewin"
wmv1_decoder_select="h263_decoder" wmv1_decoder_select="h263_decoder"
wmv1_encoder_select="h263_encoder" wmv1_encoder_select="h263_encoder"
wmv2_decoder_select="h263_decoder intrax8 videodsp" wmv2_decoder_select="blockdsp h263_decoder intrax8 videodsp"
wmv2_encoder_select="h263_encoder" wmv2_encoder_select="h263_encoder"
wmv3_decoder_select="vc1_decoder" wmv3_decoder_select="vc1_decoder"
wmv3image_decoder_select="wmv3_decoder" wmv3image_decoder_select="wmv3_decoder"

@ -30,6 +30,7 @@
#include "libavutil/imgutils.h" #include "libavutil/imgutils.h"
#include "libavutil/intreadwrite.h" #include "libavutil/intreadwrite.h"
#include "avcodec.h" #include "avcodec.h"
#include "blockdsp.h"
#include "bytestream.h" #include "bytestream.h"
#include "dsputil.h" #include "dsputil.h"
#include "get_bits.h" #include "get_bits.h"
@ -132,6 +133,7 @@ typedef struct CFrameBuffer {
typedef struct FourXContext { typedef struct FourXContext {
AVCodecContext *avctx; AVCodecContext *avctx;
DSPContext dsp; DSPContext dsp;
BlockDSPContext bdsp;
uint16_t *frame_buffer; uint16_t *frame_buffer;
uint16_t *last_frame_buffer; uint16_t *last_frame_buffer;
GetBitContext pre_gb; ///< ac/dc prefix GetBitContext pre_gb; ///< ac/dc prefix
@ -564,7 +566,7 @@ static int decode_i_mb(FourXContext *f)
int ret; int ret;
int i; int i;
f->dsp.clear_blocks(f->block[0]); f->bdsp.clear_blocks(f->block[0]);
for (i = 0; i < 6; i++) for (i = 0; i < 6; i++)
if ((ret = decode_i_block(f, f->block[i])) < 0) if ((ret = decode_i_block(f, f->block[i])) < 0)
@ -953,6 +955,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
} }
f->version = AV_RL32(avctx->extradata) >> 16; f->version = AV_RL32(avctx->extradata) >> 16;
ff_blockdsp_init(&f->bdsp, avctx);
ff_dsputil_init(&f->dsp, avctx); ff_dsputil_init(&f->dsp, avctx);
f->avctx = avctx; f->avctx = avctx;
init_vlcs(f); init_vlcs(f);

@ -28,6 +28,7 @@ OBJS = allcodecs.o \
OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o
OBJS-$(CONFIG_AC3DSP) += ac3dsp.o OBJS-$(CONFIG_AC3DSP) += ac3dsp.o
OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o
OBJS-$(CONFIG_BLOCKDSP) += blockdsp.o
OBJS-$(CONFIG_CABAC) += cabac.o OBJS-$(CONFIG_CABAC) += cabac.o
OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o
OBJS-$(CONFIG_DXVA2) += dxva2.o OBJS-$(CONFIG_DXVA2) += dxva2.o

@ -4,6 +4,7 @@ OBJS += arm/fmtconvert_init_arm.o
OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \ OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \
arm/ac3dsp_arm.o arm/ac3dsp_arm.o
OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_arm.o
OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \ OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \
arm/dsputil_arm.o \ arm/dsputil_arm.o \
arm/jrevdct_arm.o \ arm/jrevdct_arm.o \
@ -76,6 +77,8 @@ VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \
NEON-OBJS += arm/fmtconvert_neon.o NEON-OBJS += arm/fmtconvert_neon.o
NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o
NEON-OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_neon.o \
arm/blockdsp_neon.o
NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \ NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \
arm/dsputil_neon.o \ arm/dsputil_neon.o \
arm/int_neon.o \ arm/int_neon.o \

@ -0,0 +1,26 @@
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_ARM_BLOCKDSP_ARM_H
#define AVCODEC_ARM_BLOCKDSP_ARM_H
#include "libavcodec/blockdsp.h"
void ff_blockdsp_init_neon(BlockDSPContext *c, unsigned high_bit_depth);
#endif /* AVCODEC_ARM_BLOCKDSP_ARM_H */

@ -0,0 +1,33 @@
/*
* ARM optimized block operations
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/arm/cpu.h"
#include "libavcodec/blockdsp.h"
#include "blockdsp_arm.h"
/**
 * ARM entry point for BlockDSPContext setup.
 *
 * Queries the runtime CPU flags and, when NEON is reported, hands the
 * context over to ff_blockdsp_init_neon(). Without NEON the context is
 * left exactly as the caller passed it in.
 *
 * @param c              context whose function pointers may be replaced
 * @param high_bit_depth forwarded unchanged to the NEON initialiser
 */
av_cold void ff_blockdsp_init_arm(BlockDSPContext *c, unsigned high_bit_depth)
{
    const int cpu_flags = av_get_cpu_flags();

    /* Nothing to install when NEON is not available. */
    if (!have_neon(cpu_flags))
        return;

    ff_blockdsp_init_neon(c, high_bit_depth);
}

@ -0,0 +1,37 @@
/*
* ARM NEON optimised block operations
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "libavutil/attributes.h"
#include "libavcodec/blockdsp.h"
#include "blockdsp_arm.h"
void ff_clear_block_neon(int16_t *block);
void ff_clear_blocks_neon(int16_t *blocks);
/**
 * Install the NEON block-clearing routines into a BlockDSPContext.
 *
 * The NEON functions are only installed when high_bit_depth is zero;
 * otherwise the context is left untouched.
 *
 * @param c              context to update
 * @param high_bit_depth non-zero disables the NEON overrides
 */
av_cold void ff_blockdsp_init_neon(BlockDSPContext *c, unsigned high_bit_depth)
{
    if (high_bit_depth)
        return;

    c->clear_blocks = ff_clear_blocks_neon;
    c->clear_block  = ff_clear_block_neon;
}

@ -0,0 +1,38 @@
/*
* ARM NEON optimised block functions
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/arm/asm.S"
@ Zero 128 bytes (64 int16_t coefficients) starting at the address in r0.
@ C prototype: void ff_clear_block_neon(int16_t *block);
@ r0 is used as the destination pointer and is advanced past the block.
@ Only q0 and r0 are modified.
function ff_clear_block_neon, export=1
@ q0 = eight 16-bit zero lanes (16 bytes)
vmov.i16 q0, #0
@ 8 stores x 16 bytes = 128 bytes
.rept 8
@ Store q0 and post-increment r0; the :128 qualifier states that the
@ address is 128-bit (16-byte) aligned.
vst1.16 {q0}, [r0,:128]!
.endr
bx lr
endfunc
@ Zero 768 bytes (6 * 64 int16_t coefficients) starting at the address in r0.
@ C prototype: void ff_clear_blocks_neon(int16_t *blocks);
@ r0 is used as the destination pointer and is advanced past the cleared area.
@ Only q0 and r0 are modified.
function ff_clear_blocks_neon, export=1
@ q0 = eight 16-bit zero lanes (16 bytes)
vmov.i16 q0, #0
@ 8 stores per 64-coefficient block, 6 blocks: 48 stores x 16 bytes = 768 bytes
.rept 8*6
@ Store q0 and post-increment r0; the :128 qualifier states that the
@ address is 128-bit (16-byte) aligned.
vst1.16 {q0}, [r0,:128]!
.endr
bx lr
endfunc

@ -30,9 +30,6 @@ void ff_simple_idct_neon(int16_t *data);
void ff_simple_idct_put_neon(uint8_t *dest, int line_size, int16_t *data); void ff_simple_idct_put_neon(uint8_t *dest, int line_size, int16_t *data);
void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data); void ff_simple_idct_add_neon(uint8_t *dest, int line_size, int16_t *data);
void ff_clear_block_neon(int16_t *block);
void ff_clear_blocks_neon(int16_t *blocks);
void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int); void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int); void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);
@ -61,11 +58,6 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx,
c->put_pixels_clamped = ff_put_pixels_clamped_neon; c->put_pixels_clamped = ff_put_pixels_clamped_neon;
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
if (!high_bit_depth) {
c->clear_block = ff_clear_block_neon;
c->clear_blocks = ff_clear_blocks_neon;
}
c->vector_clipf = ff_vector_clipf_neon; c->vector_clipf = ff_vector_clipf_neon;
c->vector_clip_int32 = ff_vector_clip_int32_neon; c->vector_clip_int32 = ff_vector_clip_int32_neon;

@ -21,22 +21,6 @@
#include "libavutil/arm/asm.S" #include "libavutil/arm/asm.S"
function ff_clear_block_neon, export=1
vmov.i16 q0, #0
.rept 8
vst1.16 {q0}, [r0,:128]!
.endr
bx lr
endfunc
function ff_clear_blocks_neon, export=1
vmov.i16 q0, #0
.rept 8*6
vst1.16 {q0}, [r0,:128]!
.endr
bx lr
endfunc
function ff_put_pixels_clamped_neon, export=1 function ff_put_pixels_clamped_neon, export=1
vld1.16 {d16-d19}, [r0,:128]! vld1.16 {d16-d19}, [r0,:128]!
vqmovun.s16 d0, q8 vqmovun.s16 d0, q8

@ -31,12 +31,14 @@
#include "libavutil/mem.h" #include "libavutil/mem.h"
#include "avcodec.h" #include "avcodec.h"
#include "blockdsp.h"
#include "dsputil.h" #include "dsputil.h"
#include "get_bits.h" #include "get_bits.h"
#include "put_bits.h" #include "put_bits.h"
typedef struct ASV1Context{ typedef struct ASV1Context{
AVCodecContext *avctx; AVCodecContext *avctx;
BlockDSPContext bdsp;
DSPContext dsp; DSPContext dsp;
PutBitContext pb; PutBitContext pb;
GetBitContext gb; GetBitContext gb;

@ -28,6 +28,7 @@
#include "asv.h" #include "asv.h"
#include "avcodec.h" #include "avcodec.h"
#include "blockdsp.h"
#include "put_bits.h" #include "put_bits.h"
#include "internal.h" #include "internal.h"
#include "mathops.h" #include "mathops.h"
@ -164,7 +165,7 @@ static inline int decode_mb(ASV1Context *a, int16_t block[6][64])
{ {
int i; int i;
a->dsp.clear_blocks(block[0]); a->bdsp.clear_blocks(block[0]);
if (a->avctx->codec_id == AV_CODEC_ID_ASV1) { if (a->avctx->codec_id == AV_CODEC_ID_ASV1) {
for (i = 0; i < 6; i++) { for (i = 0; i < 6; i++) {
@ -280,6 +281,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
} }
ff_asv_common_init(avctx); ff_asv_common_init(avctx);
ff_blockdsp_init(&a->bdsp, avctx);
init_vlcs(a); init_vlcs(a);
ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_asv_scantab); ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_asv_scantab);
avctx->pix_fmt = AV_PIX_FMT_YUV420P; avctx->pix_fmt = AV_PIX_FMT_YUV420P;

@ -24,9 +24,9 @@
#include "libavutil/imgutils.h" #include "libavutil/imgutils.h"
#include "libavutil/internal.h" #include "libavutil/internal.h"
#include "avcodec.h" #include "avcodec.h"
#include "dsputil.h"
#include "binkdata.h" #include "binkdata.h"
#include "binkdsp.h" #include "binkdsp.h"
#include "blockdsp.h"
#include "hpeldsp.h" #include "hpeldsp.h"
#include "internal.h" #include "internal.h"
#include "mathops.h" #include "mathops.h"
@ -113,7 +113,7 @@ typedef struct Bundle {
*/ */
typedef struct BinkContext { typedef struct BinkContext {
AVCodecContext *avctx; AVCodecContext *avctx;
DSPContext dsp; BlockDSPContext bdsp;
HpelDSPContext hdsp; HpelDSPContext hdsp;
BinkDSPContext binkdsp; BinkDSPContext binkdsp;
AVFrame *last; AVFrame *last;
@ -880,7 +880,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
} else { } else {
put_pixels8x8_overlapped(dst, ref, stride); put_pixels8x8_overlapped(dst, ref, stride);
} }
c->dsp.clear_block(block); c->bdsp.clear_block(block);
v = binkb_get_value(c, BINKB_SRC_INTER_COEFS); v = binkb_get_value(c, BINKB_SRC_INTER_COEFS);
read_residue(gb, block, v); read_residue(gb, block, v);
c->binkdsp.add_pixels8(dst, block, stride); c->binkdsp.add_pixels8(dst, block, stride);
@ -904,7 +904,7 @@ static int binkb_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
break; break;
case 5: case 5:
v = binkb_get_value(c, BINKB_SRC_COLORS); v = binkb_get_value(c, BINKB_SRC_COLORS);
c->dsp.fill_block_tab[1](dst, v, stride, 8); c->bdsp.fill_block_tab[1](dst, v, stride, 8);
break; break;
case 6: case 6:
for (i = 0; i < 2; i++) for (i = 0; i < 2; i++)
@ -1047,7 +1047,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
break; break;
case FILL_BLOCK: case FILL_BLOCK:
v = get_value(c, BINK_SRC_COLORS); v = get_value(c, BINK_SRC_COLORS);
c->dsp.fill_block_tab[0](dst, v, stride, 16); c->bdsp.fill_block_tab[0](dst, v, stride, 16);
break; break;
case PATTERN_BLOCK: case PATTERN_BLOCK:
for (i = 0; i < 2; i++) for (i = 0; i < 2; i++)
@ -1117,7 +1117,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
} }
c->hdsp.put_pixels_tab[1][0](dst, ref, stride, 8); c->hdsp.put_pixels_tab[1][0](dst, ref, stride, 8);
c->dsp.clear_block(block); c->bdsp.clear_block(block);
v = get_bits(gb, 7); v = get_bits(gb, 7);
read_residue(gb, block, v); read_residue(gb, block, v);
c->binkdsp.add_pixels8(dst, block, stride); c->binkdsp.add_pixels8(dst, block, stride);
@ -1130,7 +1130,7 @@ static int bink_decode_plane(BinkContext *c, AVFrame *frame, GetBitContext *gb,
break; break;
case FILL_BLOCK: case FILL_BLOCK:
v = get_value(c, BINK_SRC_COLORS); v = get_value(c, BINK_SRC_COLORS);
c->dsp.fill_block_tab[1](dst, v, stride, 8); c->bdsp.fill_block_tab[1](dst, v, stride, 8);
break; break;
case INTER_BLOCK: case INTER_BLOCK:
xoff = get_value(c, BINK_SRC_X_OFF); xoff = get_value(c, BINK_SRC_X_OFF);
@ -1310,7 +1310,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
avctx->pix_fmt = c->has_alpha ? AV_PIX_FMT_YUVA420P : AV_PIX_FMT_YUV420P; avctx->pix_fmt = c->has_alpha ? AV_PIX_FMT_YUVA420P : AV_PIX_FMT_YUV420P;
ff_dsputil_init(&c->dsp, avctx); ff_blockdsp_init(&c->bdsp, avctx);
ff_hpeldsp_init(&c->hdsp, avctx->flags); ff_hpeldsp_init(&c->hdsp, avctx->flags);
ff_binkdsp_init(&c->binkdsp); ff_binkdsp_init(&c->binkdsp);

@ -0,0 +1,78 @@
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include <string.h>
#include "config.h"
#include "libavutil/attributes.h"
#include "avcodec.h"
#include "blockdsp.h"
#include "version.h"
/**
 * Portable C fallback: set the 64 int16_t coefficients of one block to zero.
 *
 * @param block first coefficient of the block to clear
 */
static void clear_block_8_c(int16_t *block)
{
    const size_t block_bytes = 64 * sizeof(*block);

    memset(block, 0, block_bytes);
}
/**
 * Portable C fallback: zero six consecutive blocks of 64 int16_t
 * coefficients each (384 coefficients in total).
 *
 * @param blocks first coefficient of the first block
 */
static void clear_blocks_8_c(int16_t *blocks)
{
    const size_t total_bytes = 6 * 64 * sizeof(*blocks);

    memset(blocks, 0, total_bytes);
}
/**
 * Portable C fallback: write `value` into 16 bytes on each of `h` rows.
 *
 * @param block     first byte of the first row
 * @param value     byte stored at every position
 * @param line_size distance in bytes between the starts of two rows
 * @param h         number of rows; nothing is written when h <= 0
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        memset(row, value, 16);
        row += line_size;
    }
}
/**
 * Portable C fallback: write `value` into 8 bytes on each of `h` rows.
 *
 * @param block     first byte of the first row
 * @param value     byte stored at every position
 * @param line_size distance in bytes between the starts of two rows
 * @param h         number of rows; nothing is written when h <= 0
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        memset(row, value, 8);
        row += line_size;
    }
}
/**
 * Populate a BlockDSPContext.
 *
 * All four function pointers are first set to the portable C
 * implementations from this file. Each architecture initialiser whose
 * ARCH_* configuration macro is non-zero is then called with the context
 * so it can substitute its own routines.
 *
 * @param c     context to fill in
 * @param avctx codec context; bits_per_raw_sample > 8 is passed on to the
 *              architecture initialisers as the high_bit_depth flag
 */
av_cold void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx)
{
    const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;

    /* Portable defaults. */
    c->fill_block_tab[0] = fill_block16_c;
    c->fill_block_tab[1] = fill_block8_c;
    c->clear_block       = clear_block_8_c;
    c->clear_blocks      = clear_blocks_8_c;

    /* Architecture-specific overrides. */
    if (ARCH_ARM) {
        ff_blockdsp_init_arm(c, high_bit_depth);
    }
    if (ARCH_PPC) {
        ff_blockdsp_init_ppc(c, high_bit_depth);
    }
    if (ARCH_X86) {
        /* The x86 initialiser also takes avctx while FF_API_XVMC is set. */
#if FF_API_XVMC
        ff_blockdsp_init_x86(c, high_bit_depth, avctx);
#else
        ff_blockdsp_init_x86(c, high_bit_depth);
#endif /* FF_API_XVMC */
    }
}

@ -0,0 +1,52 @@
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_BLOCKDSP_H
#define AVCODEC_BLOCKDSP_H
#include <stdint.h>
#include "avcodec.h"
#include "version.h"
/**
 * Fill `h` rows of a block with a single byte value.
 *
 * The row width is fixed by the implementation (the C versions write
 * 16 or 8 bytes per row).
 *
 * @param block     first byte of the first row; required alignment is the
 *                  row width (8 or 16)
 * @param value     byte stored at every position
 * @param line_size distance in bytes between the starts of two rows
 * @param h         number of rows to fill
 */
typedef void (*op_fill_func)(uint8_t *block /* align width (8 or 16) */,
uint8_t value, int line_size, int h);
/**
 * Function pointers for clearing coefficient blocks and for constant
 * fills. Populate with ff_blockdsp_init() before use.
 */
typedef struct BlockDSPContext {
/** Zero one block of 64 int16_t coefficients. */
void (*clear_block)(int16_t *block /* align 16 */);
/** Zero six consecutive blocks (6 * 64 int16_t coefficients). */
void (*clear_blocks)(int16_t *blocks /* align 16 */);
/** Constant fill: [0] writes 16 bytes per row, [1] writes 8 bytes per row. */
op_fill_func fill_block_tab[2];
} BlockDSPContext;
/**
 * Set every function pointer in c: C defaults first, then any
 * architecture-specific replacements.
 *
 * @param c     context to initialise
 * @param avctx codec context; its bits_per_raw_sample selects the
 *              high_bit_depth flag passed to the initialisers below
 */
void ff_blockdsp_init(BlockDSPContext *c, AVCodecContext *avctx);
/* Architecture-specific initialisers, called from ff_blockdsp_init() when
 * the matching ARCH_* configuration macro is non-zero. */
void ff_blockdsp_init_arm(BlockDSPContext *c, unsigned high_bit_depth);
void ff_blockdsp_init_ppc(BlockDSPContext *c, unsigned high_bit_depth);
/* The x86 initialiser additionally receives the codec context while
 * FF_API_XVMC is enabled; how it uses avctx is not visible from this
 * header -- see the x86 implementation. */
#if FF_API_XVMC
void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth,
AVCodecContext *avctx);
#else
void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth);
#endif /* FF_API_XVMC */
#endif /* AVCODEC_BLOCKDSP_H */

@ -759,6 +759,7 @@ av_cold int ff_cavs_init(AVCodecContext *avctx)
{ {
AVSContext *h = avctx->priv_data; AVSContext *h = avctx->priv_data;
ff_blockdsp_init(&h->bdsp, avctx);
ff_dsputil_init(&h->dsp, avctx); ff_dsputil_init(&h->dsp, avctx);
ff_h264chroma_init(&h->h264chroma, 8); ff_h264chroma_init(&h->h264chroma, 8);
ff_videodsp_init(&h->vdsp, 8); ff_videodsp_init(&h->vdsp, 8);

@ -23,6 +23,7 @@
#define AVCODEC_CAVS_H #define AVCODEC_CAVS_H
#include "cavsdsp.h" #include "cavsdsp.h"
#include "blockdsp.h"
#include "dsputil.h" #include "dsputil.h"
#include "h264chroma.h" #include "h264chroma.h"
#include "get_bits.h" #include "get_bits.h"
@ -162,6 +163,7 @@ typedef struct AVSFrame {
typedef struct AVSContext { typedef struct AVSContext {
AVCodecContext *avctx; AVCodecContext *avctx;
DSPContext dsp; DSPContext dsp;
BlockDSPContext bdsp;
H264ChromaContext h264chroma; H264ChromaContext h264chroma;
VideoDSPContext vdsp; VideoDSPContext vdsp;
CAVSDSPContext cdsp; CAVSDSPContext cdsp;

@ -581,7 +581,7 @@ static int decode_residual_block(AVSContext *h, GetBitContext *gb,
dequant_shift[qp], i)) < 0) dequant_shift[qp], i)) < 0)
return ret; return ret;
h->cdsp.cavs_idct8_add(dst, block, stride); h->cdsp.cavs_idct8_add(dst, block, stride);
h->dsp.clear_block(block); h->bdsp.clear_block(block);
return 0; return 0;
} }

@ -25,6 +25,7 @@
#include "libavutil/imgutils.h" #include "libavutil/imgutils.h"
#include "libavutil/timer.h" #include "libavutil/timer.h"
#include "avcodec.h" #include "avcodec.h"
#include "blockdsp.h"
#include "get_bits.h" #include "get_bits.h"
#include "dnxhddata.h" #include "dnxhddata.h"
#include "dsputil.h" #include "dsputil.h"
@ -33,6 +34,7 @@
typedef struct DNXHDContext { typedef struct DNXHDContext {
AVCodecContext *avctx; AVCodecContext *avctx;
GetBitContext gb; GetBitContext gb;
BlockDSPContext bdsp;
int cid; ///< compression id int cid; ///< compression id
unsigned int width, height; unsigned int width, height;
unsigned int mb_width, mb_height; unsigned int mb_width, mb_height;
@ -133,6 +135,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame,
ctx->avctx->pix_fmt = AV_PIX_FMT_YUV444P10; ctx->avctx->pix_fmt = AV_PIX_FMT_YUV444P10;
ctx->avctx->bits_per_raw_sample = 10; ctx->avctx->bits_per_raw_sample = 10;
if (ctx->bit_depth != 10) { if (ctx->bit_depth != 10) {
ff_blockdsp_init(&ctx->bdsp, ctx->avctx);
ff_dsputil_init(&ctx->dsp, ctx->avctx); ff_dsputil_init(&ctx->dsp, ctx->avctx);
ctx->bit_depth = 10; ctx->bit_depth = 10;
ctx->decode_dct_block = dnxhd_decode_dct_block_10_444; ctx->decode_dct_block = dnxhd_decode_dct_block_10_444;
@ -142,6 +145,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame,
ctx->avctx->pix_fmt = AV_PIX_FMT_YUV422P10; ctx->avctx->pix_fmt = AV_PIX_FMT_YUV422P10;
ctx->avctx->bits_per_raw_sample = 10; ctx->avctx->bits_per_raw_sample = 10;
if (ctx->bit_depth != 10) { if (ctx->bit_depth != 10) {
ff_blockdsp_init(&ctx->bdsp, ctx->avctx);
ff_dsputil_init(&ctx->dsp, ctx->avctx); ff_dsputil_init(&ctx->dsp, ctx->avctx);
ctx->bit_depth = 10; ctx->bit_depth = 10;
ctx->decode_dct_block = dnxhd_decode_dct_block_10; ctx->decode_dct_block = dnxhd_decode_dct_block_10;
@ -150,6 +154,7 @@ static int dnxhd_decode_header(DNXHDContext *ctx, AVFrame *frame,
ctx->avctx->pix_fmt = AV_PIX_FMT_YUV422P; ctx->avctx->pix_fmt = AV_PIX_FMT_YUV422P;
ctx->avctx->bits_per_raw_sample = 8; ctx->avctx->bits_per_raw_sample = 8;
if (ctx->bit_depth != 8) { if (ctx->bit_depth != 8) {
ff_blockdsp_init(&ctx->bdsp, ctx->avctx);
ff_dsputil_init(&ctx->dsp, ctx->avctx); ff_dsputil_init(&ctx->dsp, ctx->avctx);
ctx->bit_depth = 8; ctx->bit_depth = 8;
ctx->decode_dct_block = dnxhd_decode_dct_block_8; ctx->decode_dct_block = dnxhd_decode_dct_block_8;
@ -307,12 +312,12 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, AVFrame *frame,
skip_bits1(&ctx->gb); skip_bits1(&ctx->gb);
for (i = 0; i < 8; i++) { for (i = 0; i < 8; i++) {
ctx->dsp.clear_block(ctx->blocks[i]); ctx->bdsp.clear_block(ctx->blocks[i]);
ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale); ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale);
} }
if (ctx->is_444) { if (ctx->is_444) {
for (; i < 12; i++) { for (; i < 12; i++) {
ctx->dsp.clear_block(ctx->blocks[i]); ctx->bdsp.clear_block(ctx->blocks[i]);
ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale); ctx->decode_dct_block(ctx, ctx->blocks[i], i, qscale);
} }
} }

@ -29,6 +29,7 @@
#include "libavutil/timer.h" #include "libavutil/timer.h"
#include "avcodec.h" #include "avcodec.h"
#include "blockdsp.h"
#include "dsputil.h" #include "dsputil.h"
#include "internal.h" #include "internal.h"
#include "mpegvideo.h" #include "mpegvideo.h"
@ -305,6 +306,7 @@ static av_cold int dnxhd_encode_init(AVCodecContext *avctx)
avctx->bits_per_raw_sample = ctx->cid_table->bit_depth; avctx->bits_per_raw_sample = ctx->cid_table->bit_depth;
ff_blockdsp_init(&ctx->bdsp, avctx);
ff_dsputil_init(&ctx->m.dsp, avctx); ff_dsputil_init(&ctx->m.dsp, avctx);
ff_dct_common_init(&ctx->m); ff_dct_common_init(&ctx->m);
if (!ctx->m.dct_quantize) if (!ctx->m.dct_quantize)
@ -556,10 +558,10 @@ void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
ptr_v + ctx->dct_uv_offset, ptr_v + ctx->dct_uv_offset,
ctx->m.uvlinesize); ctx->m.uvlinesize);
} else { } else {
dsp->clear_block(ctx->blocks[4]); ctx->bdsp.clear_block(ctx->blocks[4]);
dsp->clear_block(ctx->blocks[5]); ctx->bdsp.clear_block(ctx->blocks[5]);
dsp->clear_block(ctx->blocks[6]); ctx->bdsp.clear_block(ctx->blocks[6]);
dsp->clear_block(ctx->blocks[7]); ctx->bdsp.clear_block(ctx->blocks[7]);
} }
} else { } else {
dsp->get_pixels(ctx->blocks[4], dsp->get_pixels(ctx->blocks[4],

@ -41,6 +41,7 @@ typedef struct RCEntry {
typedef struct DNXHDEncContext { typedef struct DNXHDEncContext {
AVClass *class; AVClass *class;
BlockDSPContext bdsp;
MpegEncContext m; ///< Used for quantization dsp functions MpegEncContext m; ///< Used for quantization dsp functions
int cid; int cid;

@ -373,26 +373,6 @@ static int sum_abs_dctelem_c(int16_t *block)
return sum; return sum;
} }
/* Set h rows of 16 bytes each to value. Consecutive rows start line_size
 * bytes apart; nothing is written when h is zero or negative. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        memset(row, value, 16);
        row += line_size;
    }
}
/* Set h rows of 8 bytes each to value. Consecutive rows start line_size
 * bytes apart; nothing is written when h is zero or negative. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int rows_left;

    for (rows_left = h; rows_left > 0; rows_left--) {
        memset(row, value, 8);
        row += line_size;
    }
}
#define avg2(a, b) ((a + b + 1) >> 1) #define avg2(a, b) ((a + b + 1) >> 1)
#define avg4(a, b, c, d) ((a + b + c + d + 2) >> 2) #define avg4(a, b, c, d) ((a + b + c + d + 2) >> 2)
@ -1408,16 +1388,6 @@ static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height,
memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); memcpy(last_line + (i + 1) * wrap, last_line, width + w + w);
} }
/* Zero one block of 64 int16_t coefficients. */
static void clear_block_8_c(int16_t *block)
{
    memset(block, 0, 64 * sizeof(*block));
}
/* Zero six consecutive blocks of 64 int16_t coefficients each. */
static void clear_blocks_8_c(int16_t *blocks)
{
    memset(blocks, 0, 6 * 64 * sizeof(*blocks));
}
/* init static data */ /* init static data */
av_cold void ff_dsputil_static_init(void) av_cold void ff_dsputil_static_init(void)
{ {
@ -1487,9 +1457,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
c->pix_sum = pix_sum_c; c->pix_sum = pix_sum_c;
c->pix_norm1 = pix_norm1_c; c->pix_norm1 = pix_norm1_c;
c->fill_block_tab[0] = fill_block16_c;
c->fill_block_tab[1] = fill_block8_c;
/* TODO [0] 16 [1] 8 */ /* TODO [0] 16 [1] 8 */
c->pix_abs[0][0] = pix_abs16_c; c->pix_abs[0][0] = pix_abs16_c;
c->pix_abs[0][1] = pix_abs16_x2_c; c->pix_abs[0][1] = pix_abs16_x2_c;
@ -1546,9 +1513,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
c->draw_edges = draw_edges_8_c; c->draw_edges = draw_edges_8_c;
c->clear_block = clear_block_8_c;
c->clear_blocks = clear_blocks_8_c;
switch (avctx->bits_per_raw_sample) { switch (avctx->bits_per_raw_sample) {
case 9: case 9:
case 10: case 10:

@ -38,26 +38,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy, int shift, int r, int dxx, int dxy, int dyx, int dyy, int shift, int r,
int width, int height); int width, int height);
/* minimum alignment rules ;)
* If you notice errors in the align stuff, need more alignment for some ASM code
* for some CPU or need to use a function with less aligned data then send a mail
* to the libav-devel mailing list, ...
*
* !warning These alignments might not match reality, (missing attribute((align))
* stuff somewhere possible).
* I (Michael) did not check them, these are just the alignments which I think
* could be reached easily ...
*
* !future video codecs might need functions with less strict alignment
*/
/* add and put pixel (decoding)
* Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16.
* h for op_pixels_func is limited to { width / 2, width },
* but never larger than 16 and never smaller than 4. */
typedef void (*op_fill_func)(uint8_t *block /* align width (8 or 16) */,
uint8_t value, int line_size, int h);
struct MpegEncContext; struct MpegEncContext;
/* Motion estimation: /* Motion estimation:
* h is limited to { width / 2, width, 2 * width }, * h is limited to { width / 2, width, 2 * width },
@ -116,8 +96,7 @@ typedef struct DSPContext {
int stride, int h, int ox, int oy, int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy, int dxx, int dxy, int dyx, int dyy,
int shift, int r, int width, int height); int shift, int r, int width, int height);
void (*clear_block)(int16_t *block /* align 16 */);
void (*clear_blocks)(int16_t *blocks /* align 16 */);
int (*pix_sum)(uint8_t *pix, int line_size); int (*pix_sum)(uint8_t *pix, int line_size);
int (*pix_norm1)(uint8_t *pix, int line_size); int (*pix_norm1)(uint8_t *pix, int line_size);
@ -234,8 +213,6 @@ typedef struct DSPContext {
*/ */
void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min,
int32_t max, unsigned int len); int32_t max, unsigned int len);
op_fill_func fill_block_tab[2];
} DSPContext; } DSPContext;
void ff_dsputil_static_init(void); void ff_dsputil_static_init(void);

@ -44,6 +44,7 @@
typedef struct MadContext { typedef struct MadContext {
AVCodecContext *avctx; AVCodecContext *avctx;
BlockDSPContext bdsp;
DSPContext dsp; DSPContext dsp;
AVFrame *last_frame; AVFrame *last_frame;
GetBitContext gb; GetBitContext gb;
@ -61,6 +62,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
MadContext *s = avctx->priv_data; MadContext *s = avctx->priv_data;
s->avctx = avctx; s->avctx = avctx;
avctx->pix_fmt = AV_PIX_FMT_YUV420P; avctx->pix_fmt = AV_PIX_FMT_YUV420P;
ff_blockdsp_init(&s->bdsp, avctx);
ff_dsputil_init(&s->dsp, avctx); ff_dsputil_init(&s->dsp, avctx);
ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM); ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM);
ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct);
@ -207,7 +209,7 @@ static void decode_mb(MadContext *s, AVFrame *frame, int inter)
int add = 2*decode_motion(&s->gb); int add = 2*decode_motion(&s->gb);
comp_block(s, frame, s->mb_x, s->mb_y, j, mv_x, mv_y, add); comp_block(s, frame, s->mb_x, s->mb_y, j, mv_x, mv_y, add);
} else { } else {
s->dsp.clear_block(s->block); s->bdsp.clear_block(s->block);
decode_block_intra(s, s->block); decode_block_intra(s, s->block);
idct_put(s, frame, s->block, s->mb_x, s->mb_y, j); idct_put(s, frame, s->block, s->mb_x, s->mb_y, j);
} }

@ -27,6 +27,7 @@
*/ */
#include "avcodec.h" #include "avcodec.h"
#include "blockdsp.h"
#include "get_bits.h" #include "get_bits.h"
#include "aandcttab.h" #include "aandcttab.h"
#include "eaidct.h" #include "eaidct.h"
@ -46,6 +47,7 @@ static av_cold int tqi_decode_init(AVCodecContext *avctx)
TqiContext *t = avctx->priv_data; TqiContext *t = avctx->priv_data;
MpegEncContext *s = &t->s; MpegEncContext *s = &t->s;
s->avctx = avctx; s->avctx = avctx;
ff_blockdsp_init(&s->bdsp, avctx);
ff_dsputil_init(&s->dsp, avctx); ff_dsputil_init(&s->dsp, avctx);
ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM); ff_init_scantable_permutation(s->dsp.idct_permutation, FF_NO_IDCT_PERM);
ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct); ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable, ff_zigzag_direct);
@ -59,7 +61,7 @@ static av_cold int tqi_decode_init(AVCodecContext *avctx)
static int tqi_decode_mb(MpegEncContext *s, int16_t (*block)[64]) static int tqi_decode_mb(MpegEncContext *s, int16_t (*block)[64])
{ {
int n; int n;
s->dsp.clear_blocks(block[0]); s->bdsp.clear_blocks(block[0]);
for (n=0; n<6; n++) for (n=0; n<6; n++)
if (ff_mpeg1_decode_block_intra(s, block[n], n) < 0) if (ff_mpeg1_decode_block_intra(s, block[n], n) < 0)
return -1; return -1;

@ -29,6 +29,7 @@
#include "libavutil/intreadwrite.h" #include "libavutil/intreadwrite.h"
#include "avcodec.h" #include "avcodec.h"
#include "blockdsp.h"
#include "bytestream.h" #include "bytestream.h"
#include "dsputil.h" #include "dsputil.h"
#include "get_bits.h" #include "get_bits.h"
@ -72,6 +73,7 @@ static const uint8_t chroma_quant[64] = {
}; };
typedef struct JPGContext { typedef struct JPGContext {
BlockDSPContext bdsp;
DSPContext dsp; DSPContext dsp;
ScanTable scantable; ScanTable scantable;
@ -150,6 +152,7 @@ static av_cold int jpg_init(AVCodecContext *avctx, JPGContext *c)
if (ret) if (ret)
return ret; return ret;
ff_blockdsp_init(&c->bdsp, avctx);
ff_dsputil_init(&c->dsp, avctx); ff_dsputil_init(&c->dsp, avctx);
ff_init_scantable(c->dsp.idct_permutation, &c->scantable, ff_init_scantable(c->dsp.idct_permutation, &c->scantable,
ff_zigzag_direct); ff_zigzag_direct);
@ -193,7 +196,7 @@ static int jpg_decode_block(JPGContext *c, GetBitContext *gb,
const int is_chroma = !!plane; const int is_chroma = !!plane;
const uint8_t *qmat = is_chroma ? chroma_quant : luma_quant; const uint8_t *qmat = is_chroma ? chroma_quant : luma_quant;
c->dsp.clear_block(block); c->bdsp.clear_block(block);
dc = get_vlc2(gb, c->dc_vlc[is_chroma].table, 9, 3); dc = get_vlc2(gb, c->dc_vlc[is_chroma].table, 9, 3);
if (dc < 0) if (dc < 0)
return AVERROR_INVALIDDATA; return AVERROR_INVALIDDATA;
@ -259,7 +262,7 @@ static int jpg_decode_data(JPGContext *c, int width, int height,
for (i = 0; i < 3; i++) for (i = 0; i < 3; i++)
c->prev_dc[i] = 1024; c->prev_dc[i] = 1024;
bx = by = 0; bx = by = 0;
c->dsp.clear_blocks(c->block[0]); c->bdsp.clear_blocks(c->block[0]);
for (mb_y = 0; mb_y < mb_h; mb_y++) { for (mb_y = 0; mb_y < mb_h; mb_y++) {
for (mb_x = 0; mb_x < mb_w; mb_x++) { for (mb_x = 0; mb_x < mb_w; mb_x++) {
if (mask && !mask[mb_x * 2] && !mask[mb_x * 2 + 1] && if (mask && !mask[mb_x * 2] && !mask[mb_x * 2 + 1] &&

@ -433,7 +433,7 @@ static int h261_decode_mb(H261Context *h)
intra: intra:
/* decode each block */ /* decode each block */
if (s->mb_intra || HAS_CBP(h->mtype)) { if (s->mb_intra || HAS_CBP(h->mtype)) {
s->dsp.clear_blocks(s->block[0]); s->bdsp.clear_blocks(s->block[0]);
for (i = 0; i < 6; i++) { for (i = 0; i < 6; i++) {
if (h261_decode_block(h, s->block[i], i, cbp & 32) < 0) if (h261_decode_block(h, s->block[i], i, cbp & 32) < 0)
return SLICE_ERROR; return SLICE_ERROR;

@ -197,7 +197,7 @@ static inline int get_p_cbp(MpegEncContext * s,
for (i = 0; i < 6; i++) { for (i = 0; i < 6; i++) {
if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){ if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){
s->block_last_index[i]= -1; s->block_last_index[i]= -1;
s->dsp.clear_block(s->block[i]); s->bdsp.clear_block(s->block[i]);
} }
} }
}else{ }else{

@ -538,7 +538,7 @@ static int x8_decode_intra_mb(IntraX8Context* const w, const int chroma){
int sign; int sign;
assert(w->orient<12); assert(w->orient<12);
s->dsp.clear_block(s->block[0]); s->bdsp.clear_block(s->block[0]);
if(chroma){ if(chroma){
dc_mode=2; dc_mode=2;

@ -538,7 +538,7 @@ retry:
rl = &ff_rl_intra_aic; rl = &ff_rl_intra_aic;
i = 0; i = 0;
s->gb= gb; s->gb= gb;
s->dsp.clear_block(block); s->bdsp.clear_block(block);
goto retry; goto retry;
} }
av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra); av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra);
@ -628,7 +628,7 @@ int ff_h263_decode_mb(MpegEncContext *s,
} }
}while(cbpc == 20); }while(cbpc == 20);
s->dsp.clear_blocks(s->block[0]); s->bdsp.clear_blocks(s->block[0]);
dquant = cbpc & 8; dquant = cbpc & 8;
s->mb_intra = ((cbpc & 4) != 0); s->mb_intra = ((cbpc & 4) != 0);
@ -723,7 +723,7 @@ int ff_h263_decode_mb(MpegEncContext *s,
s->mb_intra = IS_INTRA(mb_type); s->mb_intra = IS_INTRA(mb_type);
if(HAS_CBP(mb_type)){ if(HAS_CBP(mb_type)){
s->dsp.clear_blocks(s->block[0]); s->bdsp.clear_blocks(s->block[0]);
cbpc = get_vlc2(&s->gb, cbpc_b_vlc.table, CBPC_B_VLC_BITS, 1); cbpc = get_vlc2(&s->gb, cbpc_b_vlc.table, CBPC_B_VLC_BITS, 1);
if(s->mb_intra){ if(s->mb_intra){
dquant = IS_QUANT(mb_type); dquant = IS_QUANT(mb_type);
@ -797,7 +797,7 @@ int ff_h263_decode_mb(MpegEncContext *s,
} }
}while(cbpc == 8); }while(cbpc == 8);
s->dsp.clear_blocks(s->block[0]); s->bdsp.clear_blocks(s->block[0]);
dquant = cbpc & 4; dquant = cbpc & 4;
s->mb_intra = 1; s->mb_intra = 1;

@ -28,12 +28,12 @@
#include "libavutil/intreadwrite.h" #include "libavutil/intreadwrite.h"
#include "avcodec.h" #include "avcodec.h"
#include "dsputil.h" #include "blockdsp.h"
#include "get_bits.h" #include "get_bits.h"
#include "internal.h" #include "internal.h"
typedef struct JvContext { typedef struct JvContext {
DSPContext dsp; BlockDSPContext bdsp;
AVFrame *frame; AVFrame *frame;
uint32_t palette[AVPALETTE_COUNT]; uint32_t palette[AVPALETTE_COUNT];
int palette_has_changed; int palette_has_changed;
@ -48,7 +48,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
return AVERROR(ENOMEM); return AVERROR(ENOMEM);
avctx->pix_fmt = AV_PIX_FMT_PAL8; avctx->pix_fmt = AV_PIX_FMT_PAL8;
ff_dsputil_init(&s->dsp, avctx); ff_blockdsp_init(&s->bdsp, avctx);
return 0; return 0;
} }
@ -113,14 +113,14 @@ static inline void decode4x4(GetBitContext *gb, uint8_t *dst, int linesize)
* Decode 8x8 block * Decode 8x8 block
*/ */
static inline void decode8x8(GetBitContext *gb, uint8_t *dst, int linesize, static inline void decode8x8(GetBitContext *gb, uint8_t *dst, int linesize,
DSPContext *dsp) BlockDSPContext *bdsp)
{ {
int i, j, v[2]; int i, j, v[2];
switch (get_bits(gb, 2)) { switch (get_bits(gb, 2)) {
case 1: case 1:
v[0] = get_bits(gb, 8); v[0] = get_bits(gb, 8);
dsp->fill_block_tab[1](dst, v[0], linesize, 8); bdsp->fill_block_tab[1](dst, v[0], linesize, 8);
break; break;
case 2: case 2:
v[0] = get_bits(gb, 8); v[0] = get_bits(gb, 8);
@ -163,7 +163,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
for (i = 0; i < avctx->width; i += 8) for (i = 0; i < avctx->width; i += 8)
decode8x8(&gb, decode8x8(&gb,
s->frame->data[0] + j * s->frame->linesize[0] + i, s->frame->data[0] + j * s->frame->linesize[0] + i,
s->frame->linesize[0], &s->dsp); s->frame->linesize[0], &s->bdsp);
buf += video_size; buf += video_size;
} else if (video_type == 2) { } else if (video_type == 2) {

@ -28,12 +28,14 @@
*/ */
#include "avcodec.h" #include "avcodec.h"
#include "blockdsp.h"
#include "mpegvideo.h" #include "mpegvideo.h"
#include "mpeg12.h" #include "mpeg12.h"
#include "thread.h" #include "thread.h"
typedef struct MDECContext { typedef struct MDECContext {
AVCodecContext *avctx; AVCodecContext *avctx;
BlockDSPContext bdsp;
DSPContext dsp; DSPContext dsp;
ThreadFrame frame; ThreadFrame frame;
GetBitContext gb; GetBitContext gb;
@ -123,7 +125,7 @@ static inline int decode_mb(MDECContext *a, int16_t block[6][64])
int i, ret; int i, ret;
const int block_index[6] = { 5, 4, 0, 1, 2, 3 }; const int block_index[6] = { 5, 4, 0, 1, 2, 3 };
a->dsp.clear_blocks(block[0]); a->bdsp.clear_blocks(block[0]);
for (i = 0; i < 6; i++) { for (i = 0; i < 6; i++) {
if ((ret = mdec_decode_block_intra(a, block[block_index[i]], if ((ret = mdec_decode_block_intra(a, block[block_index[i]],
@ -212,6 +214,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
a->avctx = avctx; a->avctx = avctx;
ff_blockdsp_init(&a->bdsp, avctx);
ff_dsputil_init(&a->dsp, avctx); ff_dsputil_init(&a->dsp, avctx);
ff_mpeg12_init_vlcs(); ff_mpeg12_init_vlcs();
ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_zigzag_direct); ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_zigzag_direct);

@ -24,6 +24,7 @@
#include <stdint.h> #include <stdint.h>
#include "avcodec.h" #include "avcodec.h"
#include "blockdsp.h"
#include "internal.h" #include "internal.h"
#include "get_bits.h" #include "get_bits.h"
#include "bytestream.h" #include "bytestream.h"
@ -52,6 +53,7 @@ typedef struct {
GetBitContext gb; GetBitContext gb;
ScanTable scantable; ScanTable scantable;
BlockDSPContext bdsp;
DSPContext dsp; DSPContext dsp;
HpelDSPContext hdsp; HpelDSPContext hdsp;
VLC vlc; VLC vlc;
@ -145,6 +147,7 @@ static av_cold int mimic_decode_init(AVCodecContext *avctx)
av_log(avctx, AV_LOG_ERROR, "error initializing vlc table\n"); av_log(avctx, AV_LOG_ERROR, "error initializing vlc table\n");
return ret; return ret;
} }
ff_blockdsp_init(&ctx->bdsp, avctx);
ff_dsputil_init(&ctx->dsp, avctx); ff_dsputil_init(&ctx->dsp, avctx);
ff_hpeldsp_init(&ctx->hdsp, avctx->flags); ff_hpeldsp_init(&ctx->hdsp, avctx->flags);
ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, col_zag); ff_init_scantable(ctx->dsp.idct_permutation, &ctx->scantable, col_zag);
@ -227,7 +230,7 @@ static int vlc_decode_block(MimicContext *ctx, int num_coeffs, int qscale)
int16_t *block = ctx->dct_block; int16_t *block = ctx->dct_block;
unsigned int pos; unsigned int pos;
ctx->dsp.clear_block(block); ctx->bdsp.clear_block(block);
block[0] = get_bits(&ctx->gb, 8) << 3; block[0] = get_bits(&ctx->gb, 8) << 3;

@ -35,6 +35,7 @@
#include "libavutil/imgutils.h" #include "libavutil/imgutils.h"
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "avcodec.h" #include "avcodec.h"
#include "blockdsp.h"
#include "internal.h" #include "internal.h"
#include "mjpeg.h" #include "mjpeg.h"
#include "mjpegdec.h" #include "mjpegdec.h"
@ -92,6 +93,7 @@ av_cold int ff_mjpeg_decode_init(AVCodecContext *avctx)
} }
s->avctx = avctx; s->avctx = avctx;
ff_blockdsp_init(&s->bdsp, avctx);
ff_hpeldsp_init(&s->hdsp, avctx->flags); ff_hpeldsp_init(&s->hdsp, avctx->flags);
ff_dsputil_init(&s->dsp, avctx); ff_dsputil_init(&s->dsp, avctx);
ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct); ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct);
@ -486,7 +488,7 @@ static int decode_dc_progressive(MJpegDecodeContext *s, int16_t *block,
int16_t *quant_matrix, int Al) int16_t *quant_matrix, int Al)
{ {
int val; int val;
s->dsp.clear_block(block); s->bdsp.clear_block(block);
val = mjpeg_decode_dc(s, dc_index); val = mjpeg_decode_dc(s, dc_index);
if (val == 0xffff) { if (val == 0xffff) {
av_log(s->avctx, AV_LOG_ERROR, "error dc\n"); av_log(s->avctx, AV_LOG_ERROR, "error dc\n");
@ -878,7 +880,7 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah,
reference_data[c] + block_offset, reference_data[c] + block_offset,
linesize[c], 8); linesize[c], 8);
else { else {
s->dsp.clear_block(s->block); s->bdsp.clear_block(s->block);
if (decode_block(s, s->block, i, if (decode_block(s, s->block, i,
s->dc_index[i], s->ac_index[i], s->dc_index[i], s->ac_index[i],
s->quant_matrixes[s->quant_index[c]]) < 0) { s->quant_matrixes[s->quant_index[c]]) < 0) {

@ -33,6 +33,7 @@
#include "libavutil/pixdesc.h" #include "libavutil/pixdesc.h"
#include "avcodec.h" #include "avcodec.h"
#include "blockdsp.h"
#include "get_bits.h" #include "get_bits.h"
#include "dsputil.h" #include "dsputil.h"
#include "hpeldsp.h" #include "hpeldsp.h"
@ -95,6 +96,7 @@ typedef struct MJpegDecodeContext {
uint8_t *last_nnz[MAX_COMPONENTS]; uint8_t *last_nnz[MAX_COMPONENTS];
uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode) uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode)
ScanTable scantable; ScanTable scantable;
BlockDSPContext bdsp;
DSPContext dsp; DSPContext dsp;
HpelDSPContext hdsp; HpelDSPContext hdsp;

@ -776,10 +776,10 @@ static int mpeg_decode_mb(MpegEncContext *s, int16_t block[12][64])
av_dlog(s->avctx, "mb_type=%x\n", mb_type); av_dlog(s->avctx, "mb_type=%x\n", mb_type);
// motion_type = 0; /* avoid warning */ // motion_type = 0; /* avoid warning */
if (IS_INTRA(mb_type)) { if (IS_INTRA(mb_type)) {
s->dsp.clear_blocks(s->block[0]); s->bdsp.clear_blocks(s->block[0]);
if (!s->chroma_y_shift) if (!s->chroma_y_shift)
s->dsp.clear_blocks(s->block[6]); s->bdsp.clear_blocks(s->block[6]);
/* compute DCT type */ /* compute DCT type */
// FIXME: add an interlaced_dct coded var? // FIXME: add an interlaced_dct coded var?
@ -1014,13 +1014,13 @@ FF_ENABLE_DEPRECATION_WARNINGS
s->mb_intra = 0; s->mb_intra = 0;
if (HAS_CBP(mb_type)) { if (HAS_CBP(mb_type)) {
s->dsp.clear_blocks(s->block[0]); s->bdsp.clear_blocks(s->block[0]);
cbp = get_vlc2(&s->gb, ff_mb_pat_vlc.table, MB_PAT_VLC_BITS, 1); cbp = get_vlc2(&s->gb, ff_mb_pat_vlc.table, MB_PAT_VLC_BITS, 1);
if (mb_block_count > 6) { if (mb_block_count > 6) {
cbp <<= mb_block_count - 6; cbp <<= mb_block_count - 6;
cbp |= get_bits(&s->gb, mb_block_count - 6); cbp |= get_bits(&s->gb, mb_block_count - 6);
s->dsp.clear_blocks(s->block[6]); s->bdsp.clear_blocks(s->block[6]);
} }
if (cbp <= 0) { if (cbp <= 0) {
av_log(s->avctx, AV_LOG_ERROR, av_log(s->avctx, AV_LOG_ERROR,

@ -1227,7 +1227,7 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, int16_t block[6][64])
if (!IS_SKIP(mb_type)) { if (!IS_SKIP(mb_type)) {
int i; int i;
s->dsp.clear_blocks(s->block[0]); s->bdsp.clear_blocks(s->block[0]);
/* decode each block */ /* decode each block */
for (i = 0; i < 6; i++) { for (i = 0; i < 6; i++) {
if (mpeg4_decode_block(ctx, block[i], i, cbp & 32, s->mb_intra, ctx->rvlc) < 0) { if (mpeg4_decode_block(ctx, block[i], i, cbp & 32, s->mb_intra, ctx->rvlc) < 0) {
@ -1305,7 +1305,7 @@ static int mpeg4_decode_mb(MpegEncContext *s, int16_t block[6][64])
} }
} while (cbpc == 20); } while (cbpc == 20);
s->dsp.clear_blocks(s->block[0]); s->bdsp.clear_blocks(s->block[0]);
dquant = cbpc & 8; dquant = cbpc & 8;
s->mb_intra = ((cbpc & 4) != 0); s->mb_intra = ((cbpc & 4) != 0);
if (s->mb_intra) if (s->mb_intra)
@ -1451,7 +1451,7 @@ static int mpeg4_decode_mb(MpegEncContext *s, int16_t block[6][64])
if (modb2) { if (modb2) {
cbp = 0; cbp = 0;
} else { } else {
s->dsp.clear_blocks(s->block[0]); s->bdsp.clear_blocks(s->block[0]);
cbp = get_bits(&s->gb, 6); cbp = get_bits(&s->gb, 6);
} }
@ -1586,7 +1586,7 @@ intra:
if (!s->progressive_sequence) if (!s->progressive_sequence)
s->interlaced_dct = get_bits1(&s->gb); s->interlaced_dct = get_bits1(&s->gb);
s->dsp.clear_blocks(s->block[0]); s->bdsp.clear_blocks(s->block[0]);
/* decode each block */ /* decode each block */
for (i = 0; i < 6; i++) { for (i = 0; i < 6; i++) {
if (mpeg4_decode_block(ctx, block[i], i, cbp & 32, 1, 0) < 0) if (mpeg4_decode_block(ctx, block[i], i, cbp & 32, 1, 0) < 0)

@ -485,7 +485,7 @@ static inline int get_b_cbp(MpegEncContext *s, int16_t block[6][64],
for (i = 0; i < 6; i++) { for (i = 0; i < 6; i++) {
if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i)) & 1) == 0) { if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i)) & 1) == 0) {
s->block_last_index[i] = -1; s->block_last_index[i] = -1;
s->dsp.clear_block(s->block[i]); s->bdsp.clear_block(s->block[i]);
} }
} }
} else { } else {

@ -33,6 +33,7 @@
#include "libavutil/internal.h" #include "libavutil/internal.h"
#include "libavutil/timer.h" #include "libavutil/timer.h"
#include "avcodec.h" #include "avcodec.h"
#include "blockdsp.h"
#include "dsputil.h" #include "dsputil.h"
#include "internal.h" #include "internal.h"
#include "mathops.h" #include "mathops.h"
@ -363,7 +364,7 @@ static void mpeg_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
ff_init_block_index(s); ff_init_block_index(s);
ff_update_block_index(s); ff_update_block_index(s);
s->dsp.clear_blocks(s->block[0]); s->bdsp.clear_blocks(s->block[0]);
s->dest[0] = s->current_picture.f->data[0] + (s->mb_y * 16 * s->linesize) + s->mb_x * 16; s->dest[0] = s->current_picture.f->data[0] + (s->mb_y * 16 * s->linesize) + s->mb_x * 16;
s->dest[1] = s->current_picture.f->data[1] + (s->mb_y * (16 >> s->chroma_y_shift) * s->uvlinesize) + s->mb_x * (16 >> s->chroma_x_shift); s->dest[1] = s->current_picture.f->data[1] + (s->mb_y * (16 >> s->chroma_y_shift) * s->uvlinesize) + s->mb_x * (16 >> s->chroma_x_shift);
@ -376,6 +377,7 @@ static void mpeg_er_decode_mb(void *opaque, int ref, int mv_dir, int mv_type,
/* init common dct for both encoder and decoder */ /* init common dct for both encoder and decoder */
av_cold int ff_dct_common_init(MpegEncContext *s) av_cold int ff_dct_common_init(MpegEncContext *s)
{ {
ff_blockdsp_init(&s->bdsp, s->avctx);
ff_dsputil_init(&s->dsp, s->avctx); ff_dsputil_init(&s->dsp, s->avctx);
ff_hpeldsp_init(&s->hdsp, s->avctx->flags); ff_hpeldsp_init(&s->hdsp, s->avctx->flags);
ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample); ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);

@ -29,6 +29,7 @@
#define AVCODEC_MPEGVIDEO_H #define AVCODEC_MPEGVIDEO_H
#include "avcodec.h" #include "avcodec.h"
#include "blockdsp.h"
#include "dsputil.h" #include "dsputil.h"
#include "error_resilience.h" #include "error_resilience.h"
#include "get_bits.h" #include "get_bits.h"
@ -347,6 +348,7 @@ typedef struct MpegEncContext {
int unrestricted_mv; ///< mv can point outside of the coded picture int unrestricted_mv; ///< mv can point outside of the coded picture
int h263_long_vectors; ///< use horrible h263v1 long vector mode int h263_long_vectors; ///< use horrible h263v1 long vector mode
BlockDSPContext bdsp;
DSPContext dsp; ///< pointers for accelerated dsp functions DSPContext dsp; ///< pointers for accelerated dsp functions
HpelDSPContext hdsp; HpelDSPContext hdsp;
QpelDSPContext qdsp; QpelDSPContext qdsp;

@ -174,7 +174,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, int16_t block[6][64])
} }
} }
s->dsp.clear_blocks(s->block[0]); s->bdsp.clear_blocks(s->block[0]);
for (i = 0; i < 6; i++) { for (i = 0; i < 6; i++) {
if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
{ {
@ -265,7 +265,7 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, int16_t block[6][64])
} }
} }
s->dsp.clear_blocks(s->block[0]); s->bdsp.clear_blocks(s->block[0]);
for (i = 0; i < 6; i++) { for (i = 0; i < 6; i++) {
if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
{ {

@ -1,5 +1,6 @@
OBJS += ppc/fmtconvert_altivec.o \ OBJS += ppc/fmtconvert_altivec.o \
OBJS-$(CONFIG_BLOCKDSP) += ppc/blockdsp.o
OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_ppc.o OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_ppc.o
OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o
OBJS-$(CONFIG_H264CHROMA) += ppc/h264chroma_init.o OBJS-$(CONFIG_H264CHROMA) += ppc/h264chroma_init.o

@ -0,0 +1,169 @@
/*
* Copyright (c) 2002 Brian Foley
* Copyright (c) 2002 Dieter Shirley
* Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#if HAVE_ALTIVEC_H
#include <altivec.h>
#endif
#include <string.h>
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/mem.h"
#include "libavutil/ppc/cpu.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavcodec/blockdsp.h"
/* ***** WARNING ***** WARNING ***** WARNING ***** */
/*
* clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with
* a cache line size not equal to 32 bytes. Fortunately all processors used
* by Apple up to at least the 7450 (AKA second generation G4) use 32-byte
* cache lines. This is due to the use of the 'dcbz' instruction. It simply
* clears a single cache line to zero, so you need to know the cache line
* size to use it! It's absurd, but it's fast...
*
* update 24/06/2003: Apple released the G5 yesterday, with a PPC970.
* cache line size: 128 bytes. Oops.
* The semantics of dcbz were changed: it always clears 32 bytes. So the function
* below will work, but will be slow. So I fixed check_dcbzl_effect to use dcbzl,
* which is defined to clear a cache line (as dcbz before). So we can still
* distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required.
*
* see <http://developer.apple.com/technotes/tn/tn2087.html>
* and <http://developer.apple.com/technotes/tn/tn2086.html>
*/
/* Zero six consecutive blocks of 64 int16_t coefficients (768 bytes) using
 * dcbz, which is expected to clear 32 bytes per invocation here.
 *
 * Only bit 4 of the address is tested, so blocks is expected to be at least
 * 16-byte aligned. When it is 16 bytes off a 32-byte boundary, the first and
 * last 16 bytes are cleared with memset() and dcbz handles the 32-byte
 * aligned middle part. */
static void clear_blocks_dcbz32_ppc(int16_t *blocks)
{
    register int misal = (unsigned long) blocks & 0x00000010, i = 0;

    if (misal) {
        /* Head: 16 bytes up to the next 32-byte boundary. memset() is used
         * instead of stores through an unsigned long pointer so that the
         * amount cleared does not depend on sizeof(unsigned long). */
        memset(blocks, 0, 16);
        i += 16;
    }
    for (; i < sizeof(int16_t) * 6 * 64 - 31; i += 32)
        __asm__ volatile ("dcbz %0,%1" :: "b" (blocks), "r" (i) : "memory");
    if (misal) {
        /* Tail: the final 16 bytes that do not fill a whole 32-byte line. */
        memset((char *) blocks + sizeof(int16_t) * 6 * 64 - 16, 0, 16);
    }
}
/* Zero six consecutive blocks of 64 int16_t coefficients (768 bytes) for
 * CPUs where dcbzl clears a whole 128-byte cache line, i.e. the PPC970
 * AKA G5. Falls back to memset() when dcbzl is not available at build time
 * or when blocks is not 128-byte aligned. */
static void clear_blocks_dcbz128_ppc(int16_t *blocks)
{
#if HAVE_DCBZL
    const int total_bytes = sizeof(int16_t) * 6 * 64;
    int offset;

    if ((unsigned long) blocks & 0x0000007f) {
        /* We could probably also optimize this case,
         * but there's not much point as the machines
         * aren't available yet (2003-06-26). */
        memset(blocks, 0, total_bytes);
        return;
    }

    for (offset = 0; offset < total_bytes; offset += 128)
        __asm__ volatile ("dcbzl %0,%1" :: "b" (blocks), "r" (offset) : "memory");
#else
    memset(blocks, 0, sizeof(int16_t) * 6 * 64);
#endif
}
/* Report how many bytes a single dcbzl instruction sets to zero.
 *
 * A 1024-byte scratch buffer is filled with 0xFF, dcbzl is issued on its
 * middle, and the zero bytes are counted afterwards. Returns 0 when the
 * buffer cannot be allocated or when dcbzl support was not detected at
 * build time (HAVE_DCBZL unset).
 *
 * update 24/06/2003: dcbz was replaced by dcbzl to get the intended effect
 * (Apple "fixed" dcbz). Unfortunately this cannot be used unless the
 * assembler knows about dcbzl ... */
static long check_dcbzl_effect(void)
{
    long count = 0;
#if HAVE_DCBZL
    register char *fakedata = av_malloc(1024);
    register char *fakedata_middle;
    register long zero = 0, i = 0;

    if (!fakedata)
        return 0L;

    fakedata_middle = fakedata + 512;

    memset(fakedata, 0xFF, 1024);

    /* Below the constraint "b" seems to mean "address base register"
     * in gcc-3.3 / RS/6000 speaks. Seems to avoid using r0, so....
     * The "memory" clobber tells the compiler that the instruction modifies
     * the buffer, so the counting loop below must really reload it instead
     * of assuming it still holds the 0xFF pattern written by memset(). */
    __asm__ volatile ("dcbzl %0, %1" :: "b" (fakedata_middle), "r" (zero) : "memory");

    for (i = 0; i < 1024; i++)
        if (fakedata[i] == (char) 0)
            count++;

    av_free(fakedata);
#endif

    return count;
}
#if HAVE_ALTIVEC
/* Zero one block of 64 int16_t coefficients (128 bytes) with eight
 * 16-byte AltiVec vector stores. */
static void clear_block_altivec(int16_t *block)
{
    int offset;

    LOAD_ZERO;
    for (offset = 0; offset < 128; offset += 16)
        vec_st(zero_s16v, offset, block);
}
#endif /* HAVE_ALTIVEC */
/* Install the PowerPC block-clearing routines into c.
 * Function pointers are only overridden when high_bit_depth is zero. */
av_cold void ff_blockdsp_init_ppc(BlockDSPContext *c, unsigned high_bit_depth)
{
    // common optimizations whether AltiVec is available or not
    if (!high_bit_depth) {
        /* Pick the clear_blocks variant matching the number of bytes
         * dcbzl was measured to clear; any other size keeps the
         * existing pointer. */
        const long zeroed = check_dcbzl_effect();

        if (zeroed == 32)
            c->clear_blocks = clear_blocks_dcbz32_ppc;
        else if (zeroed == 128)
            c->clear_blocks = clear_blocks_dcbz128_ppc;
    }

#if HAVE_ALTIVEC
    if (PPC_ALTIVEC(av_get_cpu_flags()) && !high_bit_depth)
        c->clear_block = clear_block_altivec;
#endif /* HAVE_ALTIVEC */
}

@ -558,19 +558,6 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
} }
} }
/* Zero one block of 64 int16_t coefficients (128 bytes) with eight
 * 16-byte AltiVec vector stores. */
static void clear_block_altivec(int16_t *block)
{
    int offset;

    LOAD_ZERO;
    for (offset = 0; offset < 128; offset += 16)
        vec_st(zero_s16v, offset, block);
}
static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst, static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst,
uint8_t *src, int stride, int h) uint8_t *src, int stride, int h)
{ {
@ -931,7 +918,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx,
if (!high_bit_depth) { if (!high_bit_depth) {
c->get_pixels = get_pixels_altivec; c->get_pixels = get_pixels_altivec;
c->clear_block = clear_block_altivec;
} }
c->hadamard8_diff[0] = hadamard8_diff16_altivec; c->hadamard8_diff[0] = hadamard8_diff16_altivec;

@ -24,124 +24,14 @@
#include "libavutil/attributes.h" #include "libavutil/attributes.h"
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/mem.h"
#include "libavutil/ppc/cpu.h" #include "libavutil/ppc/cpu.h"
#include "libavcodec/avcodec.h" #include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "dsputil_altivec.h" #include "dsputil_altivec.h"
/* ***** WARNING ***** WARNING ***** WARNING ***** */
/*
* clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with
* a cache line size not equal to 32 bytes. Fortunately all processors used
* by Apple up to at least the 7450 (AKA second generation G4) use 32-byte
* cache lines. This is due to the use of the 'dcbz' instruction. It simply
* clears a single cache line to zero, so you need to know the cache line
* size to use it! It's absurd, but it's fast...
*
* update 24/06/2003: Apple released the G5 yesterday, with a PPC970.
* cache line size: 128 bytes. Oops.
* The semantics of dcbz was changed, it always clears 32 bytes. So the function
* below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl,
* which is defined to clear a cache line (as dcbz before). So we can still
* distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required.
*
* see <http://developer.apple.com/technotes/tn/tn2087.html>
* and <http://developer.apple.com/technotes/tn/tn2086.html>
*/
/**
 * Zero six 64-coefficient int16_t blocks (768 bytes) using dcbz, for CPUs
 * where dcbz clears 32 bytes at a time.
 *
 * @param blocks start of the 768-byte area; NOTE(review): the code only
 *               distinguishes 32-byte alignment from a 16-byte offset, so it
 *               presumably relies on at least 16-byte alignment - confirm
 */
static void clear_blocks_dcbz32_ppc(int16_t *blocks)
{
    const int total = sizeof(int16_t) * 6 * 64;
    const int misal = (unsigned long) blocks & 0x00000010;
    int i = 0;

    /* When the area starts 16 bytes into a 32-byte line, the first and last
     * 16 bytes cannot be cleared with dcbz without touching memory outside
     * the area, so they are cleared separately.
     * memset is used rather than stores through an unsigned long pointer:
     * that approach hard-coded a 4-byte long and would write far past the
     * end of the area wherever long is 8 bytes wide. */
    if (misal) {
        memset(blocks, 0, 16);
        i += 16;
    }

    /* every i used here addresses a full 32-byte line inside the area */
    for (; i < total - 31; i += 32)
        __asm__ volatile ("dcbz %0,%1" :: "b" (blocks), "r" (i) : "memory");

    if (misal)
        memset((uint8_t *) blocks + total - 16, 0, 16);
}
/**
 * Zero six 64-coefficient int16_t blocks (768 bytes) for CPUs where dcbzl
 * clears a whole 128-byte cache line, i.e. the PPC970 AKA G5.
 *
 * Falls back to memset when the area is not 128-byte aligned or when
 * HAVE_DCBZL is not set at build time.
 */
static void clear_blocks_dcbz128_ppc(int16_t *blocks)
{
#if HAVE_DCBZL
    if (!((unsigned long) blocks & 0x0000007f)) {
        register int offset;

        /* aligned: one dcbzl per 128-byte line */
        for (offset = 0; offset < sizeof(int16_t) * 6 * 64; offset += 128)
            __asm__ volatile ("dcbzl %0,%1" :: "b" (blocks), "r" (offset) : "memory");
        return;
    }
    /* The misaligned case could probably be optimized as well, but it was
     * not considered worthwhile when this was written (2003-06-26). */
#endif
    memset(blocks, 0, sizeof(int16_t) * 6 * 64);
}
/**
 * Probe how many bytes one dcbzl instruction sets to zero.
 *
 * dcbzl is used instead of dcbz (update 24/06/2003) to get the intended
 * whole-cache-line effect after Apple "fixed" dcbz; this can only be built
 * when the assembler knows about dcbzl, hence the HAVE_DCBZL guard.
 *
 * A 1024-byte scratch buffer is filled with 0xFF, dcbzl is issued on the
 * address in the middle of it, and the bytes that read back as zero are
 * counted.
 *
 * @return number of bytes found zeroed; 0 if the scratch buffer could not be
 *         allocated or if HAVE_DCBZL is not set at build time
 */
static long check_dcbzl_effect(void)
{
    long count = 0;
#if HAVE_DCBZL
    char *fakedata = av_malloc(1024);
    char *fakedata_middle;
    long zero = 0, i;

    if (!fakedata)
        return 0L;

    fakedata_middle = fakedata + 512;

    memset(fakedata, 0xFF, 1024);

    /* The "b" constraint requests an address base register, which keeps the
     * compiler from picking r0 for the base operand.
     * The "memory" clobber is required because the instruction modifies the
     * buffer without the compiler seeing it: it forces the memset above to
     * be completed first and the counting loop below to really re-read the
     * buffer instead of assuming it still holds 0xFF. */
    __asm__ volatile ("dcbzl %0, %1"
                      :: "b" (fakedata_middle), "r" (zero)
                      : "memory");

    for (i = 0; i < 1024; i++)
        if (fakedata[i] == (char) 0)
            count++;

    av_free(fakedata);
#endif

    return count;
}
av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx, av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth) unsigned high_bit_depth)
{ {
// common optimizations whether AltiVec is available or not
if (!high_bit_depth) {
switch (check_dcbzl_effect()) {
case 32:
c->clear_blocks = clear_blocks_dcbz32_ppc;
break;
case 128:
c->clear_blocks = clear_blocks_dcbz128_ppc;
break;
default:
break;
}
}
if (PPC_ALTIVEC(av_get_cpu_flags())) { if (PPC_ALTIVEC(av_get_cpu_flags())) {
ff_dsputil_init_altivec(c, avctx, high_bit_depth); ff_dsputil_init_altivec(c, avctx, high_bit_depth);
ff_int_init_altivec(c, avctx); ff_int_init_altivec(c, avctx);

@ -3019,7 +3019,7 @@ static int vc1_decode_intra_block(VC1Context *v, int16_t block[64], int n,
int scale; int scale;
int q1, q2 = 0; int q1, q2 = 0;
s->dsp.clear_block(block); s->bdsp.clear_block(block);
/* XXX: Guard against dumb values of mquant */ /* XXX: Guard against dumb values of mquant */
mquant = (mquant < 1) ? 0 : ((mquant > 31) ? 31 : mquant); mquant = (mquant < 1) ? 0 : ((mquant > 31) ? 31 : mquant);
@ -3226,7 +3226,7 @@ static int vc1_decode_p_block(VC1Context *v, int16_t block[64], int n,
int ttblk = ttmb & 7; int ttblk = ttmb & 7;
int pat = 0; int pat = 0;
s->dsp.clear_block(block); s->bdsp.clear_block(block);
if (ttmb == -1) { if (ttmb == -1) {
ttblk = ff_vc1_ttblk_to_tt[v->tt_index][get_vlc2(gb, ff_vc1_ttblk_vlc[v->tt_index].table, VC1_TTBLK_VLC_BITS, 1)]; ttblk = ff_vc1_ttblk_to_tt[v->tt_index][get_vlc2(gb, ff_vc1_ttblk_vlc[v->tt_index].table, VC1_TTBLK_VLC_BITS, 1)];
@ -4797,7 +4797,7 @@ static void vc1_decode_i_blocks(VC1Context *v)
dst[3] = dst[2] + 8; dst[3] = dst[2] + 8;
dst[4] = s->dest[1]; dst[4] = s->dest[1];
dst[5] = s->dest[2]; dst[5] = s->dest[2];
s->dsp.clear_blocks(s->block[0]); s->bdsp.clear_blocks(s->block[0]);
mb_pos = s->mb_x + s->mb_y * s->mb_width; mb_pos = s->mb_x + s->mb_y * s->mb_width;
s->current_picture.mb_type[mb_pos] = MB_TYPE_INTRA; s->current_picture.mb_type[mb_pos] = MB_TYPE_INTRA;
s->current_picture.qscale_table[mb_pos] = v->pq; s->current_picture.qscale_table[mb_pos] = v->pq;
@ -4937,7 +4937,7 @@ static void vc1_decode_i_blocks_adv(VC1Context *v)
for (;s->mb_x < s->mb_width; s->mb_x++) { for (;s->mb_x < s->mb_width; s->mb_x++) {
int16_t (*block)[64] = v->block[v->cur_blk_idx]; int16_t (*block)[64] = v->block[v->cur_blk_idx];
ff_update_block_index(s); ff_update_block_index(s);
s->dsp.clear_blocks(block[0]); s->bdsp.clear_blocks(block[0]);
mb_pos = s->mb_x + s->mb_y * s->mb_stride; mb_pos = s->mb_x + s->mb_y * s->mb_stride;
s->current_picture.mb_type[mb_pos + v->mb_off] = MB_TYPE_INTRA; s->current_picture.mb_type[mb_pos + v->mb_off] = MB_TYPE_INTRA;
s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0] = 0; s->current_picture.motion_val[1][s->block_index[0] + v->blocks_off][0] = 0;
@ -5603,6 +5603,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
if (ff_vc1_init_common(v) < 0) if (ff_vc1_init_common(v) < 0)
return -1; return -1;
ff_blockdsp_init(&s->bdsp, avctx);
ff_h264chroma_init(&v->h264chroma, 8); ff_h264chroma_init(&v->h264chroma, 8);
ff_qpeldsp_init(&s->qdsp); ff_qpeldsp_init(&s->qdsp);
ff_vc1dsp_init(&v->vc1dsp); ff_vc1dsp_init(&v->vc1dsp);

@ -28,6 +28,7 @@
av_cold void ff_wmv2_common_init(Wmv2Context * w){ av_cold void ff_wmv2_common_init(Wmv2Context * w){
MpegEncContext * const s= &w->s; MpegEncContext * const s= &w->s;
ff_blockdsp_init(&s->bdsp, s->avctx);
ff_wmv2dsp_init(&w->wdsp); ff_wmv2dsp_init(&w->wdsp);
s->dsp.idct_permutation_type = w->wdsp.idct_perm; s->dsp.idct_permutation_type = w->wdsp.idct_perm;
ff_init_scantable_permutation(s->dsp.idct_permutation, ff_init_scantable_permutation(s->dsp.idct_permutation,
@ -60,12 +61,12 @@ static void wmv2_add_block(Wmv2Context *w, int16_t *block1, uint8_t *dst, int st
case 1: case 1:
ff_simple_idct84_add(dst , stride, block1); ff_simple_idct84_add(dst , stride, block1);
ff_simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]); ff_simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]);
s->dsp.clear_block(w->abt_block2[n]); s->bdsp.clear_block(w->abt_block2[n]);
break; break;
case 2: case 2:
ff_simple_idct48_add(dst , stride, block1); ff_simple_idct48_add(dst , stride, block1);
ff_simple_idct48_add(dst + 4 , stride, w->abt_block2[n]); ff_simple_idct48_add(dst + 4 , stride, w->abt_block2[n]);
s->dsp.clear_block(w->abt_block2[n]); s->bdsp.clear_block(w->abt_block2[n]);
break; break;
default: default:
av_log(s->avctx, AV_LOG_ERROR, "internal error in WMV2 abt\n"); av_log(s->avctx, AV_LOG_ERROR, "internal error in WMV2 abt\n");

@ -385,7 +385,7 @@ int ff_wmv2_decode_mb(MpegEncContext *s, int16_t block[6][64])
wmv2_pred_motion(w, &mx, &my); wmv2_pred_motion(w, &mx, &my);
if(cbp){ if(cbp){
s->dsp.clear_blocks(s->block[0]); s->bdsp.clear_blocks(s->block[0]);
if(s->per_mb_rl_table){ if(s->per_mb_rl_table){
s->rl_table_index = decode012(&s->gb); s->rl_table_index = decode012(&s->gb);
s->rl_chroma_table_index = s->rl_table_index; s->rl_chroma_table_index = s->rl_table_index;
@ -431,7 +431,7 @@ int ff_wmv2_decode_mb(MpegEncContext *s, int16_t block[6][64])
s->rl_chroma_table_index = s->rl_table_index; s->rl_chroma_table_index = s->rl_table_index;
} }
s->dsp.clear_blocks(s->block[0]); s->bdsp.clear_blocks(s->block[0]);
for (i = 0; i < 6; i++) { for (i = 0; i < 6; i++) {
if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) if (ff_msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
{ {

@ -44,6 +44,7 @@ OBJS-$(CONFIG_VP7_DECODER) += x86/vp8dsp_init.o
OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o
OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o
MMX-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_mmx.o
MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \ MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \
x86/idct_mmx_xvid.o \ x86/idct_mmx_xvid.o \
x86/idct_sse2_xvid.o \ x86/idct_sse2_xvid.o \

@ -0,0 +1,120 @@
/*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <stdint.h>
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/internal.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/blockdsp.h"
#include "libavcodec/version.h"
#if HAVE_INLINE_ASM
/* Generate a static function `name` that zeroes n consecutive blocks of
 * 128 bytes (64 int16_t coefficients each) with MMX stores.
 *
 * The loop indexes from the END of the area with a negative byte offset that
 * counts up to zero, so the sign flag set by the add is the loop condition.
 *
 * The asm stores through the pointer, so it carries a "memory" clobber to
 * stop the compiler from caching or reordering accesses to the blocks
 * across it. The offset is a read-write register operand so the compiler
 * picks the register instead of one being hard-coded.
 *
 * No emms is issued here; NOTE(review): presumably the callers take care of
 * that - confirm. */
#define CLEAR_BLOCKS(name, n)                               \
static void name(int16_t *blocks)                           \
{                                                           \
    intptr_t offset = -128 * (n);                           \
    __asm__ volatile (                                      \
        "pxor %%mm7, %%mm7              \n\t"               \
        "1:                             \n\t"               \
        "movq %%mm7,   (%1, %0)         \n\t"               \
        "movq %%mm7,  8(%1, %0)         \n\t"               \
        "movq %%mm7, 16(%1, %0)         \n\t"               \
        "movq %%mm7, 24(%1, %0)         \n\t"               \
        "add $32, %0                    \n\t"               \
        "js 1b                          \n\t"               \
        : "+r"(offset)                                      \
        : "r"(((uint8_t *) blocks) + 128 * (n))             \
        : "memory");                                        \
}
CLEAR_BLOCKS(clear_blocks_mmx, 6)
CLEAR_BLOCKS(clear_block_mmx, 1)
/**
 * Zero one block of 128 bytes (64 int16_t coefficients) with eight aligned
 * 16-byte SSE stores.
 *
 * @param block destination; movaps is an aligned store, so this must be
 *              16-byte aligned
 */
static void clear_block_sse(int16_t *block)
{
    __asm__ volatile (
        "xorps  %%xmm0, %%xmm0          \n"
        "movaps %%xmm0,    (%[dst])     \n"
        "movaps %%xmm0,  16(%[dst])     \n"
        "movaps %%xmm0,  32(%[dst])     \n"
        "movaps %%xmm0,  48(%[dst])     \n"
        "movaps %%xmm0,  64(%[dst])     \n"
        "movaps %%xmm0,  80(%[dst])     \n"
        "movaps %%xmm0,  96(%[dst])     \n"
        "movaps %%xmm0, 112(%[dst])     \n"
        :: [dst] "r" (block)
        : "memory");
}
/**
 * Zero six consecutive blocks of 128 bytes (64 int16_t coefficients each)
 * with aligned 16-byte SSE stores.
 *
 * The loop indexes from the end of the area with a negative byte offset that
 * counts up to zero, so the sign flag set by the add is the loop condition.
 *
 * @param blocks destination; movaps is an aligned store, so this must be
 *               16-byte aligned
 */
static void clear_blocks_sse(int16_t *blocks)
{
    intptr_t offset = -128 * 6;

    /* "memory" clobber: the asm writes through the pointer, matching
     * clear_block_sse; without it the compiler may cache or reorder
     * accesses to the blocks across the asm. */
    __asm__ volatile (
        "xorps  %%xmm0, %%xmm0              \n"
        "1:                                 \n"
        "movaps %%xmm0,    (%[end], %[off]) \n"
        "movaps %%xmm0,  16(%[end], %[off]) \n"
        "movaps %%xmm0,  32(%[end], %[off]) \n"
        "movaps %%xmm0,  48(%[end], %[off]) \n"
        "movaps %%xmm0,  64(%[end], %[off]) \n"
        "movaps %%xmm0,  80(%[end], %[off]) \n"
        "movaps %%xmm0,  96(%[end], %[off]) \n"
        "movaps %%xmm0, 112(%[end], %[off]) \n"
        "add $128, %[off]                   \n"
        "js 1b                              \n"
        : [off] "+r" (offset)
        : [end] "r" (((uint8_t *) blocks) + 128 * 6)
        : "memory");
}
#endif /* HAVE_INLINE_ASM */
/**
 * Install the x86 inline-asm block-clearing routines into a BlockDSPContext.
 *
 * Does nothing when inline asm is unavailable or high_bit_depth is non-zero.
 * The MMX versions are installed first and then replaced by the SSE versions
 * when SSE is usable. While FF_API_XVMC is enabled the function takes an
 * extra avctx argument, used only to skip the SSE versions for XvMC.
 */
#if FF_API_XVMC
av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth,
                                  AVCodecContext *avctx)
#else
av_cold void ff_blockdsp_init_x86(BlockDSPContext *c, unsigned high_bit_depth)
#endif /* FF_API_XVMC */
{
#if HAVE_INLINE_ASM
    const int flags = av_get_cpu_flags();

    if (high_bit_depth)
        return;

    if (INLINE_MMX(flags)) {
        c->clear_block  = clear_block_mmx;
        c->clear_blocks = clear_blocks_mmx;
    }

#if FF_API_XVMC
FF_DISABLE_DEPRECATION_WARNINGS
    /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks, which the
     * SSE versions below would need, so stop at the MMX versions. */
    if (CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)
        return;
FF_ENABLE_DEPRECATION_WARNINGS
#endif /* FF_API_XVMC */

    if (INLINE_SSE(flags)) {
        c->clear_block  = clear_block_sse;
        c->clear_blocks = clear_blocks_sse;
    }
#endif /* HAVE_INLINE_ASM */
}

@ -19,12 +19,10 @@
#include "config.h" #include "config.h"
#include "libavutil/attributes.h" #include "libavutil/attributes.h"
#include "libavutil/cpu.h" #include "libavutil/cpu.h"
#include "libavutil/internal.h"
#include "libavutil/x86/cpu.h" #include "libavutil/x86/cpu.h"
#include "libavcodec/avcodec.h" #include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "libavcodec/simple_idct.h" #include "libavcodec/simple_idct.h"
#include "libavcodec/version.h"
#include "dsputil_x86.h" #include "dsputil_x86.h"
#include "idct_xvid.h" #include "idct_xvid.h"
@ -54,8 +52,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
c->add_pixels_clamped = ff_add_pixels_clamped_mmx; c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
if (!high_bit_depth) { if (!high_bit_depth) {
c->clear_block = ff_clear_block_mmx;
c->clear_blocks = ff_clear_blocks_mmx;
c->draw_edges = ff_draw_edges_mmx; c->draw_edges = ff_draw_edges_mmx;
switch (avctx->idct_algo) { switch (avctx->idct_algo) {
@ -103,19 +99,6 @@ static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx,
{ {
#if HAVE_SSE_INLINE #if HAVE_SSE_INLINE
c->vector_clipf = ff_vector_clipf_sse; c->vector_clipf = ff_vector_clipf_sse;
#if FF_API_XVMC
FF_DISABLE_DEPRECATION_WARNINGS
/* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
if (CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)
return;
FF_ENABLE_DEPRECATION_WARNINGS
#endif /* FF_API_XVMC */
if (!high_bit_depth) {
c->clear_block = ff_clear_block_sse;
c->clear_blocks = ff_clear_blocks_sse;
}
#endif /* HAVE_SSE_INLINE */ #endif /* HAVE_SSE_INLINE */
} }

@ -166,62 +166,6 @@ void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
} while (--i); } while (--i);
} }
/* Generate a function `name` that zeroes n consecutive blocks of 128 bytes
 * (64 int16_t coefficients each) with MMX stores.
 *
 * The loop indexes from the END of the area with a negative byte offset that
 * counts up to zero, so the sign flag set by the add is the loop condition.
 *
 * The asm stores through the pointer, so it carries a "memory" clobber to
 * stop the compiler from caching or reordering accesses to the blocks
 * across it. The offset is a read-write register operand so the compiler
 * picks the register instead of one being hard-coded.
 *
 * No emms is issued here; NOTE(review): presumably the callers take care of
 * that - confirm. */
#define CLEAR_BLOCKS(name, n)                               \
void name(int16_t *blocks)                                  \
{                                                           \
    intptr_t offset = -128 * (n);                           \
    __asm__ volatile (                                      \
        "pxor %%mm7, %%mm7              \n\t"               \
        "1:                             \n\t"               \
        "movq %%mm7,   (%1, %0)         \n\t"               \
        "movq %%mm7,  8(%1, %0)         \n\t"               \
        "movq %%mm7, 16(%1, %0)         \n\t"               \
        "movq %%mm7, 24(%1, %0)         \n\t"               \
        "add $32, %0                    \n\t"               \
        "js 1b                          \n\t"               \
        : "+r"(offset)                                      \
        : "r"(((uint8_t *) blocks) + 128 * (n))             \
        : "memory");                                        \
}
CLEAR_BLOCKS(ff_clear_blocks_mmx, 6)
CLEAR_BLOCKS(ff_clear_block_mmx, 1)
/**
 * Zero one block of 128 bytes (64 int16_t coefficients) with eight aligned
 * 16-byte SSE stores.
 *
 * @param block destination; movaps is an aligned store, so this must be
 *              16-byte aligned
 */
void ff_clear_block_sse(int16_t *block)
{
    __asm__ volatile (
        "xorps  %%xmm0, %%xmm0          \n"
        "movaps %%xmm0,    (%[dst])     \n"
        "movaps %%xmm0,  16(%[dst])     \n"
        "movaps %%xmm0,  32(%[dst])     \n"
        "movaps %%xmm0,  48(%[dst])     \n"
        "movaps %%xmm0,  64(%[dst])     \n"
        "movaps %%xmm0,  80(%[dst])     \n"
        "movaps %%xmm0,  96(%[dst])     \n"
        "movaps %%xmm0, 112(%[dst])     \n"
        :: [dst] "r" (block)
        : "memory");
}
/**
 * Zero six consecutive blocks of 128 bytes (64 int16_t coefficients each)
 * with aligned 16-byte SSE stores.
 *
 * The loop indexes from the end of the area with a negative byte offset that
 * counts up to zero, so the sign flag set by the add is the loop condition.
 *
 * @param blocks destination; movaps is an aligned store, so this must be
 *               16-byte aligned
 */
void ff_clear_blocks_sse(int16_t *blocks)
{
    intptr_t offset = -128 * 6;

    /* "memory" clobber: the asm writes through the pointer, matching
     * ff_clear_block_sse; without it the compiler may cache or reorder
     * accesses to the blocks across the asm. */
    __asm__ volatile (
        "xorps  %%xmm0, %%xmm0              \n"
        "1:                                 \n"
        "movaps %%xmm0,    (%[end], %[off]) \n"
        "movaps %%xmm0,  16(%[end], %[off]) \n"
        "movaps %%xmm0,  32(%[end], %[off]) \n"
        "movaps %%xmm0,  48(%[end], %[off]) \n"
        "movaps %%xmm0,  64(%[end], %[off]) \n"
        "movaps %%xmm0,  80(%[end], %[off]) \n"
        "movaps %%xmm0,  96(%[end], %[off]) \n"
        "movaps %%xmm0, 112(%[end], %[off]) \n"
        "add $128, %[off]                   \n"
        "js 1b                              \n"
        : [off] "+r" (offset)
        : [end] "r" (((uint8_t *) blocks) + 128 * 6)
        : "memory");
}
/* Draw the edges of width 'w' of an image of size width, height /* Draw the edges of width 'w' of an image of size width, height
* this MMX version can only handle w == 8 || w == 16. */ * this MMX version can only handle w == 8 || w == 16. */
void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,

@ -38,11 +38,6 @@ void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels,
int line_size); int line_size);
void ff_clear_block_mmx(int16_t *block);
void ff_clear_block_sse(int16_t *block);
void ff_clear_blocks_mmx(int16_t *blocks);
void ff_clear_blocks_sse(int16_t *blocks);
void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
int w, int h, int sides); int w, int h, int sides);

Loading…
Cancel
Save