diff --git a/configure b/configure index c271994f93..a65ccd8861 100755 --- a/configure +++ b/configure @@ -1529,6 +1529,7 @@ CONFIG_EXTRA=" aandcttables ac3dsp audio_frame_queue + audiodsp blockdsp cabac dsputil @@ -1713,8 +1714,8 @@ aac_decoder_select="mdct sinewin" aac_encoder_select="audio_frame_queue mdct sinewin" aac_latm_decoder_select="aac_decoder aac_latm_parser" ac3_decoder_select="mdct ac3dsp ac3_parser dsputil" -ac3_encoder_select="mdct ac3dsp dsputil" -ac3_fixed_encoder_select="mdct ac3dsp dsputil" +ac3_encoder_select="ac3dsp audiodsp dsputil mdct" +ac3_fixed_encoder_select="ac3dsp audiodsp dsputil mdct" aic_decoder_select="dsputil golomb" alac_encoder_select="lpc" als_decoder_select="dsputil" @@ -1735,7 +1736,7 @@ binkaudio_rdft_decoder_select="mdct rdft sinewin" cavs_decoder_select="blockdsp dsputil golomb h264chroma qpeldsp videodsp" cllc_decoder_select="dsputil" comfortnoise_encoder_select="lpc" -cook_decoder_select="dsputil mdct sinewin" +cook_decoder_select="audiodsp mdct sinewin" cscd_decoder_select="lzo" cscd_decoder_suggest="zlib" dca_decoder_select="mdct" @@ -1849,7 +1850,7 @@ svq1_decoder_select="hpeldsp" svq1_encoder_select="aandcttables dsputil hpeldsp mpegvideoenc" svq3_decoder_select="h264_decoder hpeldsp tpeldsp" svq3_decoder_suggest="zlib" -tak_decoder_select="dsputil" +tak_decoder_select="audiodsp" theora_decoder_select="vp3_decoder" thp_decoder_select="mjpeg_decoder" tiff_decoder_suggest="zlib" diff --git a/libavcodec/Makefile b/libavcodec/Makefile index c59154536f..c2f75329e4 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -28,6 +28,7 @@ OBJS = allcodecs.o \ OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o OBJS-$(CONFIG_AC3DSP) += ac3dsp.o OBJS-$(CONFIG_AUDIO_FRAME_QUEUE) += audio_frame_queue.o +OBJS-$(CONFIG_AUDIODSP) += audiodsp.o OBJS-$(CONFIG_BLOCKDSP) += blockdsp.o OBJS-$(CONFIG_CABAC) += cabac.o OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c index 
cc8df47691..c6dc141eea 100644 --- a/libavcodec/ac3enc.c +++ b/libavcodec/ac3enc.c @@ -37,6 +37,7 @@ #include "libavutil/opt.h" #include "avcodec.h" #include "put_bits.h" +#include "audiodsp.h" #include "ac3dsp.h" #include "ac3.h" #include "fft.h" @@ -2480,6 +2481,7 @@ av_cold int ff_ac3_encode_init(AVCodecContext *avctx) if (ret) goto init_fail; + ff_audiodsp_init(&s->adsp); ff_dsputil_init(&s->dsp, avctx); ff_ac3dsp_init(&s->ac3dsp, avctx->flags & CODEC_FLAG_BITEXACT); diff --git a/libavcodec/ac3enc.h b/libavcodec/ac3enc.h index 90bbf2aa92..b8e8768913 100644 --- a/libavcodec/ac3enc.h +++ b/libavcodec/ac3enc.h @@ -39,6 +39,7 @@ #include "fft.h" #include "mathops.h" #include "put_bits.h" +#include "audiodsp.h" #ifndef CONFIG_AC3ENC_FLOAT #define CONFIG_AC3ENC_FLOAT 0 @@ -162,6 +163,7 @@ typedef struct AC3EncodeContext { AVCodecContext *avctx; ///< parent AVCodecContext PutBitContext pb; ///< bitstream writer context DSPContext dsp; + AudioDSPContext adsp; AVFloatDSPContext fdsp; AC3DSPContext ac3dsp; ///< AC-3 optimized functions FFTContext mdct; ///< FFT context for MDCT calculation diff --git a/libavcodec/ac3enc_fixed.c b/libavcodec/ac3enc_fixed.c index f76d2ad394..2bb82ef3b6 100644 --- a/libavcodec/ac3enc_fixed.c +++ b/libavcodec/ac3enc_fixed.c @@ -29,6 +29,7 @@ #define FFT_FLOAT 0 #undef CONFIG_AC3ENC_FLOAT #include "internal.h" +#include "audiodsp.h" #include "ac3enc.h" #include "eac3enc.h" @@ -100,9 +101,10 @@ static void scale_coefficients(AC3EncodeContext *s) /* * Clip MDCT coefficients to allowable range. 
*/ -static void clip_coefficients(DSPContext *dsp, int32_t *coef, unsigned int len) +static void clip_coefficients(AudioDSPContext *adsp, int32_t *coef, + unsigned int len) { - dsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len); + adsp->vector_clip_int32(coef, coef, COEF_MIN, COEF_MAX, len); } diff --git a/libavcodec/ac3enc_float.c b/libavcodec/ac3enc_float.c index 6b6290fbb9..d106d1b263 100644 --- a/libavcodec/ac3enc_float.c +++ b/libavcodec/ac3enc_float.c @@ -28,6 +28,7 @@ #define CONFIG_AC3ENC_FLOAT 1 #include "internal.h" +#include "audiodsp.h" #include "ac3enc.h" #include "eac3enc.h" #include "kbdwin.h" @@ -107,9 +108,10 @@ static void scale_coefficients(AC3EncodeContext *s) /* * Clip MDCT coefficients to allowable range. */ -static void clip_coefficients(DSPContext *dsp, float *coef, unsigned int len) +static void clip_coefficients(AudioDSPContext *adsp, float *coef, + unsigned int len) { - dsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len); + adsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len); } diff --git a/libavcodec/ac3enc_template.c b/libavcodec/ac3enc_template.c index ad296e1590..79b4946b65 100644 --- a/libavcodec/ac3enc_template.c +++ b/libavcodec/ac3enc_template.c @@ -30,6 +30,8 @@ #include "libavutil/attributes.h" #include "libavutil/internal.h" + +#include "audiodsp.h" #include "internal.h" #include "ac3enc.h" #include "eac3enc.h" @@ -40,7 +42,8 @@ static void scale_coefficients(AC3EncodeContext *s); static int normalize_samples(AC3EncodeContext *s); -static void clip_coefficients(DSPContext *dsp, CoefType *coef, unsigned int len); +static void clip_coefficients(AudioDSPContext *adsp, CoefType *coef, + unsigned int len); static CoefType calc_cpl_coord(CoefSumType energy_ch, CoefSumType energy_cpl); @@ -161,7 +164,7 @@ static void apply_channel_coupling(AC3EncodeContext *s) } /* coefficients must be clipped in order to be encoded */ - clip_coefficients(&s->dsp, cpl_coef, num_cpl_coefs); + clip_coefficients(&s->adsp, cpl_coef, 
num_cpl_coefs); } /* calculate energy in each band in coupling channel and each fbw channel */ @@ -412,7 +415,7 @@ int AC3_NAME(encode_frame)(AVCodecContext *avctx, AVPacket *avpkt, if (s->fixed_point) scale_coefficients(s); - clip_coefficients(&s->dsp, s->blocks[0].mdct_coef[1], + clip_coefficients(&s->adsp, s->blocks[0].mdct_coef[1], AC3_MAX_COEFS * s->num_blocks * s->channels); s->cpl_on = s->cpl_enabled; diff --git a/libavcodec/acelp_pitch_delay.c b/libavcodec/acelp_pitch_delay.c index ab09bdb6c5..19657729ce 100644 --- a/libavcodec/acelp_pitch_delay.c +++ b/libavcodec/acelp_pitch_delay.c @@ -26,6 +26,7 @@ #include "avcodec.h" #include "acelp_pitch_delay.h" #include "celp_math.h" +#include "audiodsp.h" int ff_acelp_decode_8bit_to_1st_delay3(int ac_index) { @@ -90,7 +91,7 @@ void ff_acelp_update_past_gain( } int16_t ff_acelp_decode_gain_code( - DSPContext *dsp, + AudioDSPContext *adsp, int gain_corr_factor, const int16_t* fc_v, int mr_energy, @@ -107,7 +108,7 @@ int16_t ff_acelp_decode_gain_code( mr_energy += quant_energy[i] * ma_prediction_coeff[i]; mr_energy = gain_corr_factor * exp(M_LN10 / (20 << 23) * mr_energy) / - sqrt(dsp->scalarproduct_int16(fc_v, fc_v, subframe_size)); + sqrt(adsp->scalarproduct_int16(fc_v, fc_v, subframe_size)); return mr_energy >> 12; } diff --git a/libavcodec/acelp_pitch_delay.h b/libavcodec/acelp_pitch_delay.h index e5410bba7f..7b5b33d9b4 100644 --- a/libavcodec/acelp_pitch_delay.h +++ b/libavcodec/acelp_pitch_delay.h @@ -24,7 +24,8 @@ #define AVCODEC_ACELP_PITCH_DELAY_H #include <stdint.h> -#include "dsputil.h" + +#include "audiodsp.h" #define PITCH_DELAY_MIN 20 #define PITCH_DELAY_MAX 143 @@ -139,7 +140,7 @@ void ff_acelp_update_past_gain( /** * @brief Decode the adaptive codebook gain and add * correction (4.1.5 and 3.9.1 of G.729). 
- * @param dsp initialized dsputil context + * @param adsp initialized audio DSP context * @param gain_corr_factor gain correction factor (2.13) * @param fc_v fixed-codebook vector (2.13) * @param mr_energy mean innovation energy and fixed-point correction (7.13) @@ -208,7 +209,7 @@ void ff_acelp_update_past_gain( * @remark The routine is used in G.729 and AMR (all modes). */ int16_t ff_acelp_decode_gain_code( - DSPContext *dsp, + AudioDSPContext *adsp, int gain_corr_factor, const int16_t* fc_v, int mr_energy, diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile index 381e997c0c..eb92a8c953 100644 --- a/libavcodec/arm/Makefile +++ b/libavcodec/arm/Makefile @@ -4,6 +4,7 @@ OBJS += arm/fmtconvert_init_arm.o OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \ arm/ac3dsp_arm.o +OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_arm.o OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_arm.o OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_arm.o \ arm/dsputil_arm.o \ @@ -77,11 +78,13 @@ VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \ NEON-OBJS += arm/fmtconvert_neon.o NEON-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_neon.o +NEON-OBJS-$(CONFIG_AUDIODSP) += arm/audiodsp_init_neon.o \ + arm/audiodsp_neon.o \ + arm/int_neon.o NEON-OBJS-$(CONFIG_BLOCKDSP) += arm/blockdsp_init_neon.o \ arm/blockdsp_neon.o NEON-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_neon.o \ arm/dsputil_neon.o \ - arm/int_neon.o \ arm/simple_idct_neon.o NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ arm/fft_fixed_neon.o diff --git a/libavcodec/arm/audiodsp_arm.h b/libavcodec/arm/audiodsp_arm.h new file mode 100644 index 0000000000..e97e804de7 --- /dev/null +++ b/libavcodec/arm/audiodsp_arm.h @@ -0,0 +1,26 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_ARM_AUDIODSP_ARM_H +#define AVCODEC_ARM_AUDIODSP_ARM_H + +#include "libavcodec/audiodsp.h" + +void ff_audiodsp_init_neon(AudioDSPContext *c); + +#endif /* AVCODEC_ARM_AUDIODSP_ARM_H */ diff --git a/libavcodec/arm/audiodsp_init_arm.c b/libavcodec/arm/audiodsp_init_arm.c new file mode 100644 index 0000000000..ea9ec3ca10 --- /dev/null +++ b/libavcodec/arm/audiodsp_init_arm.c @@ -0,0 +1,33 @@ +/* + * ARM optimized audio functions + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/arm/cpu.h" +#include "libavcodec/audiodsp.h" +#include "audiodsp_arm.h" + +av_cold void ff_audiodsp_init_arm(AudioDSPContext *c) +{ + int cpu_flags = av_get_cpu_flags(); + + if (have_neon(cpu_flags)) + ff_audiodsp_init_neon(c); +} diff --git a/libavcodec/arm/audiodsp_init_neon.c b/libavcodec/arm/audiodsp_init_neon.c new file mode 100644 index 0000000000..af532724c8 --- /dev/null +++ b/libavcodec/arm/audiodsp_init_neon.c @@ -0,0 +1,41 @@ +/* + * ARM NEON optimised audio functions + * Copyright (c) 2008 Mans Rullgard + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "libavutil/attributes.h" +#include "libavcodec/audiodsp.h" +#include "audiodsp_arm.h" + +void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, + int len); +void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, + int32_t max, unsigned int len); + +int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len); + +av_cold void ff_audiodsp_init_neon(AudioDSPContext *c) +{ + c->vector_clip_int32 = ff_vector_clip_int32_neon; + c->vector_clipf = ff_vector_clipf_neon; + + c->scalarproduct_int16 = ff_scalarproduct_int16_neon; +} diff --git a/libavcodec/arm/audiodsp_neon.S b/libavcodec/arm/audiodsp_neon.S new file mode 100644 index 0000000000..dfb998de32 --- /dev/null +++ b/libavcodec/arm/audiodsp_neon.S @@ -0,0 +1,64 @@ +/* + * ARM NEON optimised audio functions + * Copyright (c) 2008 Mans Rullgard + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/arm/asm.S" + +function ff_vector_clipf_neon, export=1 +VFP vdup.32 q1, d0[1] +VFP vdup.32 q0, d0[0] +NOVFP vdup.32 q0, r2 +NOVFP vdup.32 q1, r3 +NOVFP ldr r2, [sp] + vld1.f32 {q2},[r1,:128]! + vmin.f32 q10, q2, q1 + vld1.f32 {q3},[r1,:128]! + vmin.f32 q11, q3, q1 +1: vmax.f32 q8, q10, q0 + vmax.f32 q9, q11, q0 + subs r2, r2, #8 + beq 2f + vld1.f32 {q2},[r1,:128]! + vmin.f32 q10, q2, q1 + vld1.f32 {q3},[r1,:128]! + vmin.f32 q11, q3, q1 + vst1.f32 {q8},[r0,:128]! + vst1.f32 {q9},[r0,:128]! + b 1b +2: vst1.f32 {q8},[r0,:128]! + vst1.f32 {q9},[r0,:128]! + bx lr +endfunc + +function ff_vector_clip_int32_neon, export=1 + vdup.32 q0, r2 + vdup.32 q1, r3 + ldr r2, [sp] +1: + vld1.32 {q2-q3}, [r1,:128]! + vmin.s32 q2, q2, q1 + vmin.s32 q3, q3, q1 + vmax.s32 q2, q2, q0 + vmax.s32 q3, q3, q0 + vst1.32 {q2-q3}, [r0,:128]! 
+ subs r2, r2, #8 + bgt 1b + bx lr +endfunc diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index 6863e05f73..9d4c76ce58 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -34,13 +34,6 @@ void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int); void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int); void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int); -void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, - int len); -void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, - int32_t max, unsigned int len); - -int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len); - av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth) { @@ -57,9 +50,4 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, c->add_pixels_clamped = ff_add_pixels_clamped_neon; c->put_pixels_clamped = ff_put_pixels_clamped_neon; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon; - - c->vector_clipf = ff_vector_clipf_neon; - c->vector_clip_int32 = ff_vector_clip_int32_neon; - - c->scalarproduct_int16 = ff_scalarproduct_int16_neon; } diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S index d494ec7ed7..ed6f218380 100644 --- a/libavcodec/arm/dsputil_neon.S +++ b/libavcodec/arm/dsputil_neon.S @@ -126,45 +126,3 @@ function ff_add_pixels_clamped_neon, export=1 vst1.8 {d6}, [r3,:64], r2 bx lr endfunc - -function ff_vector_clipf_neon, export=1 -VFP vdup.32 q1, d0[1] -VFP vdup.32 q0, d0[0] -NOVFP vdup.32 q0, r2 -NOVFP vdup.32 q1, r3 -NOVFP ldr r2, [sp] - vld1.f32 {q2},[r1,:128]! - vmin.f32 q10, q2, q1 - vld1.f32 {q3},[r1,:128]! - vmin.f32 q11, q3, q1 -1: vmax.f32 q8, q10, q0 - vmax.f32 q9, q11, q0 - subs r2, r2, #8 - beq 2f - vld1.f32 {q2},[r1,:128]! - vmin.f32 q10, q2, q1 - vld1.f32 {q3},[r1,:128]! 
- vmin.f32 q11, q3, q1 - vst1.f32 {q8},[r0,:128]! - vst1.f32 {q9},[r0,:128]! - b 1b -2: vst1.f32 {q8},[r0,:128]! - vst1.f32 {q9},[r0,:128]! - bx lr -endfunc - -function ff_vector_clip_int32_neon, export=1 - vdup.32 q0, r2 - vdup.32 q1, r3 - ldr r2, [sp] -1: - vld1.32 {q2-q3}, [r1,:128]! - vmin.s32 q2, q2, q1 - vmin.s32 q3, q3, q1 - vmax.s32 q2, q2, q0 - vmax.s32 q3, q3, q0 - vst1.32 {q2-q3}, [r0,:128]! - subs r2, r2, #8 - bgt 1b - bx lr -endfunc diff --git a/libavcodec/audiodsp.c b/libavcodec/audiodsp.c new file mode 100644 index 0000000000..f7e6167cb0 --- /dev/null +++ b/libavcodec/audiodsp.c @@ -0,0 +1,118 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "libavutil/attributes.h" +#include "libavutil/common.h" +#include "audiodsp.h" + +static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini, + uint32_t maxi, uint32_t maxisign) +{ + if (a > mini) + return mini; + else if ((a ^ (1U << 31)) > maxisign) + return maxi; + else + return a; +} + +static void vector_clipf_c_opposite_sign(float *dst, const float *src, + float *min, float *max, int len) +{ + int i; + uint32_t mini = *(uint32_t *) min; + uint32_t maxi = *(uint32_t *) max; + uint32_t maxisign = maxi ^ (1U << 31); + uint32_t *dsti = (uint32_t *) dst; + const uint32_t *srci = (const uint32_t *) src; + + for (i = 0; i < len; i += 8) { + dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign); + dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign); + dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign); + dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign); + dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign); + dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign); + dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign); + dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign); + } +} + +static void vector_clipf_c(float *dst, const float *src, + float min, float max, int len) +{ + int i; + + if (min < 0 && max > 0) { + vector_clipf_c_opposite_sign(dst, src, &min, &max, len); + } else { + for (i = 0; i < len; i += 8) { + dst[i] = av_clipf(src[i], min, max); + dst[i + 1] = av_clipf(src[i + 1], min, max); + dst[i + 2] = av_clipf(src[i + 2], min, max); + dst[i + 3] = av_clipf(src[i + 3], min, max); + dst[i + 4] = av_clipf(src[i + 4], min, max); + dst[i + 5] = av_clipf(src[i + 5], min, max); + dst[i + 6] = av_clipf(src[i + 6], min, max); + dst[i + 7] = 
av_clipf(src[i + 7], min, max); + } + } +} + +static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2, + int order) +{ + int res = 0; + + while (order--) + res += *v1++ **v2++; + + return res; +} + +static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min, + int32_t max, unsigned int len) +{ + do { + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + *dst++ = av_clip(*src++, min, max); + len -= 8; + } while (len > 0); +} + +av_cold void ff_audiodsp_init(AudioDSPContext *c) +{ + c->scalarproduct_int16 = scalarproduct_int16_c; + c->vector_clip_int32 = vector_clip_int32_c; + c->vector_clipf = vector_clipf_c; + + if (ARCH_ARM) + ff_audiodsp_init_arm(c); + if (ARCH_PPC) + ff_audiodsp_init_ppc(c); + if (ARCH_X86) + ff_audiodsp_init_x86(c); +} diff --git a/libavcodec/audiodsp.h b/libavcodec/audiodsp.h new file mode 100644 index 0000000000..58205a1f19 --- /dev/null +++ b/libavcodec/audiodsp.h @@ -0,0 +1,59 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_AUDIODSP_H +#define AVCODEC_AUDIODSP_H + +#include <stdint.h> + +typedef struct AudioDSPContext { + /** + * Calculate scalar product of two vectors. + * @param len length of vectors, should be multiple of 16 + */ + int32_t (*scalarproduct_int16)(const int16_t *v1, + const int16_t *v2 /* align 16 */, int len); + + /** + * Clip each element in an array of int32_t to a given minimum and + * maximum value. + * @param dst destination array + * constraints: 16-byte aligned + * @param src source array + * constraints: 16-byte aligned + * @param min minimum value + * constraints: must be in the range [-(1 << 24), 1 << 24] + * @param max maximum value + * constraints: must be in the range [-(1 << 24), 1 << 24] + * @param len number of elements in the array + * constraints: multiple of 32 greater than zero + */ + void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, + int32_t max, unsigned int len); + /* assume len is a multiple of 8, and arrays are 16-byte aligned */ + void (*vector_clipf)(float *dst /* align 16 */, + const float *src /* align 16 */, + float min, float max, int len /* align 16 */); +} AudioDSPContext; + +void ff_audiodsp_init(AudioDSPContext *c); +void ff_audiodsp_init_arm(AudioDSPContext *c); +void ff_audiodsp_init_ppc(AudioDSPContext *c); +void ff_audiodsp_init_x86(AudioDSPContext *c); + +#endif /* AVCODEC_AUDIODSP_H */ diff --git a/libavcodec/cook.c b/libavcodec/cook.c index 190d28c340..2d77899c55 100644 --- a/libavcodec/cook.c +++ b/libavcodec/cook.c @@ -44,9 +44,10 @@ #include "libavutil/channel_layout.h" #include "libavutil/lfg.h" + +#include "audiodsp.h" #include "avcodec.h" #include "get_bits.h" -#include "dsputil.h" #include "bytestream.h" #include "fft.h" #include "internal.h" @@ -122,7 +123,7 @@ 
typedef struct cook { void (*saturate_output)(struct cook *q, float *out); AVCodecContext* avctx; - DSPContext dsp; + AudioDSPContext adsp; GetBitContext gb; /* stream data */ int num_vectors; @@ -865,8 +866,8 @@ static inline void decode_bytes_and_gain(COOKContext *q, COOKSubpacket *p, */ static void saturate_output_float(COOKContext *q, float *out) { - q->dsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel, - -1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8)); + q->adsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel, + -1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8)); } @@ -1065,7 +1066,7 @@ static av_cold int cook_decode_init(AVCodecContext *avctx) /* Initialize RNG. */ av_lfg_init(&q->random_state, 0); - ff_dsputil_init(&q->dsp, avctx); + ff_audiodsp_init(&q->adsp); while (edata_ptr < edata_ptr_end) { /* 8 for mono, 16 for stereo, ? for multichannel diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 8f5ddd0eb1..27e58a5474 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -1267,87 +1267,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) WRAPPER8_16_SQ(rd8x8_c, rd16_c) WRAPPER8_16_SQ(bit8x8_c, bit16_c) -static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini, - uint32_t maxi, uint32_t maxisign) -{ - if (a > mini) - return mini; - else if ((a ^ (1U << 31)) > maxisign) - return maxi; - else - return a; -} - -static void vector_clipf_c_opposite_sign(float *dst, const float *src, - float *min, float *max, int len) -{ - int i; - uint32_t mini = *(uint32_t *) min; - uint32_t maxi = *(uint32_t *) max; - uint32_t maxisign = maxi ^ (1U << 31); - uint32_t *dsti = (uint32_t *) dst; - const uint32_t *srci = (const uint32_t *) src; - - for (i = 0; i < len; i += 8) { - dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign); - dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign); - dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign); - dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, 
maxisign); - dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign); - dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign); - dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign); - dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign); - } -} - -static void vector_clipf_c(float *dst, const float *src, - float min, float max, int len) -{ - int i; - - if (min < 0 && max > 0) { - vector_clipf_c_opposite_sign(dst, src, &min, &max, len); - } else { - for (i = 0; i < len; i += 8) { - dst[i] = av_clipf(src[i], min, max); - dst[i + 1] = av_clipf(src[i + 1], min, max); - dst[i + 2] = av_clipf(src[i + 2], min, max); - dst[i + 3] = av_clipf(src[i + 3], min, max); - dst[i + 4] = av_clipf(src[i + 4], min, max); - dst[i + 5] = av_clipf(src[i + 5], min, max); - dst[i + 6] = av_clipf(src[i + 6], min, max); - dst[i + 7] = av_clipf(src[i + 7], min, max); - } - } -} - -static int32_t scalarproduct_int16_c(const int16_t *v1, const int16_t *v2, - int order) -{ - int res = 0; - - while (order--) - res += *v1++ **v2++; - - return res; -} - -static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min, - int32_t max, unsigned int len) -{ - do { - *dst++ = av_clip(*src++, min, max); - *dst++ = av_clip(*src++, min, max); - *dst++ = av_clip(*src++, min, max); - *dst++ = av_clip(*src++, min, max); - *dst++ = av_clip(*src++, min, max); - *dst++ = av_clip(*src++, min, max); - *dst++ = av_clip(*src++, min, max); - *dst++ = av_clip(*src++, min, max); - len -= 8; - } while (len > 0); -} - static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block) { ff_j_rev_dct(block); @@ -1502,10 +1421,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) c->try_8x8basis = try_8x8basis_c; c->add_8x8basis = add_8x8basis_c; - c->scalarproduct_int16 = scalarproduct_int16_c; - c->vector_clip_int32 = vector_clip_int32_c; - c->vector_clipf = vector_clipf_c; - c->shrink[0] = av_image_copy_plane; c->shrink[1] = ff_shrink22; c->shrink[2] = 
ff_shrink44; diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 1aad789855..24a6f12f6c 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -125,11 +125,6 @@ typedef struct DSPContext { void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len); - /* assume len is a multiple of 8, and arrays are 16-byte aligned */ - void (*vector_clipf)(float *dst /* align 16 */, - const float *src /* align 16 */, - float min, float max, int len /* align 16 */); - /* (I)DCT */ void (*fdct)(int16_t *block /* align 16 */); void (*fdct248)(int16_t *block /* align 16 */); @@ -189,30 +184,6 @@ typedef struct DSPContext { void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height); - - /** - * Calculate scalar product of two vectors. - * @param len length of vectors, should be multiple of 16 - */ - int32_t (*scalarproduct_int16)(const int16_t *v1, - const int16_t *v2 /* align 16 */, int len); - - /** - * Clip each element in an array of int32_t to a given minimum and - * maximum value. 
- * @param dst destination array - * constraints: 16-byte aligned - * @param src source array - * constraints: 16-byte aligned - * @param min minimum value - * constraints: must be in the range [-(1 << 24), 1 << 24] - * @param max maximum value - * constraints: must be in the range [-(1 << 24), 1 << 24] - * @param len number of elements in the array - * constraints: multiple of 32 greater than zero - */ - void (*vector_clip_int32)(int32_t *dst, const int32_t *src, int32_t min, - int32_t max, unsigned int len); } DSPContext; void ff_dsputil_static_init(void); diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile index bd78f8e728..8a4a789037 100644 --- a/libavcodec/ppc/Makefile +++ b/libavcodec/ppc/Makefile @@ -1,5 +1,6 @@ OBJS += ppc/fmtconvert_altivec.o \ +OBJS-$(CONFIG_AUDIODSP) += ppc/audiodsp.o OBJS-$(CONFIG_BLOCKDSP) += ppc/blockdsp.o OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_ppc.o OBJS-$(CONFIG_FFT) += ppc/fft_altivec.o @@ -24,7 +25,6 @@ ALTIVEC-OBJS-$(CONFIG_DSPUTIL) += ppc/dsputil_altivec.o \ ppc/fdct_altivec.o \ ppc/gmc_altivec.o \ ppc/idct_altivec.o \ - ppc/int_altivec.o \ FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o ALTIVEC-OBJS-$(CONFIG_FFT) += $(FFT-OBJS-yes) diff --git a/libavcodec/ppc/int_altivec.c b/libavcodec/ppc/audiodsp.c similarity index 83% rename from libavcodec/ppc/int_altivec.c rename to libavcodec/ppc/audiodsp.c index d76d34a5b1..36506ce902 100644 --- a/libavcodec/ppc/int_altivec.c +++ b/libavcodec/ppc/audiodsp.c @@ -20,7 +20,7 @@ /** * @file - * miscellaneous integer operations + * miscellaneous audio operations */ #include "config.h" @@ -29,10 +29,13 @@ #endif #include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/ppc/cpu.h" #include "libavutil/ppc/types_altivec.h" #include "libavutil/ppc/util_altivec.h" -#include "libavcodec/dsputil.h" -#include "dsputil_altivec.h" +#include "libavcodec/audiodsp.h" + +#if HAVE_ALTIVEC static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2, int 
order) @@ -56,7 +59,14 @@ static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2, return ires; } -av_cold void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx) +#endif /* HAVE_ALTIVEC */ + +av_cold void ff_audiodsp_init_ppc(AudioDSPContext *c) { +#if HAVE_ALTIVEC + if (!PPC_ALTIVEC(av_get_cpu_flags())) + return; + c->scalarproduct_int16 = scalarproduct_int16_altivec; +#endif /* HAVE_ALTIVEC */ } diff --git a/libavcodec/ppc/dsputil_altivec.h b/libavcodec/ppc/dsputil_altivec.h index 7833b4b0e3..2ad4910bb0 100644 --- a/libavcodec/ppc/dsputil_altivec.h +++ b/libavcodec/ppc/dsputil_altivec.h @@ -35,6 +35,5 @@ void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth); -void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx); #endif /* AVCODEC_PPC_DSPUTIL_ALTIVEC_H */ diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c index b92fbf0a2c..fb1ee4a940 100644 --- a/libavcodec/ppc/dsputil_ppc.c +++ b/libavcodec/ppc/dsputil_ppc.c @@ -34,7 +34,7 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx, { if (PPC_ALTIVEC(av_get_cpu_flags())) { ff_dsputil_init_altivec(c, avctx, high_bit_depth); - ff_int_init_altivec(c, avctx); + c->gmc1 = ff_gmc1_altivec; if (!high_bit_depth) { diff --git a/libavcodec/takdec.c b/libavcodec/takdec.c index 0d2dcbbdb2..b0e84ea3c0 100644 --- a/libavcodec/takdec.c +++ b/libavcodec/takdec.c @@ -28,8 +28,8 @@ #include "libavutil/internal.h" #include "libavutil/samplefmt.h" #include "tak.h" +#include "audiodsp.h" #include "avcodec.h" -#include "dsputil.h" #include "internal.h" #include "unary.h" @@ -45,7 +45,7 @@ typedef struct MCDParam { typedef struct TAKDecContext { AVCodecContext *avctx; // parent AVCodecContext - DSPContext dsp; + AudioDSPContext adsp; TAKStreamInfo ti; GetBitContext gb; // bitstream reader initialized to start at the current frame @@ -172,7 +172,7 @@ 
static av_cold int tak_decode_init(AVCodecContext *avctx) { TAKDecContext *s = avctx->priv_data; - ff_dsputil_init(&s->dsp, avctx); + ff_audiodsp_init(&s->adsp); s->avctx = avctx; @@ -484,8 +484,8 @@ static int decode_subframe(TAKDecContext *s, int32_t *decoded, for (i = 0; i < subframe_size - filter_order; i++) { int v = 1 << (filter_quant - 1); - v += s->dsp.scalarproduct_int16(&s->residues[i], filter, - FFALIGN(filter_order, 16)); + v += s->adsp.scalarproduct_int16(&s->residues[i], filter, + FFALIGN(filter_order, 16)); v = (av_clip(v >> filter_quant, -8192, 8191) << dshift) - *decoded; *decoded++ = v; @@ -654,8 +654,8 @@ static int decorrelate(TAKDecContext *s, int c1, int c2, int length) for (i = 0; i < length2; i++) { int v = 1 << 9; - v += s->dsp.scalarproduct_int16(&s->residues[i], filter, - FFALIGN(filter_order, 16)); + v += s->adsp.scalarproduct_int16(&s->residues[i], filter, + FFALIGN(filter_order, 16)); p1[i] = (av_clip(v >> 10, -8192, 8191) << dshift) - p1[i]; } diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile index 222a0ff9eb..483c850737 100644 --- a/libavcodec/x86/Makefile +++ b/libavcodec/x86/Makefile @@ -2,6 +2,7 @@ OBJS += x86/constants.o \ x86/fmtconvert_init.o \ OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o +OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_init.o OBJS-$(CONFIG_DCT) += x86/dct_init.o OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o \ @@ -44,6 +45,7 @@ OBJS-$(CONFIG_VP7_DECODER) += x86/vp8dsp_init.o OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o +MMX-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_mmx.o MMX-OBJS-$(CONFIG_BLOCKDSP) += x86/blockdsp_mmx.o MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \ x86/idct_mmx_xvid.o \ @@ -61,6 +63,7 @@ YASM-OBJS += x86/deinterlace.o \ x86/fmtconvert.o \ YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o +YASM-OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp.o YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o 
YASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhdenc.o YASM-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil.o diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm new file mode 100644 index 0000000000..f2e831df17 --- /dev/null +++ b/libavcodec/x86/audiodsp.asm @@ -0,0 +1,137 @@ +;****************************************************************************** +;* optimized audio functions +;* Copyright (c) 2008 Loren Merritt +;* +;* This file is part of Libav. +;* +;* Libav is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* Libav is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with Libav; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_TEXT + +%macro SCALARPRODUCT 0 +; int ff_scalarproduct_int16(int16_t *v1, int16_t *v2, int order) +cglobal scalarproduct_int16, 3,3,3, v1, v2, order + shl orderq, 1 + add v1q, orderq + add v2q, orderq + neg orderq + pxor m2, m2 +.loop: + movu m0, [v1q + orderq] + movu m1, [v1q + orderq + mmsize] + pmaddwd m0, [v2q + orderq] + pmaddwd m1, [v2q + orderq + mmsize] + paddd m2, m0 + paddd m2, m1 + add orderq, mmsize*2 + jl .loop +%if mmsize == 16 + movhlps m0, m2 + paddd m2, m0 + pshuflw m0, m2, 0x4e +%else + pshufw m0, m2, 0x4e +%endif + paddd m2, m0 + movd eax, m2 + RET +%endmacro + +INIT_MMX mmxext +SCALARPRODUCT +INIT_XMM sse2 +SCALARPRODUCT + + 
+;----------------------------------------------------------------------------- +; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, +; int32_t max, unsigned int len) +;----------------------------------------------------------------------------- + +; %1 = number of xmm registers used +; %2 = number of inline load/process/store loops per asm loop +; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop +; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2) +; %5 = suffix +%macro VECTOR_CLIP_INT32 4-5 +cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len +%if %4 + cvtsi2ss m4, minm + cvtsi2ss m5, maxm +%else + movd m4, minm + movd m5, maxm +%endif + SPLATD m4 + SPLATD m5 +.loop: +%assign %%i 1 +%rep %2 + mova m0, [srcq+mmsize*0*%%i] + mova m1, [srcq+mmsize*1*%%i] + mova m2, [srcq+mmsize*2*%%i] + mova m3, [srcq+mmsize*3*%%i] +%if %3 + mova m7, [srcq+mmsize*4*%%i] + mova m8, [srcq+mmsize*5*%%i] + mova m9, [srcq+mmsize*6*%%i] + mova m10, [srcq+mmsize*7*%%i] +%endif + CLIPD m0, m4, m5, m6 + CLIPD m1, m4, m5, m6 + CLIPD m2, m4, m5, m6 + CLIPD m3, m4, m5, m6 +%if %3 + CLIPD m7, m4, m5, m6 + CLIPD m8, m4, m5, m6 + CLIPD m9, m4, m5, m6 + CLIPD m10, m4, m5, m6 +%endif + mova [dstq+mmsize*0*%%i], m0 + mova [dstq+mmsize*1*%%i], m1 + mova [dstq+mmsize*2*%%i], m2 + mova [dstq+mmsize*3*%%i], m3 +%if %3 + mova [dstq+mmsize*4*%%i], m7 + mova [dstq+mmsize*5*%%i], m8 + mova [dstq+mmsize*6*%%i], m9 + mova [dstq+mmsize*7*%%i], m10 +%endif +%assign %%i %%i+1 +%endrep + add srcq, mmsize*4*(%2+%3) + add dstq, mmsize*4*(%2+%3) + sub lend, mmsize*(%2+%3) + jg .loop + REP_RET +%endmacro + +INIT_MMX mmx +%define CLIPD CLIPD_MMX +VECTOR_CLIP_INT32 0, 1, 0, 0 +INIT_XMM sse2 +VECTOR_CLIP_INT32 6, 1, 0, 0, _int +%define CLIPD CLIPD_SSE2 +VECTOR_CLIP_INT32 6, 2, 0, 1 +INIT_XMM sse4 +%define CLIPD CLIPD_SSE41 +%ifdef m8 +VECTOR_CLIP_INT32 11, 1, 1, 0 +%else +VECTOR_CLIP_INT32 6, 1, 0, 0 +%endif diff --git a/libavcodec/x86/audiodsp.h 
b/libavcodec/x86/audiodsp.h new file mode 100644 index 0000000000..321056b8b7 --- /dev/null +++ b/libavcodec/x86/audiodsp.h @@ -0,0 +1,25 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_X86_AUDIODSP_H +#define AVCODEC_X86_AUDIODSP_H + +void ff_vector_clipf_sse(float *dst, const float *src, + float min, float max, int len); + +#endif /* AVCODEC_X86_AUDIODSP_H */ diff --git a/libavcodec/x86/audiodsp_init.c b/libavcodec/x86/audiodsp_init.c new file mode 100644 index 0000000000..743f5a3699 --- /dev/null +++ b/libavcodec/x86/audiodsp_init.c @@ -0,0 +1,66 @@ +/* + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/x86/asm.h" +#include "libavutil/x86/cpu.h" +#include "libavcodec/audiodsp.h" +#include "audiodsp.h" + +int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2, + int order); +int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, + int order); + +void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src, + int32_t min, int32_t max, unsigned int len); +void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src, + int32_t min, int32_t max, unsigned int len); +void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, + int32_t min, int32_t max, unsigned int len); +void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, + int32_t min, int32_t max, unsigned int len); + +av_cold void ff_audiodsp_init_x86(AudioDSPContext *c) +{ + int cpu_flags = av_get_cpu_flags(); + + if (EXTERNAL_MMX(cpu_flags)) + c->vector_clip_int32 = ff_vector_clip_int32_mmx; + + if (EXTERNAL_MMXEXT(cpu_flags)) + c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext; + + if (INLINE_SSE(cpu_flags)) + c->vector_clipf = ff_vector_clipf_sse; + + if (EXTERNAL_SSE2(cpu_flags)) { + c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; + if (cpu_flags & AV_CPU_FLAG_ATOM) + c->vector_clip_int32 = ff_vector_clip_int32_int_sse2; + else + c->vector_clip_int32 = ff_vector_clip_int32_sse2; + } + + if (EXTERNAL_SSE4(cpu_flags)) + c->vector_clip_int32 = ff_vector_clip_int32_sse4; +} diff --git a/libavcodec/x86/audiodsp_mmx.c b/libavcodec/x86/audiodsp_mmx.c new file mode 100644 index 0000000000..cb550598f9 --- /dev/null +++ b/libavcodec/x86/audiodsp_mmx.c @@ -0,0 +1,58 @@ +/* + * This file is part of Libav.
+ * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "libavutil/x86/asm.h" +#include "audiodsp.h" + +#if HAVE_INLINE_ASM + +void ff_vector_clipf_sse(float *dst, const float *src, + float min, float max, int len) +{ + x86_reg i = (len - 16) * 4; + __asm__ volatile ( + "movss %3, %%xmm4 \n\t" + "movss %4, %%xmm5 \n\t" + "shufps $0, %%xmm4, %%xmm4 \n\t" + "shufps $0, %%xmm5, %%xmm5 \n\t" + "1: \n\t" + "movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel + "movaps 16(%2, %0), %%xmm1 \n\t" + "movaps 32(%2, %0), %%xmm2 \n\t" + "movaps 48(%2, %0), %%xmm3 \n\t" + "maxps %%xmm4, %%xmm0 \n\t" + "maxps %%xmm4, %%xmm1 \n\t" + "maxps %%xmm4, %%xmm2 \n\t" + "maxps %%xmm4, %%xmm3 \n\t" + "minps %%xmm5, %%xmm0 \n\t" + "minps %%xmm5, %%xmm1 \n\t" + "minps %%xmm5, %%xmm2 \n\t" + "minps %%xmm5, %%xmm3 \n\t" + "movaps %%xmm0, (%1, %0) \n\t" + "movaps %%xmm1, 16(%1, %0) \n\t" + "movaps %%xmm2, 32(%1, %0) \n\t" + "movaps %%xmm3, 48(%1, %0) \n\t" + "sub $64, %0 \n\t" + "jge 1b \n\t" + : "+&r" (i) + : "r" (dst), "r" (src), "m" (min), "m" (max) + : "memory"); +} + +#endif /* HAVE_INLINE_ASM */ diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm index b5d6d3cc65..8f5a14d5a9 100644 --- a/libavcodec/x86/dsputil.asm +++ b/libavcodec/x86/dsputil.asm @@ -26,119 +26,6 @@ 
pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 SECTION_TEXT -%macro SCALARPRODUCT 0 -; int ff_scalarproduct_int16(int16_t *v1, int16_t *v2, int order) -cglobal scalarproduct_int16, 3,3,3, v1, v2, order - shl orderq, 1 - add v1q, orderq - add v2q, orderq - neg orderq - pxor m2, m2 -.loop: - movu m0, [v1q + orderq] - movu m1, [v1q + orderq + mmsize] - pmaddwd m0, [v2q + orderq] - pmaddwd m1, [v2q + orderq + mmsize] - paddd m2, m0 - paddd m2, m1 - add orderq, mmsize*2 - jl .loop -%if mmsize == 16 - movhlps m0, m2 - paddd m2, m0 - pshuflw m0, m2, 0x4e -%else - pshufw m0, m2, 0x4e -%endif - paddd m2, m0 - movd eax, m2 - RET -%endmacro - -INIT_MMX mmxext -SCALARPRODUCT -INIT_XMM sse2 -SCALARPRODUCT - - -;----------------------------------------------------------------------------- -; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, -; int32_t max, unsigned int len) -;----------------------------------------------------------------------------- - -; %1 = number of xmm registers used -; %2 = number of inline load/process/store loops per asm loop -; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop -; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2) -; %5 = suffix -%macro VECTOR_CLIP_INT32 4-5 -cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len -%if %4 - cvtsi2ss m4, minm - cvtsi2ss m5, maxm -%else - movd m4, minm - movd m5, maxm -%endif - SPLATD m4 - SPLATD m5 -.loop: -%assign %%i 1 -%rep %2 - mova m0, [srcq+mmsize*0*%%i] - mova m1, [srcq+mmsize*1*%%i] - mova m2, [srcq+mmsize*2*%%i] - mova m3, [srcq+mmsize*3*%%i] -%if %3 - mova m7, [srcq+mmsize*4*%%i] - mova m8, [srcq+mmsize*5*%%i] - mova m9, [srcq+mmsize*6*%%i] - mova m10, [srcq+mmsize*7*%%i] -%endif - CLIPD m0, m4, m5, m6 - CLIPD m1, m4, m5, m6 - CLIPD m2, m4, m5, m6 - CLIPD m3, m4, m5, m6 -%if %3 - CLIPD m7, m4, m5, m6 - CLIPD m8, m4, m5, m6 - CLIPD m9, m4, m5, m6 - CLIPD m10, m4, m5, m6 -%endif - mova [dstq+mmsize*0*%%i], m0 - 
mova [dstq+mmsize*1*%%i], m1 - mova [dstq+mmsize*2*%%i], m2 - mova [dstq+mmsize*3*%%i], m3 -%if %3 - mova [dstq+mmsize*4*%%i], m7 - mova [dstq+mmsize*5*%%i], m8 - mova [dstq+mmsize*6*%%i], m9 - mova [dstq+mmsize*7*%%i], m10 -%endif -%assign %%i %%i+1 -%endrep - add srcq, mmsize*4*(%2+%3) - add dstq, mmsize*4*(%2+%3) - sub lend, mmsize*(%2+%3) - jg .loop - REP_RET -%endmacro - -INIT_MMX mmx -%define CLIPD CLIPD_MMX -VECTOR_CLIP_INT32 0, 1, 0, 0 -INIT_XMM sse2 -VECTOR_CLIP_INT32 6, 1, 0, 0, _int -%define CLIPD CLIPD_SSE2 -VECTOR_CLIP_INT32 6, 2, 0, 1 -INIT_XMM sse4 -%define CLIPD CLIPD_SSE41 -%ifdef m8 -VECTOR_CLIP_INT32 11, 1, 1, 0 -%else -VECTOR_CLIP_INT32 6, 1, 0, 0 -%endif - ; %1 = aligned/unaligned %macro BSWAP_LOOPS 1 mov r3, r2 diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c index a19b83d83c..646435df11 100644 --- a/libavcodec/x86/dsputil_init.c +++ b/libavcodec/x86/dsputil_init.c @@ -26,23 +26,9 @@ #include "dsputil_x86.h" #include "idct_xvid.h" -int32_t ff_scalarproduct_int16_mmxext(const int16_t *v1, const int16_t *v2, - int order); -int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, - int order); - void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w); void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w); -void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src, - int32_t min, int32_t max, unsigned int len); -void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src, - int32_t min, int32_t max, unsigned int len); -void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, - int32_t min, int32_t max, unsigned int len); -void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, - int32_t min, int32_t max, unsigned int len); - static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int cpu_flags, unsigned high_bit_depth) { @@ -72,10 +58,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, c->gmc = 
ff_gmc_mmx; #endif /* HAVE_MMX_INLINE */ - -#if HAVE_MMX_EXTERNAL - c->vector_clip_int32 = ff_vector_clip_int32_mmx; -#endif /* HAVE_MMX_EXTERNAL */ } static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, @@ -88,18 +70,6 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, c->idct = ff_idct_xvid_mmxext; } #endif /* HAVE_MMXEXT_INLINE */ - -#if HAVE_MMXEXT_EXTERNAL - c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext; -#endif /* HAVE_MMXEXT_EXTERNAL */ -} - -static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, - int cpu_flags, unsigned high_bit_depth) -{ -#if HAVE_SSE_INLINE - c->vector_clipf = ff_vector_clipf_sse; -#endif /* HAVE_SSE_INLINE */ } static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, @@ -115,12 +85,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, #endif /* HAVE_SSE2_INLINE */ #if HAVE_SSE2_EXTERNAL - c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; - if (cpu_flags & AV_CPU_FLAG_ATOM) { - c->vector_clip_int32 = ff_vector_clip_int32_int_sse2; - } else { - c->vector_clip_int32 = ff_vector_clip_int32_sse2; - } c->bswap_buf = ff_bswap32_buf_sse2; #endif /* HAVE_SSE2_EXTERNAL */ } @@ -133,14 +97,6 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, #endif /* HAVE_SSSE3_EXTERNAL */ } -static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx, - int cpu_flags, unsigned high_bit_depth) -{ -#if HAVE_SSE4_EXTERNAL - c->vector_clip_int32 = ff_vector_clip_int32_sse4; -#endif /* HAVE_SSE4_EXTERNAL */ -} - av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth) { @@ -152,18 +108,12 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, if (X86_MMXEXT(cpu_flags)) dsputil_init_mmxext(c, avctx, cpu_flags, high_bit_depth); - if (X86_SSE(cpu_flags)) - dsputil_init_sse(c, avctx, cpu_flags, high_bit_depth); - if (X86_SSE2(cpu_flags)) 
dsputil_init_sse2(c, avctx, cpu_flags, high_bit_depth); if (EXTERNAL_SSSE3(cpu_flags)) dsputil_init_ssse3(c, avctx, cpu_flags, high_bit_depth); - if (EXTERNAL_SSE4(cpu_flags)) - dsputil_init_sse4(c, avctx, cpu_flags, high_bit_depth); - if (CONFIG_ENCODERS) ff_dsputilenc_init_mmx(c, avctx, high_bit_depth); } diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index fd74efeb3d..fe43804428 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -25,7 +25,6 @@ #include "config.h" #include "libavutil/cpu.h" #include "libavutil/x86/asm.h" -#include "constants.h" #include "dsputil_x86.h" #include "inline_asm.h" @@ -375,37 +374,4 @@ void ff_gmc_mmx(uint8_t *dst, uint8_t *src, } } -void ff_vector_clipf_sse(float *dst, const float *src, - float min, float max, int len) -{ - x86_reg i = (len - 16) * 4; - __asm__ volatile ( - "movss %3, %%xmm4 \n\t" - "movss %4, %%xmm5 \n\t" - "shufps $0, %%xmm4, %%xmm4 \n\t" - "shufps $0, %%xmm5, %%xmm5 \n\t" - "1: \n\t" - "movaps (%2, %0), %%xmm0 \n\t" // 3/1 on intel - "movaps 16(%2, %0), %%xmm1 \n\t" - "movaps 32(%2, %0), %%xmm2 \n\t" - "movaps 48(%2, %0), %%xmm3 \n\t" - "maxps %%xmm4, %%xmm0 \n\t" - "maxps %%xmm4, %%xmm1 \n\t" - "maxps %%xmm4, %%xmm2 \n\t" - "maxps %%xmm4, %%xmm3 \n\t" - "minps %%xmm5, %%xmm0 \n\t" - "minps %%xmm5, %%xmm1 \n\t" - "minps %%xmm5, %%xmm2 \n\t" - "minps %%xmm5, %%xmm3 \n\t" - "movaps %%xmm0, (%1, %0) \n\t" - "movaps %%xmm1, 16(%1, %0) \n\t" - "movaps %%xmm2, 32(%1, %0) \n\t" - "movaps %%xmm3, 48(%1, %0) \n\t" - "sub $64, %0 \n\t" - "jge 1b \n\t" - : "+&r" (i) - : "r" (dst), "r" (src), "m" (min), "m" (max) - : "memory"); -} - #endif /* HAVE_INLINE_ASM */ diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h index e99b6b7630..eeb9ca6114 100644 --- a/libavcodec/x86/dsputil_x86.h +++ b/libavcodec/x86/dsputil_x86.h @@ -46,7 +46,4 @@ void ff_gmc_mmx(uint8_t *dst, uint8_t *src, int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int 
height); -void ff_vector_clipf_sse(float *dst, const float *src, - float min, float max, int len); - #endif /* AVCODEC_X86_DSPUTIL_X86_H */