ARM: allow runtime masking of CPU features

This allows masking CPU features with the -cpuflags avconv option, which is useful for testing different optimisations without rebuilding. Signed-off-by: Mans Rullgard <mans@mansr.com>
13 years ago · d526c5338d
parent d7458bc8c6
commit d526c5338d
20 changed files with 164 additions and 25 deletions
--- a/avconv.c
+++ b/avconv.c
@ -4865,6 +4865,14 @@ static int opt_cpuflags(const char *opt, const char *arg)
        { "fma4"    , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_FMA4         },    .unit = "flags" },
        { "3dnow"   , NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_3DNOW        },    .unit = "flags" },
        { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { CPUFLAG_3DNOWEXT     },    .unit = "flags" },
+
+        { "armv5te",  NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ARMV5TE  },    .unit = "flags" },
+        { "armv6",    NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ARMV6    },    .unit = "flags" },
+        { "armv6t2",  NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_ARMV6T2  },    .unit = "flags" },
+        { "vfp",      NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_VFP      },    .unit = "flags" },
+        { "vfpv3",    NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_VFPV3    },    .unit = "flags" },
+        { "neon",     NULL, 0, AV_OPT_TYPE_CONST, { AV_CPU_FLAG_NEON     },    .unit = "flags" },
+
        { NULL },
    };
    static const AVClass class = {
--- a/libavcodec/arm/ac3dsp_init_arm.c
+++ b/libavcodec/arm/ac3dsp_init_arm.c
@ -19,6 +19,8 @@
 */

 #include <stdint.h>
+
+#include "libavutil/arm/cpu.h"
 #include "libavutil/attributes.h"
 #include "libavcodec/ac3dsp.h"
 #include "config.h"
@ -39,13 +41,15 @@ void ff_ac3_update_bap_counts_arm(uint16_t mant_cnt[16], uint8_t *bap, int len);

 av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact)
 {
+    int cpu_flags = av_get_cpu_flags();
+
    c->update_bap_counts         = ff_ac3_update_bap_counts_arm;

-    if (HAVE_ARMV6) {
+    if (have_armv6(cpu_flags)) {
        c->bit_alloc_calc_bap    = ff_ac3_bit_alloc_calc_bap_armv6;
    }

-    if (HAVE_NEON) {
+    if (have_neon(cpu_flags)) {
        c->ac3_exponent_min      = ff_ac3_exponent_min_neon;
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_neon;
        c->ac3_lshift_int16      = ff_ac3_lshift_int16_neon;
--- a/libavcodec/arm/dcadsp_init_arm.c
+++ b/libavcodec/arm/dcadsp_init_arm.c
@ -19,6 +19,8 @@
 */

 #include "config.h"
+
+#include "libavutil/arm/cpu.h"
 #include "libavutil/attributes.h"
 #include "libavcodec/dcadsp.h"

@ -27,6 +29,8 @@ void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs,

 av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
 {
-    if (HAVE_NEON)
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags))
        s->lfe_fir = ff_dca_lfe_fir_neon;
 }
--- a/libavcodec/arm/dsputil_init_arm.c
+++ b/libavcodec/arm/dsputil_init_arm.c
@ -19,6 +19,7 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

+#include "libavutil/arm/cpu.h"
 #include "libavcodec/dsputil.h"
 #include "dsputil_arm.h"

@ -76,6 +77,7 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, DCTELEM *block)
 void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
 {
    const int high_bit_depth = avctx->bits_per_raw_sample > 8;
+    int cpu_flags = av_get_cpu_flags();

    ff_put_pixels_clamped = c->put_pixels_clamped;
    ff_add_pixels_clamped = c->add_pixels_clamped;
@ -117,8 +119,8 @@ void ff_dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
    c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_arm;
    }

-    if (HAVE_ARMV5TE) ff_dsputil_init_armv5te(c, avctx);
-    if (HAVE_ARMV6)   ff_dsputil_init_armv6(c, avctx);
-    if (HAVE_ARMVFP)  ff_dsputil_init_vfp(c, avctx);
-    if (HAVE_NEON)    ff_dsputil_init_neon(c, avctx);
+    if (have_armv5te(cpu_flags)) ff_dsputil_init_armv5te(c, avctx);
+    if (have_armv6(cpu_flags))   ff_dsputil_init_armv6(c, avctx);
+    if (have_vfp(cpu_flags))     ff_dsputil_init_vfp(c, avctx);
+    if (have_neon(cpu_flags))    ff_dsputil_init_neon(c, avctx);
 }
--- a/libavcodec/arm/dsputil_init_vfp.c
+++ b/libavcodec/arm/dsputil_init_vfp.c
@ -18,6 +18,7 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

+#include "libavutil/arm/cpu.h"
 #include "libavcodec/dsputil.h"
 #include "dsputil_arm.h"

@ -28,7 +29,9 @@ void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,

 void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx)
 {
-    if (!HAVE_VFPV3)
+    int cpu_flags = av_get_cpu_flags();
+
+    if (!have_vfpv3(cpu_flags))
        c->vector_fmul = ff_vector_fmul_vfp;
    c->vector_fmul_reverse = ff_vector_fmul_reverse_vfp;
 }
--- a/libavcodec/arm/fft_fixed_init_arm.c
+++ b/libavcodec/arm/fft_fixed_init_arm.c
@ -18,6 +18,8 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

+#include "libavutil/arm/cpu.h"
+
 #define CONFIG_FFT_FLOAT 0
 #include "libavcodec/fft.h"

@ -27,7 +29,9 @@ void ff_mdct_fixed_calcw_neon(FFTContext *s, FFTDouble *o, const FFTSample *i);

 av_cold void ff_fft_fixed_init_arm(FFTContext *s)
 {
-    if (HAVE_NEON) {
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
        s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
        s->fft_calc        = ff_fft_fixed_calc_neon;

--- a/libavcodec/arm/fft_init_arm.c
+++ b/libavcodec/arm/fft_init_arm.c
@ -18,6 +18,7 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

+#include "libavutil/arm/cpu.h"
 #include "libavcodec/fft.h"
 #include "libavcodec/rdft.h"
 #include "libavcodec/synth_filter.h"
@ -39,7 +40,9 @@ void ff_synth_filter_float_neon(FFTContext *imdct,

 av_cold void ff_fft_init_arm(FFTContext *s)
 {
-    if (HAVE_NEON) {
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
        s->fft_permute  = ff_fft_permute_neon;
        s->fft_calc     = ff_fft_calc_neon;
 #if CONFIG_MDCT
@ -54,7 +57,9 @@ av_cold void ff_fft_init_arm(FFTContext *s)
 #if CONFIG_RDFT
 av_cold void ff_rdft_init_arm(RDFTContext *s)
 {
-    if (HAVE_NEON)
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags))
        s->rdft_calc    = ff_rdft_calc_neon;
 }
 #endif
@ -62,7 +67,9 @@ av_cold void ff_rdft_init_arm(RDFTContext *s)
 #if CONFIG_DCA_DECODER
 av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
 {
-    if (HAVE_NEON)
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags))
        s->synth_filter_float = ff_synth_filter_float_neon;
 }
 #endif
--- a/libavcodec/arm/fmtconvert_init_arm.c
+++ b/libavcodec/arm/fmtconvert_init_arm.c
@ -20,6 +20,7 @@

 #include <stdint.h>

+#include "libavutil/arm/cpu.h"
 #include "libavcodec/avcodec.h"
 #include "libavcodec/fmtconvert.h"

@ -33,11 +34,13 @@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, long len);

 void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx)
 {
-    if (HAVE_ARMVFP && HAVE_ARMV6) {
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_vfp(cpu_flags) && have_armv6(cpu_flags)) {
        c->float_to_int16 = ff_float_to_int16_vfp;
    }

-    if (HAVE_NEON) {
+    if (have_neon(cpu_flags)) {
        c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_neon;

        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
--- a/libavcodec/arm/h264dsp_init_arm.c
+++ b/libavcodec/arm/h264dsp_init_arm.c
@ -20,6 +20,7 @@

 #include <stdint.h>

+#include "libavutil/arm/cpu.h"
 #include "libavcodec/dsputil.h"
 #include "libavcodec/h264dsp.h"

@ -97,5 +98,8 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const i

 void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
 {
-    if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc);
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags))
+        ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc);
 }
--- a/libavcodec/arm/h264pred_init_arm.c
+++ b/libavcodec/arm/h264pred_init_arm.c
@ -20,6 +20,7 @@

 #include <stdint.h>

+#include "libavutil/arm/cpu.h"
 #include "libavcodec/h264pred.h"

 void ff_pred16x16_vert_neon(uint8_t *src, int stride);
@ -76,5 +77,8 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int b

 void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth, const int chroma_format_idc)
 {
-    if (HAVE_NEON)    ff_h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc);
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags))
+        ff_h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc);
 }
--- a/libavcodec/arm/mpegaudiodsp_init_arm.c
+++ b/libavcodec/arm/mpegaudiodsp_init_arm.c
@ -19,6 +19,8 @@
 */

 #include <stdint.h>
+
+#include "libavutil/arm/cpu.h"
 #include "libavcodec/mpegaudiodsp.h"
 #include "config.h"

@ -27,7 +29,9 @@ void ff_mpadsp_apply_window_fixed_armv6(int32_t *synth_buf, int32_t *window,

 void ff_mpadsp_init_arm(MPADSPContext *s)
 {
-    if (HAVE_ARMV6) {
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_armv6(cpu_flags)) {
        s->apply_window_fixed = ff_mpadsp_apply_window_fixed_armv6;
    }
 }
--- a/libavcodec/arm/mpegvideo_arm.c
+++ b/libavcodec/arm/mpegvideo_arm.c
@ -18,6 +18,7 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

+#include "libavutil/arm/cpu.h"
 #include "libavcodec/avcodec.h"
 #include "libavcodec/dsputil.h"
 #include "libavcodec/mpegvideo.h"
@ -40,11 +41,12 @@ void ff_dct_unquantize_h263_intra_neon(MpegEncContext *s, DCTELEM *block,

 void ff_MPV_common_init_arm(MpegEncContext *s)
 {
-#if HAVE_ARMV5TE
-    ff_MPV_common_init_armv5te(s);
-#endif
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_armv5te(cpu_flags))
+        ff_MPV_common_init_armv5te(s);

-    if (HAVE_NEON) {
+    if (have_neon(cpu_flags)) {
        s->dct_unquantize_h263_intra = ff_dct_unquantize_h263_intra_neon;
        s->dct_unquantize_h263_inter = ff_dct_unquantize_h263_inter_neon;
    }
--- a/libavcodec/arm/sbrdsp_init_arm.c
+++ b/libavcodec/arm/sbrdsp_init_arm.c
@ -19,6 +19,7 @@
 */

 #include "config.h"
+#include "libavutil/arm/cpu.h"
 #include "libavutil/attributes.h"
 #include "libavcodec/sbrdsp.h"

@ -51,7 +52,9 @@ void ff_sbr_hf_apply_noise_3_neon(float Y[64][2], const float *s_m,

 av_cold void ff_sbrdsp_init_arm(SBRDSPContext *s)
 {
-    if (HAVE_NEON) {
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
        s->sum64x5 = ff_sbr_sum64x5_neon;
        s->sum_square = ff_sbr_sum_square_neon;
        s->neg_odd_64 = ff_sbr_neg_odd_64_neon;
--- a/libavcodec/arm/vp56dsp_init_arm.c
+++ b/libavcodec/arm/vp56dsp_init_arm.c
@ -19,6 +19,8 @@
 */

 #include <stdint.h>
+
+#include "libavutil/arm/cpu.h"
 #include "libavcodec/avcodec.h"
 #include "libavcodec/vp56dsp.h"

@ -27,7 +29,9 @@ void ff_vp6_edge_filter_ver_neon(uint8_t *yuv, int stride, int t);

 void ff_vp56dsp_init_arm(VP56DSPContext *s, enum CodecID codec)
 {
-    if (codec != CODEC_ID_VP5 && HAVE_NEON) {
+    int cpu_flags = av_get_cpu_flags();
+
+    if (codec != CODEC_ID_VP5 && have_neon(cpu_flags)) {
        s->edge_filter_hor = ff_vp6_edge_filter_hor_neon;
        s->edge_filter_ver = ff_vp6_edge_filter_ver_neon;
    }
--- a/libavcodec/arm/vp8dsp_init_arm.c
+++ b/libavcodec/arm/vp8dsp_init_arm.c
@ -17,6 +17,8 @@
 */

 #include <stdint.h>
+
+#include "libavutil/arm/cpu.h"
 #include "libavcodec/vp8dsp.h"

 void ff_vp8_luma_dc_wht_neon(DCTELEM block[4][4][16], DCTELEM dc[16]);
@ -83,7 +85,9 @@ VP8_MC(bilin4_hv);

 av_cold void ff_vp8dsp_init_arm(VP8DSPContext *dsp)
 {
-    if (HAVE_NEON) {
+    int cpu_flags = av_get_cpu_flags();
+
+    if (have_neon(cpu_flags)) {
        dsp->vp8_luma_dc_wht    = ff_vp8_luma_dc_wht_neon;
        dsp->vp8_luma_dc_wht_dc = ff_vp8_luma_dc_wht_dc_neon;

--- a/libavutil/arm/Makefile
+++ b/libavutil/arm/Makefile
@ -0,0 +1 @@
+OBJS += arm/cpu.o
--- a/libavutil/arm/cpu.c
+++ b/libavutil/arm/cpu.c
@ -0,0 +1,30 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "config.h"
+
+int ff_get_cpu_flags_arm(void) /* ARM flag set, assembled purely from the config.h HAVE_* macros */
+{   /* no runtime probing here; presumably each HAVE_* is 0 or 1, so the product keeps or drops the flag -- TODO confirm */
+    return AV_CPU_FLAG_ARMV5TE * HAVE_ARMV5TE |
+           AV_CPU_FLAG_ARMV6   * HAVE_ARMV6   |
+           AV_CPU_FLAG_ARMV6T2 * HAVE_ARMV6T2 |
+           AV_CPU_FLAG_VFP     * HAVE_ARMVFP  | /* note: the config macro is HAVE_ARMVFP, not HAVE_VFP */
+           AV_CPU_FLAG_VFPV3   * HAVE_VFPV3   |
+           AV_CPU_FLAG_NEON    * HAVE_NEON;
+}
--- a/libavutil/arm/cpu.h
+++ b/libavutil/arm/cpu.h
@ -0,0 +1,32 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_ARM_CPU_H
+#define AVUTIL_ARM_CPU_H
+
+#include "config.h"
+#include "libavutil/cpu.h"
+
+#define have_armv5te(flags) (HAVE_ARMV5TE && ((flags) & AV_CPU_FLAG_ARMV5TE)) /* have_*(flags): nonzero only if the HAVE_* config macro is nonzero AND the bit is set in flags */
+#define have_armv6(flags)   (HAVE_ARMV6   && ((flags) & AV_CPU_FLAG_ARMV6))
+#define have_armv6t2(flags) (HAVE_ARMV6T2 && ((flags) & AV_CPU_FLAG_ARMV6T2))
+#define have_vfp(flags)     (HAVE_ARMVFP  && ((flags) & AV_CPU_FLAG_VFP)) /* note: the config macro is HAVE_ARMVFP */
+#define have_vfpv3(flags)   (HAVE_VFPV3   && ((flags) & AV_CPU_FLAG_VFPV3))
+#define have_neon(flags)    (HAVE_NEON    && ((flags) & AV_CPU_FLAG_NEON))
+
+#endif
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@ -28,6 +28,7 @@ int av_get_cpu_flags(void)
    if (checked)
        return flags;

+    if (ARCH_ARM) flags = ff_get_cpu_flags_arm();
    if (ARCH_PPC) flags = ff_get_cpu_flags_ppc();
    if (ARCH_X86) flags = ff_get_cpu_flags_x86();

@ -52,7 +53,14 @@ static const struct {
    int flag;
    const char *name;
 } cpu_flag_tab[] = {
-#if   ARCH_PPC
+#if   ARCH_ARM
+    { AV_CPU_FLAG_ARMV5TE,   "armv5te"    },
+    { AV_CPU_FLAG_ARMV6,     "armv6"      },
+    { AV_CPU_FLAG_ARMV6T2,   "armv6t2"    },
+    { AV_CPU_FLAG_VFP,       "vfp"        },
+    { AV_CPU_FLAG_VFPV3,     "vfpv3"      },
+    { AV_CPU_FLAG_NEON,      "neon"       },
+#elif ARCH_PPC
    { AV_CPU_FLAG_ALTIVEC,   "altivec"    },
 #elif ARCH_X86
    { AV_CPU_FLAG_MMX,       "mmx"        },
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@ -42,6 +42,13 @@
 #define AV_CPU_FLAG_FMA4         0x0800 ///< Bulldozer FMA4 functions
 #define AV_CPU_FLAG_ALTIVEC      0x0001 ///< standard

+#define AV_CPU_FLAG_ARMV5TE      (1 << 0) ///< ARM "armv5te"; same value as AV_CPU_FLAG_ALTIVEC, so flag bits are only meaningful per architecture
+#define AV_CPU_FLAG_ARMV6        (1 << 1) ///< ARM "armv6"
+#define AV_CPU_FLAG_ARMV6T2      (1 << 2) ///< ARM "armv6t2"
+#define AV_CPU_FLAG_VFP          (1 << 3) ///< ARM "vfp"
+#define AV_CPU_FLAG_VFPV3        (1 << 4) ///< ARM "vfpv3"
+#define AV_CPU_FLAG_NEON         (1 << 5) ///< ARM "neon"
+
 /**
 * Return the flags which specify extensions supported by the CPU.
 */
@ -56,6 +63,7 @@ int av_get_cpu_flags(void);
 void av_set_cpu_flags_mask(int mask);

 /* The following CPU-specific functions shall not be called directly. */
+int ff_get_cpu_flags_arm(void);
 int ff_get_cpu_flags_ppc(void);
 int ff_get_cpu_flags_x86(void);