diff --git a/libavcodec/arm/dsputil_arm.h b/libavcodec/arm/dsputil_arm.h index 35feff29fa..e1c42ee17f 100644 --- a/libavcodec/arm/dsputil_arm.h +++ b/libavcodec/arm/dsputil_arm.h @@ -24,8 +24,11 @@ #include "libavcodec/avcodec.h" #include "libavcodec/dsputil.h" -void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx); -void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx); -void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx); +void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); #endif /* AVCODEC_ARM_DSPUTIL_ARM_H */ diff --git a/libavcodec/arm/dsputil_init_arm.c b/libavcodec/arm/dsputil_init_arm.c index aa2b9714fb..c3a48cdecf 100644 --- a/libavcodec/arm/dsputil_init_arm.c +++ b/libavcodec/arm/dsputil_init_arm.c @@ -64,14 +64,15 @@ static void simple_idct_arm_add(uint8_t *dest, int line_size, int16_t *block) ff_add_pixels_clamped(block, dest, line_size); } -av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { int cpu_flags = av_get_cpu_flags(); ff_put_pixels_clamped = c->put_pixels_clamped; ff_add_pixels_clamped = c->add_pixels_clamped; - if (!avctx->lowres && avctx->bits_per_raw_sample <= 8) { + if (!avctx->lowres && !high_bit_depth) { if (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_ARM) { c->idct_put = j_rev_dct_arm_put; @@ -89,9 +90,9 @@ av_cold void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx) c->add_pixels_clamped = ff_add_pixels_clamped_arm; if (have_armv5te(cpu_flags)) - ff_dsputil_init_armv5te(c, avctx); + ff_dsputil_init_armv5te(c, avctx, high_bit_depth); if (have_armv6(cpu_flags)) - ff_dsputil_init_armv6(c, avctx); + ff_dsputil_init_armv6(c, avctx, high_bit_depth); if (have_neon(cpu_flags)) - ff_dsputil_init_neon(c, avctx); + ff_dsputil_init_neon(c, avctx, high_bit_depth); } diff --git a/libavcodec/arm/dsputil_init_armv5te.c b/libavcodec/arm/dsputil_init_armv5te.c index fb47a404ad..883f6e40ad 100644 --- a/libavcodec/arm/dsputil_init_armv5te.c +++ b/libavcodec/arm/dsputil_init_armv5te.c @@ -29,9 +29,10 @@ void ff_simple_idct_armv5te(int16_t *data); void ff_simple_idct_put_armv5te(uint8_t *dest, int line_size, int16_t *data); void ff_simple_idct_add_armv5te(uint8_t *dest, int line_size, int16_t *data); -av_cold void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputil_init_armv5te(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { - if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 && + if (!avctx->lowres && !high_bit_depth && (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_SIMPLEARMV5TE)) { c->idct_put = ff_simple_idct_put_armv5te; diff --git a/libavcodec/arm/dsputil_init_armv6.c b/libavcodec/arm/dsputil_init_armv6.c index ee967c0d25..765ccb3151 100644 --- a/libavcodec/arm/dsputil_init_armv6.c +++ b/libavcodec/arm/dsputil_init_armv6.c @@ -52,17 +52,17 @@ int ff_sse16_armv6(void *s, uint8_t *blk1, uint8_t *blk2, int ff_pix_norm1_armv6(uint8_t *pix, int line_size); int ff_pix_sum_armv6(uint8_t *pix, int line_size); -av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputil_init_armv6(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - - if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 && - (avctx->idct_algo == FF_IDCT_AUTO || - avctx->idct_algo == FF_IDCT_SIMPLEARMV6)) { - c->idct_put = ff_simple_idct_put_armv6; - c->idct_add = ff_simple_idct_add_armv6; - c->idct = ff_simple_idct_armv6; - c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; + if (!avctx->lowres && !high_bit_depth) { + if (avctx->idct_algo == FF_IDCT_AUTO || + avctx->idct_algo == FF_IDCT_SIMPLEARMV6) { + c->idct_put = ff_simple_idct_put_armv6; + c->idct_add = ff_simple_idct_add_armv6; + c->idct = ff_simple_idct_armv6; + c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM; + } } c->add_pixels_clamped = ff_add_pixels_clamped_armv6; diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c index 94dec38a30..8da442bf82 100644 --- a/libavcodec/arm/dsputil_init_neon.c +++ b/libavcodec/arm/dsputil_init_neon.c @@ -47,11 +47,10 @@ int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int le int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2, const int16_t *v3, int len, int mul); -av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - - if (!avctx->lowres && avctx->bits_per_raw_sample <= 8) { + if (!avctx->lowres && !high_bit_depth) { if (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_SIMPLENEON) { c->idct_put = ff_simple_idct_put_neon; diff --git a/libavcodec/bfin/dsputil_init.c b/libavcodec/bfin/dsputil_init.c index 1f24a52782..f488625ba6 100644 --- a/libavcodec/bfin/dsputil_init.c +++ b/libavcodec/bfin/dsputil_init.c @@ -147,18 +147,14 @@ static int bfin_pix_abs8_xy2(void *c, uint8_t *blk1, uint8_t *blk2, * 2.64s 2/20 same sman.mp4 decode only */ -av_cold void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - c->diff_pixels = ff_bfin_diff_pixels; c->put_pixels_clamped = ff_bfin_put_pixels_clamped; c->add_pixels_clamped = ff_bfin_add_pixels_clamped; - if (!high_bit_depth) - c->get_pixels = ff_bfin_get_pixels; - c->clear_blocks = bfin_clear_blocks; c->pix_sum = ff_bfin_pix_sum; @@ -182,7 +178,9 @@ av_cold void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx) c->sse[1] = ff_bfin_sse8; c->sse[2] = ff_bfin_sse4; - if (avctx->bits_per_raw_sample <= 8) { + if (!high_bit_depth) { + c->get_pixels = ff_bfin_get_pixels; + if (avctx->dct_algo == FF_DCT_AUTO) c->fdct = ff_bfin_fdct; diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 1b065fd2c6..c7e7fe6e29 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -2884,6 +2884,8 @@ int ff_check_alignment(void) av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) { + const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; + ff_check_alignment(); #if CONFIG_ENCODERS @@ -3127,13 +3129,13 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) if (ARCH_ALPHA) ff_dsputil_init_alpha(c, avctx); if (ARCH_ARM) - ff_dsputil_init_arm(c, avctx); + ff_dsputil_init_arm(c, avctx, high_bit_depth); if (ARCH_BFIN) - ff_dsputil_init_bfin(c, avctx); + ff_dsputil_init_bfin(c, avctx, high_bit_depth); if (ARCH_PPC) - ff_dsputil_init_ppc(c, avctx); + ff_dsputil_init_ppc(c, avctx, high_bit_depth); if (ARCH_X86) - ff_dsputil_init_x86(c, avctx); + ff_dsputil_init_x86(c, avctx, high_bit_depth); ff_init_scantable_permutation(c->idct_permutation, c->idct_permutation_type); diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index a506956589..3f35e420f0 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -355,10 +355,14 @@ int ff_check_alignment(void); void ff_set_cmp(DSPContext *c, me_cmp_func *cmp, int type); void ff_dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx); -void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx); -void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx); -void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx); -void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx); +void ff_dsputil_init_arm(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); +void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); void ff_dsputil_init_dwt(DSPContext *c); diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c index 36e530c3f4..eb8e5703c6 100644 --- a/libavcodec/ppc/dsputil_altivec.c +++ b/libavcodec/ppc/dsputil_altivec.c @@ -926,10 +926,9 @@ static int hadamard8_diff16_altivec(/* MpegEncContext */ void *s, uint8_t *dst, return score; } -av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - c->pix_abs[0][1] = sad16_x2_altivec; c->pix_abs[0][2] = sad16_y2_altivec; c->pix_abs[0][3] = sad16_xy2_altivec; diff --git a/libavcodec/ppc/dsputil_altivec.h b/libavcodec/ppc/dsputil_altivec.h index 8d8a7a2804..dc4da0b388 100644 --- a/libavcodec/ppc/dsputil_altivec.h +++ b/libavcodec/ppc/dsputil_altivec.h @@ -38,7 +38,8 @@ void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); -void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx); +void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx); #endif /* AVCODEC_PPC_DSPUTIL_ALTIVEC_H */ diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c index e9a5423786..cbb1cfbcc8 100644 --- a/libavcodec/ppc/dsputil_ppc.c +++ b/libavcodec/ppc/dsputil_ppc.c @@ -125,11 +125,10 @@ static long check_dcbzl_effect(void) return count; } -av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { - const int high_bit_depth = avctx->bits_per_raw_sample > 8; int mm_flags = av_get_cpu_flags(); - // common optimizations whether AltiVec is available or not if (!high_bit_depth) { switch (check_dcbzl_effect()) { @@ -145,25 +144,25 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx) } if (PPC_ALTIVEC(mm_flags)) { - ff_dsputil_init_altivec(c, avctx); + ff_dsputil_init_altivec(c, avctx, high_bit_depth); ff_int_init_altivec(c, avctx); c->gmc1 = ff_gmc1_altivec; + if (!high_bit_depth) { #if CONFIG_ENCODERS - if (avctx->bits_per_raw_sample <= 8 && - (avctx->dct_algo == FF_DCT_AUTO || - avctx->dct_algo == FF_DCT_ALTIVEC)) { - c->fdct = ff_fdct_altivec; - } + if (avctx->dct_algo == FF_DCT_AUTO || + avctx->dct_algo == FF_DCT_ALTIVEC) { + c->fdct = ff_fdct_altivec; + } #endif //CONFIG_ENCODERS - - if (avctx->lowres == 0 && avctx->bits_per_raw_sample <= 8) { + if (avctx->lowres == 0) { if ((avctx->idct_algo == FF_IDCT_AUTO) || (avctx->idct_algo == FF_IDCT_ALTIVEC)) { c->idct_put = ff_idct_put_altivec; c->idct_add = ff_idct_add_altivec; c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; } + } } } } diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c index 4e518ad56d..8639383dd2 100644 --- a/libavcodec/x86/dsputil_init.c +++ b/libavcodec/x86/dsputil_init.c @@ -521,11 +521,9 @@ do { \ } while (0) static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, - int cpu_flags) + int cpu_flags, unsigned high_bit_depth) { #if HAVE_MMX_INLINE - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - c->put_pixels_clamped = ff_put_pixels_clamped_mmx; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; c->add_pixels_clamped = ff_add_pixels_clamped_mmx; @@ -549,11 +547,9 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, } static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, - int cpu_flags) + int cpu_flags, unsigned high_bit_depth) { #if HAVE_MMXEXT_INLINE - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX && avctx->lowres == 0) { c->idct_put = ff_idct_xvid_mmxext_put; c->idct_add = ff_idct_xvid_mmxext_add; @@ -580,11 +576,9 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, } static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, - int cpu_flags) + int cpu_flags, unsigned high_bit_depth) { #if HAVE_SSE_INLINE - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - c->vector_clipf = ff_vector_clipf_sse; /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */ @@ -605,11 +599,9 @@ static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, } static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, - int cpu_flags) + int cpu_flags, unsigned high_bit_depth) { #if HAVE_SSE2_INLINE - const int high_bit_depth = avctx->bits_per_raw_sample > 8; - if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX && avctx->lowres == 0) { c->idct_put = ff_idct_xvid_sse2_put; c->idct_add = ff_idct_xvid_sse2_add; @@ -631,7 +623,7 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx, } static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, - int cpu_flags) + int cpu_flags, unsigned high_bit_depth) { #if HAVE_SSSE3_EXTERNAL c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; @@ -645,14 +637,15 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, } static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx, - int cpu_flags) + int cpu_flags, unsigned high_bit_depth) { #if HAVE_SSE4_EXTERNAL c->vector_clip_int32 = ff_vector_clip_int32_sse4; #endif /* HAVE_SSE4_EXTERNAL */ } -av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { int cpu_flags = av_get_cpu_flags(); @@ -665,7 +658,7 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx) #if HAVE_INLINE_ASM const int idct_algo = avctx->idct_algo; - if (avctx->lowres == 0 && avctx->bits_per_raw_sample <= 8) { + if (avctx->lowres == 0 && !high_bit_depth) { if (idct_algo == FF_IDCT_AUTO || idct_algo == FF_IDCT_SIMPLEMMX) { c->idct_put = ff_simple_idct_put_mmx; c->idct_add = ff_simple_idct_add_mmx; @@ -679,24 +672,24 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx) } #endif /* HAVE_INLINE_ASM */ - dsputil_init_mmx(c, avctx, cpu_flags); + dsputil_init_mmx(c, avctx, cpu_flags, high_bit_depth); } if (X86_MMXEXT(cpu_flags)) - dsputil_init_mmxext(c, avctx, cpu_flags); + dsputil_init_mmxext(c, avctx, cpu_flags, high_bit_depth); if (X86_SSE(cpu_flags)) - dsputil_init_sse(c, avctx, cpu_flags); + dsputil_init_sse(c, avctx, cpu_flags, high_bit_depth); if (X86_SSE2(cpu_flags)) - dsputil_init_sse2(c, avctx, cpu_flags); + dsputil_init_sse2(c, avctx, cpu_flags, high_bit_depth); if (EXTERNAL_SSSE3(cpu_flags)) - dsputil_init_ssse3(c, avctx, cpu_flags); + dsputil_init_ssse3(c, avctx, cpu_flags, high_bit_depth); if (EXTERNAL_SSE4(cpu_flags)) - dsputil_init_sse4(c, avctx, cpu_flags); + dsputil_init_sse4(c, avctx, cpu_flags, high_bit_depth); if (CONFIG_ENCODERS) - ff_dsputilenc_init_mmx(c, avctx); + ff_dsputilenc_init_mmx(c, avctx, high_bit_depth); } diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h index b45d8cffcd..22fc7eebe6 100644 --- a/libavcodec/x86/dsputil_x86.h +++ b/libavcodec/x86/dsputil_x86.h @@ -104,7 +104,8 @@ "psubb "#regb", "#regr" \n\t" \ "psubb "#regd", "#regp" \n\t" -void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx); +void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth); void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx); void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c index 5038d946a8..da05d3689e 100644 --- a/libavcodec/x86/dsputilenc_mmx.c +++ b/libavcodec/x86/dsputilenc_mmx.c @@ -987,16 +987,15 @@ hadamard_func(mmxext) hadamard_func(sse2) hadamard_func(ssse3) -av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx) +av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, + unsigned high_bit_depth) { int cpu_flags = av_get_cpu_flags(); const int dct_algo = avctx->dct_algo; #if HAVE_YASM - int bit_depth = avctx->bits_per_raw_sample; - if (EXTERNAL_MMX(cpu_flags)) { - if (bit_depth <= 8) + if (!high_bit_depth) c->get_pixels = ff_get_pixels_mmx; c->diff_pixels = ff_diff_pixels_mmx; c->pix_sum = ff_pix_sum16_mmx; @@ -1004,13 +1003,13 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx) } if (EXTERNAL_SSE2(cpu_flags)) - if (bit_depth <= 8) + if (!high_bit_depth) c->get_pixels = ff_get_pixels_sse2; #endif /* HAVE_YASM */ #if HAVE_INLINE_ASM if (INLINE_MMX(cpu_flags)) { - if (avctx->bits_per_raw_sample <= 8 && + if (!high_bit_depth && (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX)) c->fdct = ff_fdct_mmx; @@ -1040,7 +1039,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx) } if (INLINE_MMXEXT(cpu_flags)) { - if (avctx->bits_per_raw_sample <= 8 && + if (!high_bit_depth && (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX)) c->fdct = ff_fdct_mmxext; @@ -1055,7 +1054,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx) } if (INLINE_SSE2(cpu_flags)) { - if (avctx->bits_per_raw_sample <= 8 && + if (!high_bit_depth && (dct_algo == FF_DCT_AUTO || dct_algo == FF_DCT_MMX)) c->fdct = ff_fdct_sse2;