From d0bf20a4f25ac5de021c860a0c8ad05638ee2078 Mon Sep 17 00:00:00 2001
From: Luca Barbato
Date: Tue, 12 May 2015 02:38:56 +0200
Subject: [PATCH] ppc: vsx: Implement diff_pixels and get_pixels

Use a macro to abstract the endianness.
---
 libavcodec/ppc/pixblockdsp.c | 44 ++++++++++++++++++++++++++++++++++++
 libavutil/ppc/util_altivec.h | 12 ++++++++++
 2 files changed, 56 insertions(+)

diff --git a/libavcodec/ppc/pixblockdsp.c b/libavcodec/ppc/pixblockdsp.c
index 2c28e29d3f..9cac70e2ef 100644
--- a/libavcodec/ppc/pixblockdsp.c
+++ b/libavcodec/ppc/pixblockdsp.c
@@ -133,6 +133,40 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
 
 #endif /* HAVE_ALTIVEC */
 
+#if HAVE_VSX
+static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
+                           int line_size)
+{
+    int i;
+    for (i = 0; i < 8; i++) {
+        vec_s16 shorts = vsx_ld_u8_s16(0, pixels);
+
+        vec_vsx_st(shorts, i * 16, block);
+
+        pixels += line_size;
+    }
+}
+
+static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
+                            const uint8_t *s2, int stride)
+{
+    int i;
+    vec_s16 shorts1, shorts2;
+    for (i = 0; i < 8; i++) {
+        shorts1 = vsx_ld_u8_s16(0, s1);
+        shorts2 = vsx_ld_u8_s16(0, s2);
+
+        shorts1 = vec_sub(shorts1, shorts2);
+
+        vec_vsx_st(shorts1, 0, block);
+
+        s1 += stride;
+        s2 += stride;
+        block += 8;
+    }
+}
+#endif /* HAVE_VSX */
+
 av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
                                      AVCodecContext *avctx,
                                      unsigned high_bit_depth)
@@ -147,4 +181,14 @@ av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
         c->get_pixels = get_pixels_altivec;
     }
 #endif /* HAVE_ALTIVEC */
+
+#if HAVE_VSX
+    if (!PPC_VSX(av_get_cpu_flags()))
+        return;
+
+    c->diff_pixels = diff_pixels_vsx;
+
+    if (!high_bit_depth)
+        c->get_pixels = get_pixels_vsx;
+#endif /* HAVE_VSX */
 }
diff --git a/libavutil/ppc/util_altivec.h b/libavutil/ppc/util_altivec.h
index 8b31327ef4..02cff186f9 100644
--- a/libavutil/ppc/util_altivec.h
+++ b/libavutil/ppc/util_altivec.h
@@ -111,4 +111,16 @@ static inline vec_u8 load_with_perm_vec(int offset, uint8_t *src, vec_u8 perm_vec
 
 #endif /* HAVE_ALTIVEC */
 
+#if HAVE_VSX
+#if HAVE_BIGENDIAN
+#define vsx_ld_u8_s16(off, p) \
+    ((vec_s16)vec_mergeh((vec_u8)vec_splat_u8(0), \
+                         (vec_u8)vec_vsx_ld((off), (p))))
+#else
+#define vsx_ld_u8_s16(off, p) \
+    ((vec_s16)vec_mergeh((vec_u8)vec_vsx_ld((off), (p)), \
+                         (vec_u8)vec_splat_u8(0)))
+#endif /* HAVE_BIGENDIAN */
+#endif /* HAVE_VSX */
+
 #endif /* AVUTIL_PPC_UTIL_ALTIVEC_H */
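
Note (not part of the patch): a plain-C sketch of what the two VSX kernels compute, for readers without a POWER toolchain at hand. The names get_pixels_ref and diff_pixels_ref are made up for illustration; the 8x8 block size and the block/line_size/stride parameters mirror the PixblockDSPContext callbacks set above. The VSX versions replace the inner per-pixel loop with one 16-byte vec_vsx_ld widened through vsx_ld_u8_s16, whose vec_mergeh argument order differs by endianness so the zero byte always lands in the high half of each 16-bit lane.

/*
 * Scalar reference for the VSX kernels in the patch -- illustrative only.
 * get_pixels widens an 8x8 block of unsigned bytes to signed 16-bit values;
 * diff_pixels stores the per-pixel difference of two 8x8 blocks.
 */
#include <stdint.h>

static void get_pixels_ref(int16_t *block, const uint8_t *pixels,
                           int line_size)
{
    int i, j;
    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++)
            block[i * 8 + j] = pixels[j];   /* zero-extend u8 -> s16 */
        pixels += line_size;
    }
}

static void diff_pixels_ref(int16_t *block, const uint8_t *s1,
                            const uint8_t *s2, int stride)
{
    int i, j;
    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++)
            block[j] = s1[j] - s2[j];       /* difference fits in s16 */
        s1    += stride;
        s2    += stride;
        block += 8;
    }
}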