From b662e8395b81ee58e8e9b293904314f8918c8fae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Rullg=C3=A5rd?= Date: Mon, 24 Aug 2009 10:36:13 +0000 Subject: [PATCH] PPC: simplify loading some values into altivec registers Instead of filling a local array with the desired value and loading it, load a single element and vec_splat() it to fill the vector. Originally committed as revision 19691 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/ppc/gmc_altivec.c | 7 +++---- libavcodec/ppc/mpegvideo_altivec.c | 25 +++++++------------------ 2 files changed, 10 insertions(+), 22 deletions(-) diff --git a/libavcodec/ppc/gmc_altivec.c b/libavcodec/ppc/gmc_altivec.c index c77c7162b3..70c0cf9eb8 100644 --- a/libavcodec/ppc/gmc_altivec.c +++ b/libavcodec/ppc/gmc_altivec.c @@ -23,6 +23,7 @@ #include "libavcodec/dsputil.h" #include "dsputil_ppc.h" #include "util_altivec.h" +#include "types_altivec.h" /* altivec-enhanced gmc1. ATM this code assume stride is a multiple of 8, @@ -32,9 +33,7 @@ void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder) { POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND); - const DECLARE_ALIGNED_16(unsigned short, rounder_a[8]) = - {rounder, rounder, rounder, rounder, - rounder, rounder, rounder, rounder}; + const DECLARE_ALIGNED_16(unsigned short, rounder_a) = rounder; const DECLARE_ALIGNED_16(unsigned short, ABCD[8]) = { (16-x16)*(16-y16), /* A */ @@ -60,7 +59,7 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND); Cv = vec_splat(tempA, 2); Dv = vec_splat(tempA, 3); - rounderV = vec_ld(0, (unsigned short*)rounder_a); + rounderV = vec_splat((vec_u16)vec_lde(0, &rounder_a), 0); // we'll be able to pick-up our 9 char elements // at src from those 32 bytes diff --git a/libavcodec/ppc/mpegvideo_altivec.c b/libavcodec/ppc/mpegvideo_altivec.c index 74775f05fd..8348e684bd 100644 --- a/libavcodec/ppc/mpegvideo_altivec.c +++ b/libavcodec/ppc/mpegvideo_altivec.c @@ -28,6 +28,8 @@ #include "dsputil_ppc.h" #include "util_altivec.h" +#include "types_altivec.h" + // Swaps two variables (used for altivec registers) #define SWAP(a,b) \ do { \ @@ -504,29 +506,16 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1); { register const vector signed short vczero = (const vector signed short)vec_splat_s16(0); - DECLARE_ALIGNED_16(short, qmul8[]) = - { - qmul, qmul, qmul, qmul, - qmul, qmul, qmul, qmul - }; - DECLARE_ALIGNED_16(short, qadd8[]) = - { - qadd, qadd, qadd, qadd, - qadd, qadd, qadd, qadd - }; - DECLARE_ALIGNED_16(short, nqadd8[]) = - { - -qadd, -qadd, -qadd, -qadd, - -qadd, -qadd, -qadd, -qadd - }; + DECLARE_ALIGNED_16(short, qmul8) = qmul; + DECLARE_ALIGNED_16(short, qadd8) = qadd; register vector signed short blockv, qmulv, qaddv, nqaddv, temp1; register vector bool short blockv_null, blockv_neg; register short backup_0 = block[0]; register int j = 0; - qmulv = vec_ld(0, qmul8); - qaddv = vec_ld(0, qadd8); - nqaddv = vec_ld(0, nqadd8); + qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0); + qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0); + nqaddv = vec_sub(vczero, qaddv); #if 0 // block *is* 16 bytes-aligned, it seems. // first make sure block[j] is 16 bytes-aligned