|
|
@ -79,17 +79,18 @@ static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2, |
|
|
|
return u.score[3]; |
|
|
|
return u.score[3]; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static int32_t scalarproduct_int16_altivec(int16_t *v1, const int16_t *v2, |
|
|
|
static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2, |
|
|
|
int order) |
|
|
|
int order) |
|
|
|
{ |
|
|
|
{ |
|
|
|
int i; |
|
|
|
int i; |
|
|
|
LOAD_ZERO; |
|
|
|
LOAD_ZERO; |
|
|
|
register vec_s16 vec1, *pv; |
|
|
|
const vec_s16 *pv; |
|
|
|
|
|
|
|
register vec_s16 vec1; |
|
|
|
register vec_s32 res = vec_splat_s32(0), t; |
|
|
|
register vec_s32 res = vec_splat_s32(0), t; |
|
|
|
int32_t ires; |
|
|
|
int32_t ires; |
|
|
|
|
|
|
|
|
|
|
|
for(i = 0; i < order; i += 8){ |
|
|
|
for(i = 0; i < order; i += 8){ |
|
|
|
pv = (vec_s16*)v1; |
|
|
|
pv = (const vec_s16*)v1; |
|
|
|
vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1)); |
|
|
|
vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1)); |
|
|
|
t = vec_msum(vec1, vec_ld(0, v2), zero_s32v); |
|
|
|
t = vec_msum(vec1, vec_ld(0, v2), zero_s32v); |
|
|
|
res = vec_sums(t, res); |
|
|
|
res = vec_sums(t, res); |
|
|
|