|
|
|
@ -942,7 +942,8 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_ |
|
|
|
|
{ |
|
|
|
|
int y, dst_aligned, src_aligned; |
|
|
|
|
vec_u8 vsrc, vdst; |
|
|
|
|
vec_u16 vtemp, vlog2_denom, vweights, vweightd, voffset, v0, v1, v2, v3; |
|
|
|
|
vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3; |
|
|
|
|
vec_u16 vlog2_denom; |
|
|
|
|
DECLARE_ALIGNED_16(int32_t, temp[4]); |
|
|
|
|
LOAD_ZERO; |
|
|
|
|
|
|
|
|
@ -977,16 +978,16 @@ void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_ |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if (w == 16 || dst_aligned) { |
|
|
|
|
v0 = vec_mladd(v0, vweightd, zero_u16v); |
|
|
|
|
v2 = vec_mladd(v2, vweights, zero_u16v); |
|
|
|
|
v0 = vec_mladd(v0, vweightd, zero_s16v); |
|
|
|
|
v2 = vec_mladd(v2, vweights, zero_s16v); |
|
|
|
|
|
|
|
|
|
v0 = vec_adds(v0, voffset); |
|
|
|
|
v0 = vec_adds(v0, v2); |
|
|
|
|
v0 = vec_sra(v0, vlog2_denom); |
|
|
|
|
} |
|
|
|
|
if (w == 16 || !dst_aligned) { |
|
|
|
|
v1 = vec_mladd(v1, vweightd, zero_u16v); |
|
|
|
|
v3 = vec_mladd(v3, vweights, zero_u16v); |
|
|
|
|
v1 = vec_mladd(v1, vweightd, zero_s16v); |
|
|
|
|
v3 = vec_mladd(v3, vweights, zero_s16v); |
|
|
|
|
|
|
|
|
|
v1 = vec_adds(v1, voffset); |
|
|
|
|
v1 = vec_adds(v1, v3); |
|
|
|
|