|
|
|
@ -183,7 +183,7 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, |
|
|
|
|
int src_pos, phase; |
|
|
|
|
const uint8_t *s; |
|
|
|
|
int16_t *filter; |
|
|
|
|
mmx_t tmp; |
|
|
|
|
uint64_t tmp; |
|
|
|
|
|
|
|
|
|
src_pos = src_start; |
|
|
|
|
pxor_r2r(mm7, mm7); |
|
|
|
@ -200,13 +200,13 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, |
|
|
|
|
packuswb_r2r(mm7, mm3); |
|
|
|
|
packuswb_r2r(mm7, mm2); |
|
|
|
|
movq_r2m(mm0, tmp); |
|
|
|
|
dst[0] = tmp.ub[0]; |
|
|
|
|
dst[0] = tmp & 0xFF; |
|
|
|
|
movq_r2m(mm1, tmp); |
|
|
|
|
dst[1] = tmp.ub[0]; |
|
|
|
|
dst[1] = tmp & 0xFF; |
|
|
|
|
movq_r2m(mm2, tmp); |
|
|
|
|
dst[2] = tmp.ub[0]; |
|
|
|
|
dst[2] = tmp & 0xFF; |
|
|
|
|
movq_r2m(mm3, tmp); |
|
|
|
|
dst[3] = tmp.ub[0]; |
|
|
|
|
dst[3] = tmp & 0xFF; |
|
|
|
|
dst += 4; |
|
|
|
|
dst_width -= 4; |
|
|
|
|
} |
|
|
|
@ -214,7 +214,7 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, |
|
|
|
|
FILTER4(mm0); |
|
|
|
|
packuswb_r2r(mm7, mm0); |
|
|
|
|
movq_r2m(mm0, tmp); |
|
|
|
|
dst[0] = tmp.ub[0]; |
|
|
|
|
dst[0] = tmp & 0xFF; |
|
|
|
|
dst++; |
|
|
|
|
dst_width--; |
|
|
|
|
} |
|
|
|
@ -224,17 +224,14 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width, |
|
|
|
|
static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src, |
|
|
|
|
int wrap, int16_t *filter) |
|
|
|
|
{ |
|
|
|
|
int sum, i, v; |
|
|
|
|
int sum, i; |
|
|
|
|
const uint8_t *s; |
|
|
|
|
mmx_t tmp; |
|
|
|
|
mmx_t coefs[4]; |
|
|
|
|
uint64_t tmp; |
|
|
|
|
uint64_t coefs[4]; |
|
|
|
|
|
|
|
|
|
for(i=0;i<4;i++) { |
|
|
|
|
v = filter[i]; |
|
|
|
|
coefs[i].uw[0] = v; |
|
|
|
|
coefs[i].uw[1] = v; |
|
|
|
|
coefs[i].uw[2] = v; |
|
|
|
|
coefs[i].uw[3] = v; |
|
|
|
|
tmp = filter[i]; |
|
|
|
|
coefs[i] = (tmp<<48) + (tmp<<32) + (tmp<<16) + tmp; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
pxor_r2r(mm7, mm7); |
|
|
|
@ -262,7 +259,7 @@ static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src, |
|
|
|
|
packuswb_r2r(mm7, mm0); |
|
|
|
|
movq_r2m(mm0, tmp); |
|
|
|
|
|
|
|
|
|
*(uint32_t *)dst = tmp.ud[0]; |
|
|
|
|
*(uint32_t *)dst = tmp & 0xFFFFFFFF; |
|
|
|
|
dst += 4; |
|
|
|
|
s += 4; |
|
|
|
|
dst_width -= 4; |
|
|
|
|