|
|
|
@ -86,7 +86,7 @@ static inline void idct_row (int16_t * row, int offset, |
|
|
|
|
c5, -c1, c3, -c1, \
|
|
|
|
|
c7, c3, c7, -c5 } |
|
|
|
|
|
|
|
|
|
static inline void mmxext_row_head (int16_t * row, int offset, int16_t * table) |
|
|
|
|
static inline void mmxext_row_head (int16_t * row, int offset, const int16_t * table) |
|
|
|
|
{ |
|
|
|
|
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
|
|
|
|
|
|
|
|
@ -102,7 +102,7 @@ static inline void mmxext_row_head (int16_t * row, int offset, int16_t * table) |
|
|
|
|
pshufw_r2r (mm2, mm2, 0x4e); // mm2 = x2 x0 x6 x4
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline void mmxext_row (int16_t * table, int32_t * rounder) |
|
|
|
|
static inline void mmxext_row (const int16_t * table, const int32_t * rounder) |
|
|
|
|
{ |
|
|
|
|
movq_m2r (*(table+8), mm1); // mm1 = -C5 -C1 C3 C1
|
|
|
|
|
pmaddwd_r2r (mm2, mm4); // mm4 = C4*x0+C6*x2 C4*x4+C6*x6
|
|
|
|
@ -160,7 +160,7 @@ static inline void mmxext_row_tail (int16_t * row, int store) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline void mmxext_row_mid (int16_t * row, int store, |
|
|
|
|
int offset, int16_t * table) |
|
|
|
|
int offset, const int16_t * table) |
|
|
|
|
{ |
|
|
|
|
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
|
|
|
|
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
|
|
|
@ -198,7 +198,7 @@ static inline void mmxext_row_mid (int16_t * row, int store, |
|
|
|
|
c5, -c1, c7, -c5, \
|
|
|
|
|
c7, c3, c3, -c1 } |
|
|
|
|
|
|
|
|
|
static inline void mmx_row_head (int16_t * row, int offset, int16_t * table) |
|
|
|
|
static inline void mmx_row_head (int16_t * row, int offset, const int16_t * table) |
|
|
|
|
{ |
|
|
|
|
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
|
|
|
|
|
|
|
|
@ -217,7 +217,7 @@ static inline void mmx_row_head (int16_t * row, int offset, int16_t * table) |
|
|
|
|
punpckhdq_r2r (mm2, mm2); // mm2 = x6 x4 x6 x4
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline void mmx_row (int16_t * table, int32_t * rounder) |
|
|
|
|
static inline void mmx_row (const int16_t * table, const int32_t * rounder) |
|
|
|
|
{ |
|
|
|
|
pmaddwd_r2r (mm2, mm4); // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6
|
|
|
|
|
punpckldq_r2r (mm5, mm5); // mm5 = x3 x1 x3 x1
|
|
|
|
@ -281,7 +281,7 @@ static inline void mmx_row_tail (int16_t * row, int store) |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static inline void mmx_row_mid (int16_t * row, int store, |
|
|
|
|
int offset, int16_t * table) |
|
|
|
|
int offset, const int16_t * table) |
|
|
|
|
{ |
|
|
|
|
movq_m2r (*(row+offset), mm2); // mm2 = x6 x4 x2 x0
|
|
|
|
|
psrad_i2r (ROW_SHIFT, mm0); // mm0 = y3 y2
|
|
|
|
@ -395,10 +395,10 @@ static inline void idct_col (int16_t * col, int offset) |
|
|
|
|
#define T3 43790 |
|
|
|
|
#define C4 23170 |
|
|
|
|
|
|
|
|
|
static short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1}; |
|
|
|
|
static short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2}; |
|
|
|
|
static short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3}; |
|
|
|
|
static short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4}; |
|
|
|
|
static const short _T1[] ATTR_ALIGN(8) = {T1,T1,T1,T1}; |
|
|
|
|
static const short _T2[] ATTR_ALIGN(8) = {T2,T2,T2,T2}; |
|
|
|
|
static const short _T3[] ATTR_ALIGN(8) = {T3,T3,T3,T3}; |
|
|
|
|
static const short _C4[] ATTR_ALIGN(8) = {C4,C4,C4,C4}; |
|
|
|
|
|
|
|
|
|
/* column code adapted from peter gubanov */ |
|
|
|
|
/* http://www.elecard.com/peter/idct.shtml */ |
|
|
|
@ -536,20 +536,20 @@ static inline void idct_col (int16_t * col, int offset) |
|
|
|
|
#undef C4 |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
static int32_t rounder0[] ATTR_ALIGN(8) = |
|
|
|
|
static const int32_t rounder0[] ATTR_ALIGN(8) = |
|
|
|
|
rounder ((1 << (COL_SHIFT - 1)) - 0.5); |
|
|
|
|
static int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); |
|
|
|
|
static int32_t rounder1[] ATTR_ALIGN(8) = |
|
|
|
|
static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0); |
|
|
|
|
static const int32_t rounder1[] ATTR_ALIGN(8) = |
|
|
|
|
rounder (1.25683487303); /* C1*(C1/C4+C1+C7)/2 */ |
|
|
|
|
static int32_t rounder7[] ATTR_ALIGN(8) = |
|
|
|
|
static const int32_t rounder7[] ATTR_ALIGN(8) = |
|
|
|
|
rounder (-0.25); /* C1*(C7/C4+C7-C1)/2 */ |
|
|
|
|
static int32_t rounder2[] ATTR_ALIGN(8) = |
|
|
|
|
static const int32_t rounder2[] ATTR_ALIGN(8) = |
|
|
|
|
rounder (0.60355339059); /* C2 * (C6+C2)/2 */ |
|
|
|
|
static int32_t rounder6[] ATTR_ALIGN(8) = |
|
|
|
|
static const int32_t rounder6[] ATTR_ALIGN(8) = |
|
|
|
|
rounder (-0.25); /* C2 * (C6-C2)/2 */ |
|
|
|
|
static int32_t rounder3[] ATTR_ALIGN(8) = |
|
|
|
|
static const int32_t rounder3[] ATTR_ALIGN(8) = |
|
|
|
|
rounder (0.087788325588); /* C3*(-C3/C4+C3+C5)/2 */ |
|
|
|
|
static int32_t rounder5[] ATTR_ALIGN(8) = |
|
|
|
|
static const int32_t rounder5[] ATTR_ALIGN(8) = |
|
|
|
|
rounder (-0.441341716183); /* C3*(-C5/C4+C5-C3)/2 */ |
|
|
|
|
|
|
|
|
|
#undef COL_SHIFT |
|
|
|
@ -558,13 +558,13 @@ static int32_t rounder5[] ATTR_ALIGN(8) = |
|
|
|
|
#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \ |
|
|
|
|
void idct (int16_t * block) \
|
|
|
|
|
{ \
|
|
|
|
|
static int16_t table04[] ATTR_ALIGN(16) = \
|
|
|
|
|
static const int16_t table04[] ATTR_ALIGN(16) = \
|
|
|
|
|
table (22725, 21407, 19266, 16384, 12873, 8867, 4520); \
|
|
|
|
|
static int16_t table17[] ATTR_ALIGN(16) = \
|
|
|
|
|
static const int16_t table17[] ATTR_ALIGN(16) = \
|
|
|
|
|
table (31521, 29692, 26722, 22725, 17855, 12299, 6270); \
|
|
|
|
|
static int16_t table26[] ATTR_ALIGN(16) = \
|
|
|
|
|
static const int16_t table26[] ATTR_ALIGN(16) = \
|
|
|
|
|
table (29692, 27969, 25172, 21407, 16819, 11585, 5906); \
|
|
|
|
|
static int16_t table35[] ATTR_ALIGN(16) = \
|
|
|
|
|
static const int16_t table35[] ATTR_ALIGN(16) = \
|
|
|
|
|
table (26722, 25172, 22654, 19266, 15137, 10426, 5315); \
|
|
|
|
|
\
|
|
|
|
|
idct_row_head (block, 0*8, table04); \
|
|
|
|
|