@ -73,25 +73,24 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
__asm__ volatile ( " movq %0, %%mm7 " : : " m " ( mask32a ) : " memory " ) ;
while ( s < mm_end ) {
__asm__ volatile (
PREFETCH " 32%1 \n \t "
" movd %1 , %%mm0 \n \t "
" punpckldq 3%1, %%mm0 \n \t "
" movd 6%1, %%mm1 \n \t "
" punpckldq 9%1, %%mm1 \n \t "
" movd 12%1, %%mm2 \n \t "
" punpckldq 15%1, %%mm2 \n \t "
" movd 18%1, %%mm3 \n \t "
" punpckldq 21%1, %%mm3 \n \t "
PREFETCH " 32( %1) \n \t "
" movd (%1) , %%mm0 \n \t "
" punpckldq 3( %1) , %%mm0 \n \t "
" movd 6( %1) , %%mm1 \n \t "
" punpckldq 9( %1) , %%mm1 \n \t "
" movd 12( %1) , %%mm2 \n \t "
" punpckldq 15( %1) , %%mm2 \n \t "
" movd 18( %1) , %%mm3 \n \t "
" punpckldq 21( %1) , %%mm3 \n \t "
" por %%mm7, %%mm0 \n \t "
" por %%mm7, %%mm1 \n \t "
" por %%mm7, %%mm2 \n \t "
" por %%mm7, %%mm3 \n \t "
MOVNTQ " %%mm0, %0 \n \t "
MOVNTQ " %%mm1, 8%0 \n \t "
MOVNTQ " %%mm2, 16%0 \n \t "
MOVNTQ " %%mm3, 24%0 "
: " =m " ( * dest )
: " m " ( * s )
MOVNTQ " %%mm0, (%0) \n \t "
MOVNTQ " %%mm1, 8(%0) \n \t "
MOVNTQ " %%mm2, 16(%0) \n \t "
MOVNTQ " %%mm3, 24(%0) "
: : " r " ( dest ) , " r " ( s )
: " memory " ) ;
dest + = 32 ;
s + = 24 ;
@ -138,9 +137,9 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr
" pand " MANGLE ( mask24hhhh ) " , %%mm5 \n \t " \
" por %%mm5, %%mm4 \n \t " \
\
MOVNTQ " %%mm0, %0 \n \t " \
MOVNTQ " %%mm1, 8%0 \n \t " \
MOVNTQ " %%mm4, 16%0 "
MOVNTQ " %%mm0, (%0) \n \t " \
MOVNTQ " %%mm1, 8(%0) \n \t " \
MOVNTQ " %%mm4, 16( %0) "
static inline void RENAME ( rgb32tobgr24 ) ( const uint8_t * src , uint8_t * dst , int src_size )
@ -154,18 +153,17 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 31 ;
while ( s < mm_end ) {
__asm__ volatile (
PREFETCH " 32%1 \n \t "
" movq %1 , %%mm0 \n \t "
" movq 8%1, %%mm1 \n \t "
" movq 16%1, %%mm4 \n \t "
" movq 24%1, %%mm5 \n \t "
PREFETCH " 32( %1) \n \t "
" movq (%1) , %%mm0 \n \t "
" movq 8( %1) , %%mm1 \n \t "
" movq 16( %1) , %%mm4 \n \t "
" movq 24( %1) , %%mm5 \n \t "
" movq %%mm0, %%mm2 \n \t "
" movq %%mm1, %%mm3 \n \t "
" movq %%mm4, %%mm6 \n \t "
" movq %%mm5, %%mm7 \n \t "
STORE_BGR24_MMX
: " =m " ( * dest )
: " m " ( * s )
: : " r " ( dest ) , " r " ( s )
: " memory " ) ;
dest + = 24 ;
s + = 32 ;
@ -198,19 +196,18 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15 ;
while ( s < mm_end ) {
__asm__ volatile (
PREFETCH " 32%1 \n \t "
" movq %1 , %%mm0 \n \t "
" movq 8%1, %%mm2 \n \t "
PREFETCH " 32(%1) \n \t "
" movq (%1) , %%mm0 \n \t "
" movq 8( %1) , %%mm2 \n \t "
" movq %%mm0, %%mm1 \n \t "
" movq %%mm2, %%mm3 \n \t "
" pand %%mm4, %%mm0 \n \t "
" pand %%mm4, %%mm2 \n \t "
" paddw %%mm1, %%mm0 \n \t "
" paddw %%mm3, %%mm2 \n \t "
MOVNTQ " %%mm0, %0 \n \t "
MOVNTQ " %%mm2, 8%0 "
: " =m " ( * d )
: " m " ( * s )
MOVNTQ " %%mm0, (%0) \n \t "
MOVNTQ " %%mm2, 8(%0) "
: : " r " ( d ) , " r " ( s )
) ;
d + = 16 ;
s + = 16 ;
@ -243,9 +240,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15 ;
while ( s < mm_end ) {
__asm__ volatile (
PREFETCH " 32%1 \n \t "
" movq %1 , %%mm0 \n \t "
" movq 8%1, %%mm2 \n \t "
PREFETCH " 32(%1) \n \t "
" movq (%1) , %%mm0 \n \t "
" movq 8( %1) , %%mm2 \n \t "
" movq %%mm0, %%mm1 \n \t "
" movq %%mm2, %%mm3 \n \t "
" psrlq $1, %%mm0 \n \t "
@ -256,10 +253,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s
" pand %%mm6, %%mm3 \n \t "
" por %%mm1, %%mm0 \n \t "
" por %%mm3, %%mm2 \n \t "
MOVNTQ " %%mm0, %0 \n \t "
MOVNTQ " %%mm2, 8%0 "
: " =m " ( * d )
: " m " ( * s )
MOVNTQ " %%mm0, (%0) \n \t "
MOVNTQ " %%mm2, 8(%0) "
: : " r " ( d ) , " r " ( s )
) ;
d + = 16 ;
s + = 16 ;
@ -344,11 +340,11 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 15 ;
while ( s < mm_end ) {
__asm__ volatile (
PREFETCH " 32%1 \n \t "
" movd %1 , %%mm0 \n \t "
" movd 4%1, %%mm3 \n \t "
" punpckldq 8%1, %%mm0 \n \t "
" punpckldq 12%1, %%mm3 \n \t "
PREFETCH " 32( %1) \n \t "
" movd (%1) , %%mm0 \n \t "
" movd 4( %1) , %%mm3 \n \t "
" punpckldq 8( %1) , %%mm0 \n \t "
" punpckldq 12( %1) , %%mm3 \n \t "
" movq %%mm0, %%mm1 \n \t "
" movq %%mm0, %%mm2 \n \t "
" movq %%mm3, %%mm4 \n \t "
@ -371,8 +367,8 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr
" por %%mm5, %%mm3 \n \t "
" psllq $16, %%mm3 \n \t "
" por %%mm3, %%mm0 \n \t "
MOVNTQ " %%mm0, %0 \n \t "
: " =m " ( * d ) : " m " ( * s ) , " m " ( blue_16mask ) : " memory " ) ;
MOVNTQ " %%mm0, (%0) \n \t "
: : " r " ( d ) , " r " ( s ) , " m " ( blue_16mask ) : " memory " ) ;
d + = 4 ;
s + = 16 ;
}
@ -449,11 +445,11 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 15 ;
while ( s < mm_end ) {
__asm__ volatile (
PREFETCH " 32%1 \n \t "
" movd %1 , %%mm0 \n \t "
" movd 4%1, %%mm3 \n \t "
" punpckldq 8%1, %%mm0 \n \t "
" punpckldq 12%1, %%mm3 \n \t "
PREFETCH " 32( %1) \n \t "
" movd (%1) , %%mm0 \n \t "
" movd 4( %1) , %%mm3 \n \t "
" punpckldq 8( %1) , %%mm0 \n \t "
" punpckldq 12( %1) , %%mm3 \n \t "
" movq %%mm0, %%mm1 \n \t "
" movq %%mm0, %%mm2 \n \t "
" movq %%mm3, %%mm4 \n \t "
@ -476,8 +472,8 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr
" por %%mm5, %%mm3 \n \t "
" psllq $16, %%mm3 \n \t "
" por %%mm3, %%mm0 \n \t "
MOVNTQ " %%mm0, %0 \n \t "
: " =m " ( * d ) : " m " ( * s ) , " m " ( blue_15mask ) : " memory " ) ;
MOVNTQ " %%mm0, (%0) \n \t "
: : " r " ( d ) , " r " ( s ) , " m " ( blue_15mask ) : " memory " ) ;
d + = 4 ;
s + = 16 ;
}
@ -504,11 +500,11 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 11 ;
while ( s < mm_end ) {
__asm__ volatile (
PREFETCH " 32%1 \n \t "
" movd %1 , %%mm0 \n \t "
" movd 3%1, %%mm3 \n \t "
" punpckldq 6%1, %%mm0 \n \t "
" punpckldq 9%1, %%mm3 \n \t "
PREFETCH " 32( %1) \n \t "
" movd (%1) , %%mm0 \n \t "
" movd 3( %1) , %%mm3 \n \t "
" punpckldq 6( %1) , %%mm0 \n \t "
" punpckldq 9( %1) , %%mm3 \n \t "
" movq %%mm0, %%mm1 \n \t "
" movq %%mm0, %%mm2 \n \t "
" movq %%mm3, %%mm4 \n \t "
@ -531,8 +527,8 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr
" por %%mm5, %%mm3 \n \t "
" psllq $16, %%mm3 \n \t "
" por %%mm3, %%mm0 \n \t "
MOVNTQ " %%mm0, %0 \n \t "
: " =m " ( * d ) : " m " ( * s ) , " m " ( blue_16mask ) : " memory " ) ;
MOVNTQ " %%mm0, (%0) \n \t "
: : " r " ( d ) , " r " ( s ) , " m " ( blue_16mask ) : " memory " ) ;
d + = 4 ;
s + = 12 ;
}
@ -561,11 +557,11 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15 ;
while ( s < mm_end ) {
__asm__ volatile (
PREFETCH " 32%1 \n \t "
" movd %1 , %%mm0 \n \t "
" movd 3%1, %%mm3 \n \t "
" punpckldq 6%1, %%mm0 \n \t "
" punpckldq 9%1, %%mm3 \n \t "
PREFETCH " 32( %1) \n \t "
" movd (%1) , %%mm0 \n \t "
" movd 3( %1) , %%mm3 \n \t "
" punpckldq 6( %1) , %%mm0 \n \t "
" punpckldq 9( %1) , %%mm3 \n \t "
" movq %%mm0, %%mm1 \n \t "
" movq %%mm0, %%mm2 \n \t "
" movq %%mm3, %%mm4 \n \t "
@ -588,8 +584,8 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s
" por %%mm5, %%mm3 \n \t "
" psllq $16, %%mm3 \n \t "
" por %%mm3, %%mm0 \n \t "
MOVNTQ " %%mm0, %0 \n \t "
: " =m " ( * d ) : " m " ( * s ) , " m " ( blue_16mask ) : " memory " ) ;
MOVNTQ " %%mm0, (%0) \n \t "
: : " r " ( d ) , " r " ( s ) , " m " ( blue_16mask ) : " memory " ) ;
d + = 4 ;
s + = 12 ;
}
@ -618,11 +614,11 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 11 ;
while ( s < mm_end ) {
__asm__ volatile (
PREFETCH " 32%1 \n \t "
" movd %1 , %%mm0 \n \t "
" movd 3%1, %%mm3 \n \t "
" punpckldq 6%1, %%mm0 \n \t "
" punpckldq 9%1, %%mm3 \n \t "
PREFETCH " 32( %1) \n \t "
" movd (%1) , %%mm0 \n \t "
" movd 3( %1) , %%mm3 \n \t "
" punpckldq 6( %1) , %%mm0 \n \t "
" punpckldq 9( %1) , %%mm3 \n \t "
" movq %%mm0, %%mm1 \n \t "
" movq %%mm0, %%mm2 \n \t "
" movq %%mm3, %%mm4 \n \t "
@ -645,8 +641,8 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr
" por %%mm5, %%mm3 \n \t "
" psllq $16, %%mm3 \n \t "
" por %%mm3, %%mm0 \n \t "
MOVNTQ " %%mm0, %0 \n \t "
: " =m " ( * d ) : " m " ( * s ) , " m " ( blue_15mask ) : " memory " ) ;
MOVNTQ " %%mm0, (%0) \n \t "
: : " r " ( d ) , " r " ( s ) , " m " ( blue_15mask ) : " memory " ) ;
d + = 4 ;
s + = 12 ;
}
@ -675,11 +671,11 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 15 ;
while ( s < mm_end ) {
__asm__ volatile (
PREFETCH " 32%1 \n \t "
" movd %1 , %%mm0 \n \t "
" movd 3%1, %%mm3 \n \t "
" punpckldq 6%1, %%mm0 \n \t "
" punpckldq 9%1, %%mm3 \n \t "
PREFETCH " 32( %1) \n \t "
" movd (%1) , %%mm0 \n \t "
" movd 3( %1) , %%mm3 \n \t "
" punpckldq 6( %1) , %%mm0 \n \t "
" punpckldq 9( %1) , %%mm3 \n \t "
" movq %%mm0, %%mm1 \n \t "
" movq %%mm0, %%mm2 \n \t "
" movq %%mm3, %%mm4 \n \t "
@ -702,8 +698,8 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
" por %%mm5, %%mm3 \n \t "
" psllq $16, %%mm3 \n \t "
" por %%mm3, %%mm0 \n \t "
MOVNTQ " %%mm0, %0 \n \t "
: " =m " ( * d ) : " m " ( * s ) , " m " ( blue_15mask ) : " memory " ) ;
MOVNTQ " %%mm0, (%0) \n \t "
: : " r " ( d ) , " r " ( s ) , " m " ( blue_15mask ) : " memory " ) ;
d + = 4 ;
s + = 12 ;
}
@ -749,10 +745,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 7 ;
while ( s < mm_end ) {
__asm__ volatile (
PREFETCH " 32%1 \n \t "
" movq %1 , %%mm0 \n \t "
" movq %1 , %%mm1 \n \t "
" movq %1 , %%mm2 \n \t "
PREFETCH " 32( %1) \n \t "
" movq (%1) , %%mm0 \n \t "
" movq (%1) , %%mm1 \n \t "
" movq (%1) , %%mm2 \n \t "
" pand %2, %%mm0 \n \t "
" pand %3, %%mm1 \n \t "
" pand %4, %%mm2 \n \t "
@ -780,9 +776,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
" movq %%mm0, %%mm6 \n \t "
" movq %%mm3, %%mm7 \n \t "
" movq 8%1, %%mm0 \n \t "
" movq 8%1, %%mm1 \n \t "
" movq 8%1, %%mm2 \n \t "
" movq 8( %1) , %%mm0 \n \t "
" movq 8( %1) , %%mm1 \n \t "
" movq 8( %1) , %%mm2 \n \t "
" pand %2, %%mm0 \n \t "
" pand %3, %%mm1 \n \t "
" pand %4, %%mm2 \n \t "
@ -808,7 +804,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
" por %%mm5, %%mm3 \n \t "
: " =m " ( * d )
: " m " ( * s ) , " m " ( mask15b ) , " m " ( mask15g ) , " m " ( mask15r ) , " m " ( mmx_null )
: " r " ( s ) , " m " ( mask15b ) , " m " ( mask15g ) , " m " ( mask15r ) , " m " ( mmx_null )
: " memory " ) ;
/* borrowed 32 to 24 */
__asm__ volatile (
@ -824,8 +820,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX
: " =m " ( * d )
: " m " ( * s )
: : " r " ( d ) , " m " ( * s )
: " memory " ) ;
d + = 24 ;
s + = 8 ;
@ -852,10 +847,10 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
mm_end = end - 7 ;
while ( s < mm_end ) {
__asm__ volatile (
PREFETCH " 32%1 \n \t "
" movq %1 , %%mm0 \n \t "
" movq %1 , %%mm1 \n \t "
" movq %1 , %%mm2 \n \t "
PREFETCH " 32( %1) \n \t "
" movq (%1) , %%mm0 \n \t "
" movq (%1) , %%mm1 \n \t "
" movq (%1) , %%mm2 \n \t "
" pand %2, %%mm0 \n \t "
" pand %3, %%mm1 \n \t "
" pand %4, %%mm2 \n \t "
@ -883,9 +878,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
" movq %%mm0, %%mm6 \n \t "
" movq %%mm3, %%mm7 \n \t "
" movq 8%1, %%mm0 \n \t "
" movq 8%1, %%mm1 \n \t "
" movq 8%1, %%mm2 \n \t "
" movq 8( %1) , %%mm0 \n \t "
" movq 8( %1) , %%mm1 \n \t "
" movq 8( %1) , %%mm2 \n \t "
" pand %2, %%mm0 \n \t "
" pand %3, %%mm1 \n \t "
" pand %4, %%mm2 \n \t "
@ -910,7 +905,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
" por %%mm4, %%mm3 \n \t "
" por %%mm5, %%mm3 \n \t "
: " =m " ( * d )
: " m " ( * s ) , " m " ( mask16b ) , " m " ( mask16g ) , " m " ( mask16r ) , " m " ( mmx_null )
: " r " ( s ) , " m " ( mask16b ) , " m " ( mask16g ) , " m " ( mask16r ) , " m " ( mmx_null )
: " memory " ) ;
/* borrowed 32 to 24 */
__asm__ volatile (
@ -926,8 +921,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
STORE_BGR24_MMX
: " =m " ( * d )
: " m " ( * s )
: : " r " ( d ) , " m " ( * s )
: " memory " ) ;
d + = 24 ;
s + = 8 ;
@ -959,8 +953,8 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
" movq %%mm0, %%mm3 \n \t " \
" punpcklwd %%mm2, %%mm0 \n \t " /* FF R1 G1 B1 FF R0 G0 B0 */ \
" punpckhwd %%mm2, %%mm3 \n \t " /* FF R3 G3 B3 FF R2 G2 B2 */ \
MOVNTQ " %%mm0, %0 \n \t " \
MOVNTQ " %%mm3, 8%0 \n \t " \
MOVNTQ " %%mm0, (%0) \n \t " \
MOVNTQ " %%mm3, 8(%0) \n \t " \
static inline void RENAME ( rgb15to32 ) ( const uint8_t * src , uint8_t * dst , int src_size )
{
@ -975,10 +969,10 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 3 ;
while ( s < mm_end ) {
__asm__ volatile (
PREFETCH " 32%1 \n \t "
" movq %1 , %%mm0 \n \t "
" movq %1 , %%mm1 \n \t "
" movq %1 , %%mm2 \n \t "
PREFETCH " 32( %1) \n \t "
" movq (%1) , %%mm0 \n \t "
" movq (%1) , %%mm1 \n \t "
" movq (%1) , %%mm2 \n \t "
" pand %2, %%mm0 \n \t "
" pand %3, %%mm1 \n \t "
" pand %4, %%mm2 \n \t "
@ -986,8 +980,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
" psrlq $2, %%mm1 \n \t "
" psrlq $7, %%mm2 \n \t "
PACK_RGB32
: " =m " ( * d )
: " m " ( * s ) , " m " ( mask15b ) , " m " ( mask15g ) , " m " ( mask15r )
: : " r " ( d ) , " r " ( s ) , " m " ( mask15b ) , " m " ( mask15g ) , " m " ( mask15r )
: " memory " ) ;
d + = 16 ;
s + = 4 ;
@ -1017,10 +1010,10 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
mm_end = end - 3 ;
while ( s < mm_end ) {
__asm__ volatile (
PREFETCH " 32%1 \n \t "
" movq %1 , %%mm0 \n \t "
" movq %1 , %%mm1 \n \t "
" movq %1 , %%mm2 \n \t "
PREFETCH " 32( %1) \n \t "
" movq (%1) , %%mm0 \n \t "
" movq (%1) , %%mm1 \n \t "
" movq (%1) , %%mm2 \n \t "
" pand %2, %%mm0 \n \t "
" pand %3, %%mm1 \n \t "
" pand %4, %%mm2 \n \t "
@ -1028,8 +1021,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
" psrlq $3, %%mm1 \n \t "
" psrlq $8, %%mm2 \n \t "
PACK_RGB32
: " =m " ( * d )
: " m " ( * s ) , " m " ( mask16b ) , " m " ( mask16g ) , " m " ( mask16r )
: : " r " ( d ) , " r " ( s ) , " m " ( mask16b ) , " m " ( mask16g ) , " m " ( mask16r )
: " memory " ) ;
d + = 16 ;
s + = 4 ;
@ -1957,8 +1949,8 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
int srcStride1 , int srcStride2 ,
int dstStride1 , int dstStride2 )
{
x86_reg y ;
int x , w , h ;
x86_reg x , y ;
int w , h ;
w = width / 2 ; h = height / 2 ;
__asm__ volatile (
PREFETCH " %0 \n \t "
@ -1970,11 +1962,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
x = 0 ;
for ( ; x < w - 31 ; x + = 32 ) {
__asm__ volatile (
PREFETCH " 32%1 \n \t "
" movq %1, %%mm0 \n \t "
" movq 8%1, %%mm2 \n \t "
" movq 16%1, %%mm4 \n \t "
" movq 24%1, %%mm6 \n \t "
PREFETCH " 32( %1,%2) \n \t "
" movq ( %1,%2) , %%mm0 \n \t "
" movq 8( %1,%2) , %%mm2 \n \t "
" movq 16( %1,%2) , %%mm4 \n \t "
" movq 24( %1,%2) , %%mm6 \n \t "
" movq %%mm0, %%mm1 \n \t "
" movq %%mm2, %%mm3 \n \t "
" movq %%mm4, %%mm5 \n \t "
@ -1987,16 +1979,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
" punpckhbw %%mm5, %%mm5 \n \t "
" punpcklbw %%mm6, %%mm6 \n \t "
" punpckhbw %%mm7, %%mm7 \n \t "
MOVNTQ " %%mm0, %0 \n \t "
MOVNTQ " %%mm1, 8%0 \n \t "
MOVNTQ " %%mm2, 16%0 \n \t "
MOVNTQ " %%mm3, 24%0 \n \t "
MOVNTQ " %%mm4, 32%0 \n \t "
MOVNTQ " %%mm5, 40%0 \n \t "
MOVNTQ " %%mm6, 48%0 \n \t "
MOVNTQ " %%mm7, 56%0 "
: " =m " ( d [ 2 * x ] )
: " m " ( s1 [ x ] )
MOVNTQ " %%mm0, (%0,%2,2) \n \t "
MOVNTQ " %%mm1, 8(%0,%2,2) \n \t "
MOVNTQ " %%mm2, 16(%0,%2,2) \n \t "
MOVNTQ " %%mm3, 24(%0,%2,2) \n \t "
MOVNTQ " %%mm4, 32(%0,%2,2) \n \t "
MOVNTQ " %%mm5, 40(%0,%2,2) \n \t "
MOVNTQ " %%mm6, 48(%0,%2,2) \n \t "
MOVNTQ " %%mm7, 56(%0,%2,2) "
: : " r " ( d ) , " r " ( s1 ) , " r " ( x )
: " memory " ) ;
}
for ( ; x < w ; x + + ) d [ 2 * x ] = d [ 2 * x + 1 ] = s1 [ x ] ;
@ -2007,11 +1998,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
x = 0 ;
for ( ; x < w - 31 ; x + = 32 ) {
__asm__ volatile (
PREFETCH " 32%1 \n \t "
" movq %1, %%mm0 \n \t "
" movq 8%1, %%mm2 \n \t "
" movq 16%1, %%mm4 \n \t "
" movq 24%1, %%mm6 \n \t "
PREFETCH " 32( %1,%2) \n \t "
" movq ( %1,%2) , %%mm0 \n \t "
" movq 8( %1,%2) , %%mm2 \n \t "
" movq 16( %1,%2) , %%mm4 \n \t "
" movq 24( %1,%2) , %%mm6 \n \t "
" movq %%mm0, %%mm1 \n \t "
" movq %%mm2, %%mm3 \n \t "
" movq %%mm4, %%mm5 \n \t "
@ -2024,16 +2015,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
" punpckhbw %%mm5, %%mm5 \n \t "
" punpcklbw %%mm6, %%mm6 \n \t "
" punpckhbw %%mm7, %%mm7 \n \t "
MOVNTQ " %%mm0, %0 \n \t "
MOVNTQ " %%mm1, 8%0 \n \t "
MOVNTQ " %%mm2, 16%0 \n \t "
MOVNTQ " %%mm3, 24%0 \n \t "
MOVNTQ " %%mm4, 32%0 \n \t "
MOVNTQ " %%mm5, 40%0 \n \t "
MOVNTQ " %%mm6, 48%0 \n \t "
MOVNTQ " %%mm7, 56%0 "
: " =m " ( d [ 2 * x ] )
: " m " ( s2 [ x ] )
MOVNTQ " %%mm0, (%0,%2,2) \n \t "
MOVNTQ " %%mm1, 8(%0,%2,2) \n \t "
MOVNTQ " %%mm2, 16(%0,%2,2) \n \t "
MOVNTQ " %%mm3, 24(%0,%2,2) \n \t "
MOVNTQ " %%mm4, 32(%0,%2,2) \n \t "
MOVNTQ " %%mm5, 40(%0,%2,2) \n \t "
MOVNTQ " %%mm6, 48(%0,%2,2) \n \t "
MOVNTQ " %%mm7, 56(%0,%2,2) "
: : " r " ( d ) , " r " ( s2 ) , " r " ( x )
: " memory " ) ;
}
for ( ; x < w ; x + + ) d [ 2 * x ] = d [ 2 * x + 1 ] = s2 [ x ] ;