|
|
|
@ -73,25 +73,24 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
__asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory"); |
|
|
|
|
while (s < mm_end) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movd %1, %%mm0 \n\t" |
|
|
|
|
"punpckldq 3%1, %%mm0 \n\t" |
|
|
|
|
"movd 6%1, %%mm1 \n\t" |
|
|
|
|
"punpckldq 9%1, %%mm1 \n\t" |
|
|
|
|
"movd 12%1, %%mm2 \n\t" |
|
|
|
|
"punpckldq 15%1, %%mm2 \n\t" |
|
|
|
|
"movd 18%1, %%mm3 \n\t" |
|
|
|
|
"punpckldq 21%1, %%mm3 \n\t" |
|
|
|
|
PREFETCH" 32(%1) \n\t" |
|
|
|
|
"movd (%1), %%mm0 \n\t" |
|
|
|
|
"punpckldq 3(%1), %%mm0 \n\t" |
|
|
|
|
"movd 6(%1), %%mm1 \n\t" |
|
|
|
|
"punpckldq 9(%1), %%mm1 \n\t" |
|
|
|
|
"movd 12(%1), %%mm2 \n\t" |
|
|
|
|
"punpckldq 15(%1), %%mm2 \n\t" |
|
|
|
|
"movd 18(%1), %%mm3 \n\t" |
|
|
|
|
"punpckldq 21(%1), %%mm3 \n\t" |
|
|
|
|
"por %%mm7, %%mm0 \n\t" |
|
|
|
|
"por %%mm7, %%mm1 \n\t" |
|
|
|
|
"por %%mm7, %%mm2 \n\t" |
|
|
|
|
"por %%mm7, %%mm3 \n\t" |
|
|
|
|
MOVNTQ" %%mm0, %0 \n\t" |
|
|
|
|
MOVNTQ" %%mm1, 8%0 \n\t" |
|
|
|
|
MOVNTQ" %%mm2, 16%0 \n\t" |
|
|
|
|
MOVNTQ" %%mm3, 24%0" |
|
|
|
|
:"=m"(*dest) |
|
|
|
|
:"m"(*s) |
|
|
|
|
MOVNTQ" %%mm0, (%0) \n\t" |
|
|
|
|
MOVNTQ" %%mm1, 8(%0) \n\t" |
|
|
|
|
MOVNTQ" %%mm2, 16(%0) \n\t" |
|
|
|
|
MOVNTQ" %%mm3, 24(%0)" |
|
|
|
|
:: "r"(dest), "r"(s) |
|
|
|
|
:"memory"); |
|
|
|
|
dest += 32; |
|
|
|
|
s += 24; |
|
|
|
@ -138,9 +137,9 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
"pand "MANGLE(mask24hhhh)", %%mm5\n\t" \
|
|
|
|
|
"por %%mm5, %%mm4 \n\t" \
|
|
|
|
|
\
|
|
|
|
|
MOVNTQ" %%mm0, %0 \n\t" \
|
|
|
|
|
MOVNTQ" %%mm1, 8%0 \n\t" \
|
|
|
|
|
MOVNTQ" %%mm4, 16%0" |
|
|
|
|
MOVNTQ" %%mm0, (%0) \n\t" \
|
|
|
|
|
MOVNTQ" %%mm1, 8(%0) \n\t" \
|
|
|
|
|
MOVNTQ" %%mm4, 16(%0)" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) |
|
|
|
@ -154,18 +153,17 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
mm_end = end - 31; |
|
|
|
|
while (s < mm_end) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movq %1, %%mm0 \n\t" |
|
|
|
|
"movq 8%1, %%mm1 \n\t" |
|
|
|
|
"movq 16%1, %%mm4 \n\t" |
|
|
|
|
"movq 24%1, %%mm5 \n\t" |
|
|
|
|
PREFETCH" 32(%1) \n\t" |
|
|
|
|
"movq (%1), %%mm0 \n\t" |
|
|
|
|
"movq 8(%1), %%mm1 \n\t" |
|
|
|
|
"movq 16(%1), %%mm4 \n\t" |
|
|
|
|
"movq 24(%1), %%mm5 \n\t" |
|
|
|
|
"movq %%mm0, %%mm2 \n\t" |
|
|
|
|
"movq %%mm1, %%mm3 \n\t" |
|
|
|
|
"movq %%mm4, %%mm6 \n\t" |
|
|
|
|
"movq %%mm5, %%mm7 \n\t" |
|
|
|
|
STORE_BGR24_MMX |
|
|
|
|
:"=m"(*dest) |
|
|
|
|
:"m"(*s) |
|
|
|
|
:: "r"(dest), "r"(s) |
|
|
|
|
:"memory"); |
|
|
|
|
dest += 24; |
|
|
|
|
s += 32; |
|
|
|
@ -198,19 +196,18 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_s |
|
|
|
|
mm_end = end - 15; |
|
|
|
|
while (s<mm_end) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movq %1, %%mm0 \n\t" |
|
|
|
|
"movq 8%1, %%mm2 \n\t" |
|
|
|
|
PREFETCH" 32(%1) \n\t" |
|
|
|
|
"movq (%1), %%mm0 \n\t" |
|
|
|
|
"movq 8(%1), %%mm2 \n\t" |
|
|
|
|
"movq %%mm0, %%mm1 \n\t" |
|
|
|
|
"movq %%mm2, %%mm3 \n\t" |
|
|
|
|
"pand %%mm4, %%mm0 \n\t" |
|
|
|
|
"pand %%mm4, %%mm2 \n\t" |
|
|
|
|
"paddw %%mm1, %%mm0 \n\t" |
|
|
|
|
"paddw %%mm3, %%mm2 \n\t" |
|
|
|
|
MOVNTQ" %%mm0, %0 \n\t" |
|
|
|
|
MOVNTQ" %%mm2, 8%0" |
|
|
|
|
:"=m"(*d) |
|
|
|
|
:"m"(*s) |
|
|
|
|
MOVNTQ" %%mm0, (%0) \n\t" |
|
|
|
|
MOVNTQ" %%mm2, 8(%0)" |
|
|
|
|
:: "r"(d), "r"(s) |
|
|
|
|
); |
|
|
|
|
d+=16; |
|
|
|
|
s+=16; |
|
|
|
@ -243,9 +240,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s |
|
|
|
|
mm_end = end - 15; |
|
|
|
|
while (s<mm_end) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movq %1, %%mm0 \n\t" |
|
|
|
|
"movq 8%1, %%mm2 \n\t" |
|
|
|
|
PREFETCH" 32(%1) \n\t" |
|
|
|
|
"movq (%1), %%mm0 \n\t" |
|
|
|
|
"movq 8(%1), %%mm2 \n\t" |
|
|
|
|
"movq %%mm0, %%mm1 \n\t" |
|
|
|
|
"movq %%mm2, %%mm3 \n\t" |
|
|
|
|
"psrlq $1, %%mm0 \n\t" |
|
|
|
@ -256,10 +253,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s |
|
|
|
|
"pand %%mm6, %%mm3 \n\t" |
|
|
|
|
"por %%mm1, %%mm0 \n\t" |
|
|
|
|
"por %%mm3, %%mm2 \n\t" |
|
|
|
|
MOVNTQ" %%mm0, %0 \n\t" |
|
|
|
|
MOVNTQ" %%mm2, 8%0" |
|
|
|
|
:"=m"(*d) |
|
|
|
|
:"m"(*s) |
|
|
|
|
MOVNTQ" %%mm0, (%0) \n\t" |
|
|
|
|
MOVNTQ" %%mm2, 8(%0)" |
|
|
|
|
:: "r"(d), "r"(s) |
|
|
|
|
); |
|
|
|
|
d+=16; |
|
|
|
|
s+=16; |
|
|
|
@ -287,7 +283,6 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_s |
|
|
|
|
uint16_t *d = (uint16_t *)dst; |
|
|
|
|
end = s + src_size; |
|
|
|
|
mm_end = end - 15; |
|
|
|
|
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
|
|
|
|
|
__asm__ volatile( |
|
|
|
|
"movq %3, %%mm5 \n\t" |
|
|
|
|
"movq %4, %%mm6 \n\t" |
|
|
|
@ -322,47 +317,6 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_s |
|
|
|
|
: "+r" (d), "+r"(s) |
|
|
|
|
: "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216) |
|
|
|
|
); |
|
|
|
|
#else |
|
|
|
|
__asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
|
|
|
|
__asm__ volatile( |
|
|
|
|
"movq %0, %%mm7 \n\t" |
|
|
|
|
"movq %1, %%mm6 \n\t" |
|
|
|
|
::"m"(red_16mask),"m"(green_16mask)); |
|
|
|
|
while (s < mm_end) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movd %1, %%mm0 \n\t" |
|
|
|
|
"movd 4%1, %%mm3 \n\t" |
|
|
|
|
"punpckldq 8%1, %%mm0 \n\t" |
|
|
|
|
"punpckldq 12%1, %%mm3 \n\t" |
|
|
|
|
"movq %%mm0, %%mm1 \n\t" |
|
|
|
|
"movq %%mm0, %%mm2 \n\t" |
|
|
|
|
"movq %%mm3, %%mm4 \n\t" |
|
|
|
|
"movq %%mm3, %%mm5 \n\t" |
|
|
|
|
"psrlq $3, %%mm0 \n\t" |
|
|
|
|
"psrlq $3, %%mm3 \n\t" |
|
|
|
|
"pand %2, %%mm0 \n\t" |
|
|
|
|
"pand %2, %%mm3 \n\t" |
|
|
|
|
"psrlq $5, %%mm1 \n\t" |
|
|
|
|
"psrlq $5, %%mm4 \n\t" |
|
|
|
|
"pand %%mm6, %%mm1 \n\t" |
|
|
|
|
"pand %%mm6, %%mm4 \n\t" |
|
|
|
|
"psrlq $8, %%mm2 \n\t" |
|
|
|
|
"psrlq $8, %%mm5 \n\t" |
|
|
|
|
"pand %%mm7, %%mm2 \n\t" |
|
|
|
|
"pand %%mm7, %%mm5 \n\t" |
|
|
|
|
"por %%mm1, %%mm0 \n\t" |
|
|
|
|
"por %%mm4, %%mm3 \n\t" |
|
|
|
|
"por %%mm2, %%mm0 \n\t" |
|
|
|
|
"por %%mm5, %%mm3 \n\t" |
|
|
|
|
"psllq $16, %%mm3 \n\t" |
|
|
|
|
"por %%mm3, %%mm0 \n\t" |
|
|
|
|
MOVNTQ" %%mm0, %0 \n\t" |
|
|
|
|
:"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); |
|
|
|
|
d += 4; |
|
|
|
|
s += 16; |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
__asm__ volatile(SFENCE:::"memory"); |
|
|
|
|
__asm__ volatile(EMMS:::"memory"); |
|
|
|
|
while (s < end) { |
|
|
|
@ -386,11 +340,11 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
mm_end = end - 15; |
|
|
|
|
while (s < mm_end) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movd %1, %%mm0 \n\t" |
|
|
|
|
"movd 4%1, %%mm3 \n\t" |
|
|
|
|
"punpckldq 8%1, %%mm0 \n\t" |
|
|
|
|
"punpckldq 12%1, %%mm3 \n\t" |
|
|
|
|
PREFETCH" 32(%1) \n\t" |
|
|
|
|
"movd (%1), %%mm0 \n\t" |
|
|
|
|
"movd 4(%1), %%mm3 \n\t" |
|
|
|
|
"punpckldq 8(%1), %%mm0 \n\t" |
|
|
|
|
"punpckldq 12(%1), %%mm3 \n\t" |
|
|
|
|
"movq %%mm0, %%mm1 \n\t" |
|
|
|
|
"movq %%mm0, %%mm2 \n\t" |
|
|
|
|
"movq %%mm3, %%mm4 \n\t" |
|
|
|
@ -413,8 +367,8 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
"por %%mm5, %%mm3 \n\t" |
|
|
|
|
"psllq $16, %%mm3 \n\t" |
|
|
|
|
"por %%mm3, %%mm0 \n\t" |
|
|
|
|
MOVNTQ" %%mm0, %0 \n\t" |
|
|
|
|
:"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); |
|
|
|
|
MOVNTQ" %%mm0, (%0) \n\t" |
|
|
|
|
:: "r"(d),"r"(s),"m"(blue_16mask):"memory"); |
|
|
|
|
d += 4; |
|
|
|
|
s += 16; |
|
|
|
|
} |
|
|
|
@ -434,7 +388,6 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_s |
|
|
|
|
uint16_t *d = (uint16_t *)dst; |
|
|
|
|
end = s + src_size; |
|
|
|
|
mm_end = end - 15; |
|
|
|
|
#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster)
|
|
|
|
|
__asm__ volatile( |
|
|
|
|
"movq %3, %%mm5 \n\t" |
|
|
|
|
"movq %4, %%mm6 \n\t" |
|
|
|
@ -469,47 +422,6 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_s |
|
|
|
|
: "+r" (d), "+r"(s) |
|
|
|
|
: "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215) |
|
|
|
|
); |
|
|
|
|
#else |
|
|
|
|
__asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
|
|
|
|
__asm__ volatile( |
|
|
|
|
"movq %0, %%mm7 \n\t" |
|
|
|
|
"movq %1, %%mm6 \n\t" |
|
|
|
|
::"m"(red_15mask),"m"(green_15mask)); |
|
|
|
|
while (s < mm_end) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movd %1, %%mm0 \n\t" |
|
|
|
|
"movd 4%1, %%mm3 \n\t" |
|
|
|
|
"punpckldq 8%1, %%mm0 \n\t" |
|
|
|
|
"punpckldq 12%1, %%mm3 \n\t" |
|
|
|
|
"movq %%mm0, %%mm1 \n\t" |
|
|
|
|
"movq %%mm0, %%mm2 \n\t" |
|
|
|
|
"movq %%mm3, %%mm4 \n\t" |
|
|
|
|
"movq %%mm3, %%mm5 \n\t" |
|
|
|
|
"psrlq $3, %%mm0 \n\t" |
|
|
|
|
"psrlq $3, %%mm3 \n\t" |
|
|
|
|
"pand %2, %%mm0 \n\t" |
|
|
|
|
"pand %2, %%mm3 \n\t" |
|
|
|
|
"psrlq $6, %%mm1 \n\t" |
|
|
|
|
"psrlq $6, %%mm4 \n\t" |
|
|
|
|
"pand %%mm6, %%mm1 \n\t" |
|
|
|
|
"pand %%mm6, %%mm4 \n\t" |
|
|
|
|
"psrlq $9, %%mm2 \n\t" |
|
|
|
|
"psrlq $9, %%mm5 \n\t" |
|
|
|
|
"pand %%mm7, %%mm2 \n\t" |
|
|
|
|
"pand %%mm7, %%mm5 \n\t" |
|
|
|
|
"por %%mm1, %%mm0 \n\t" |
|
|
|
|
"por %%mm4, %%mm3 \n\t" |
|
|
|
|
"por %%mm2, %%mm0 \n\t" |
|
|
|
|
"por %%mm5, %%mm3 \n\t" |
|
|
|
|
"psllq $16, %%mm3 \n\t" |
|
|
|
|
"por %%mm3, %%mm0 \n\t" |
|
|
|
|
MOVNTQ" %%mm0, %0 \n\t" |
|
|
|
|
:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); |
|
|
|
|
d += 4; |
|
|
|
|
s += 16; |
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
__asm__ volatile(SFENCE:::"memory"); |
|
|
|
|
__asm__ volatile(EMMS:::"memory"); |
|
|
|
|
while (s < end) { |
|
|
|
@ -533,11 +445,11 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
mm_end = end - 15; |
|
|
|
|
while (s < mm_end) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movd %1, %%mm0 \n\t" |
|
|
|
|
"movd 4%1, %%mm3 \n\t" |
|
|
|
|
"punpckldq 8%1, %%mm0 \n\t" |
|
|
|
|
"punpckldq 12%1, %%mm3 \n\t" |
|
|
|
|
PREFETCH" 32(%1) \n\t" |
|
|
|
|
"movd (%1), %%mm0 \n\t" |
|
|
|
|
"movd 4(%1), %%mm3 \n\t" |
|
|
|
|
"punpckldq 8(%1), %%mm0 \n\t" |
|
|
|
|
"punpckldq 12(%1), %%mm3 \n\t" |
|
|
|
|
"movq %%mm0, %%mm1 \n\t" |
|
|
|
|
"movq %%mm0, %%mm2 \n\t" |
|
|
|
|
"movq %%mm3, %%mm4 \n\t" |
|
|
|
@ -560,8 +472,8 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
"por %%mm5, %%mm3 \n\t" |
|
|
|
|
"psllq $16, %%mm3 \n\t" |
|
|
|
|
"por %%mm3, %%mm0 \n\t" |
|
|
|
|
MOVNTQ" %%mm0, %0 \n\t" |
|
|
|
|
:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); |
|
|
|
|
MOVNTQ" %%mm0, (%0) \n\t" |
|
|
|
|
::"r"(d),"r"(s),"m"(blue_15mask):"memory"); |
|
|
|
|
d += 4; |
|
|
|
|
s += 16; |
|
|
|
|
} |
|
|
|
@ -588,11 +500,11 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
mm_end = end - 11; |
|
|
|
|
while (s < mm_end) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movd %1, %%mm0 \n\t" |
|
|
|
|
"movd 3%1, %%mm3 \n\t" |
|
|
|
|
"punpckldq 6%1, %%mm0 \n\t" |
|
|
|
|
"punpckldq 9%1, %%mm3 \n\t" |
|
|
|
|
PREFETCH" 32(%1) \n\t" |
|
|
|
|
"movd (%1), %%mm0 \n\t" |
|
|
|
|
"movd 3(%1), %%mm3 \n\t" |
|
|
|
|
"punpckldq 6(%1), %%mm0 \n\t" |
|
|
|
|
"punpckldq 9(%1), %%mm3 \n\t" |
|
|
|
|
"movq %%mm0, %%mm1 \n\t" |
|
|
|
|
"movq %%mm0, %%mm2 \n\t" |
|
|
|
|
"movq %%mm3, %%mm4 \n\t" |
|
|
|
@ -615,8 +527,8 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
"por %%mm5, %%mm3 \n\t" |
|
|
|
|
"psllq $16, %%mm3 \n\t" |
|
|
|
|
"por %%mm3, %%mm0 \n\t" |
|
|
|
|
MOVNTQ" %%mm0, %0 \n\t" |
|
|
|
|
:"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); |
|
|
|
|
MOVNTQ" %%mm0, (%0) \n\t" |
|
|
|
|
::"r"(d),"r"(s),"m"(blue_16mask):"memory"); |
|
|
|
|
d += 4; |
|
|
|
|
s += 12; |
|
|
|
|
} |
|
|
|
@ -645,11 +557,11 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s |
|
|
|
|
mm_end = end - 15; |
|
|
|
|
while (s < mm_end) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movd %1, %%mm0 \n\t" |
|
|
|
|
"movd 3%1, %%mm3 \n\t" |
|
|
|
|
"punpckldq 6%1, %%mm0 \n\t" |
|
|
|
|
"punpckldq 9%1, %%mm3 \n\t" |
|
|
|
|
PREFETCH" 32(%1) \n\t" |
|
|
|
|
"movd (%1), %%mm0 \n\t" |
|
|
|
|
"movd 3(%1), %%mm3 \n\t" |
|
|
|
|
"punpckldq 6(%1), %%mm0 \n\t" |
|
|
|
|
"punpckldq 9(%1), %%mm3 \n\t" |
|
|
|
|
"movq %%mm0, %%mm1 \n\t" |
|
|
|
|
"movq %%mm0, %%mm2 \n\t" |
|
|
|
|
"movq %%mm3, %%mm4 \n\t" |
|
|
|
@ -672,8 +584,8 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s |
|
|
|
|
"por %%mm5, %%mm3 \n\t" |
|
|
|
|
"psllq $16, %%mm3 \n\t" |
|
|
|
|
"por %%mm3, %%mm0 \n\t" |
|
|
|
|
MOVNTQ" %%mm0, %0 \n\t" |
|
|
|
|
:"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); |
|
|
|
|
MOVNTQ" %%mm0, (%0) \n\t" |
|
|
|
|
::"r"(d),"r"(s),"m"(blue_16mask):"memory"); |
|
|
|
|
d += 4; |
|
|
|
|
s += 12; |
|
|
|
|
} |
|
|
|
@ -702,11 +614,11 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
mm_end = end - 11; |
|
|
|
|
while (s < mm_end) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movd %1, %%mm0 \n\t" |
|
|
|
|
"movd 3%1, %%mm3 \n\t" |
|
|
|
|
"punpckldq 6%1, %%mm0 \n\t" |
|
|
|
|
"punpckldq 9%1, %%mm3 \n\t" |
|
|
|
|
PREFETCH" 32(%1) \n\t" |
|
|
|
|
"movd (%1), %%mm0 \n\t" |
|
|
|
|
"movd 3(%1), %%mm3 \n\t" |
|
|
|
|
"punpckldq 6(%1), %%mm0 \n\t" |
|
|
|
|
"punpckldq 9(%1), %%mm3 \n\t" |
|
|
|
|
"movq %%mm0, %%mm1 \n\t" |
|
|
|
|
"movq %%mm0, %%mm2 \n\t" |
|
|
|
|
"movq %%mm3, %%mm4 \n\t" |
|
|
|
@ -729,8 +641,8 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
"por %%mm5, %%mm3 \n\t" |
|
|
|
|
"psllq $16, %%mm3 \n\t" |
|
|
|
|
"por %%mm3, %%mm0 \n\t" |
|
|
|
|
MOVNTQ" %%mm0, %0 \n\t" |
|
|
|
|
:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); |
|
|
|
|
MOVNTQ" %%mm0, (%0) \n\t" |
|
|
|
|
::"r"(d),"r"(s),"m"(blue_15mask):"memory"); |
|
|
|
|
d += 4; |
|
|
|
|
s += 12; |
|
|
|
|
} |
|
|
|
@ -759,11 +671,11 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s |
|
|
|
|
mm_end = end - 15; |
|
|
|
|
while (s < mm_end) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movd %1, %%mm0 \n\t" |
|
|
|
|
"movd 3%1, %%mm3 \n\t" |
|
|
|
|
"punpckldq 6%1, %%mm0 \n\t" |
|
|
|
|
"punpckldq 9%1, %%mm3 \n\t" |
|
|
|
|
PREFETCH" 32(%1) \n\t" |
|
|
|
|
"movd (%1), %%mm0 \n\t" |
|
|
|
|
"movd 3(%1), %%mm3 \n\t" |
|
|
|
|
"punpckldq 6(%1), %%mm0 \n\t" |
|
|
|
|
"punpckldq 9(%1), %%mm3 \n\t" |
|
|
|
|
"movq %%mm0, %%mm1 \n\t" |
|
|
|
|
"movq %%mm0, %%mm2 \n\t" |
|
|
|
|
"movq %%mm3, %%mm4 \n\t" |
|
|
|
@ -786,8 +698,8 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s |
|
|
|
|
"por %%mm5, %%mm3 \n\t" |
|
|
|
|
"psllq $16, %%mm3 \n\t" |
|
|
|
|
"por %%mm3, %%mm0 \n\t" |
|
|
|
|
MOVNTQ" %%mm0, %0 \n\t" |
|
|
|
|
:"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); |
|
|
|
|
MOVNTQ" %%mm0, (%0) \n\t" |
|
|
|
|
::"r"(d),"r"(s),"m"(blue_15mask):"memory"); |
|
|
|
|
d += 4; |
|
|
|
|
s += 12; |
|
|
|
|
} |
|
|
|
@ -812,10 +724,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
mm_end = end - 7; |
|
|
|
|
while (s < mm_end) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movq %1, %%mm0 \n\t" |
|
|
|
|
"movq %1, %%mm1 \n\t" |
|
|
|
|
"movq %1, %%mm2 \n\t" |
|
|
|
|
PREFETCH" 32(%1) \n\t" |
|
|
|
|
"movq (%1), %%mm0 \n\t" |
|
|
|
|
"movq (%1), %%mm1 \n\t" |
|
|
|
|
"movq (%1), %%mm2 \n\t" |
|
|
|
|
"pand %2, %%mm0 \n\t" |
|
|
|
|
"pand %3, %%mm1 \n\t" |
|
|
|
|
"pand %4, %%mm2 \n\t" |
|
|
|
@ -844,9 +756,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
"movq %%mm0, %%mm6 \n\t" |
|
|
|
|
"movq %%mm3, %%mm7 \n\t" |
|
|
|
|
|
|
|
|
|
"movq 8%1, %%mm0 \n\t" |
|
|
|
|
"movq 8%1, %%mm1 \n\t" |
|
|
|
|
"movq 8%1, %%mm2 \n\t" |
|
|
|
|
"movq 8(%1), %%mm0 \n\t" |
|
|
|
|
"movq 8(%1), %%mm1 \n\t" |
|
|
|
|
"movq 8(%1), %%mm2 \n\t" |
|
|
|
|
"pand %2, %%mm0 \n\t" |
|
|
|
|
"pand %3, %%mm1 \n\t" |
|
|
|
|
"pand %4, %%mm2 \n\t" |
|
|
|
@ -873,7 +785,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
"por %%mm5, %%mm3 \n\t" |
|
|
|
|
|
|
|
|
|
:"=m"(*d) |
|
|
|
|
:"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mmx_null) |
|
|
|
|
:"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) |
|
|
|
|
:"memory"); |
|
|
|
|
/* borrowed 32 to 24 */ |
|
|
|
|
__asm__ volatile( |
|
|
|
@ -889,8 +801,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
|
|
|
|
|
STORE_BGR24_MMX |
|
|
|
|
|
|
|
|
|
:"=m"(*d) |
|
|
|
|
:"m"(*s) |
|
|
|
|
:: "r"(d), "m"(*s) |
|
|
|
|
:"memory"); |
|
|
|
|
d += 24; |
|
|
|
|
s += 8; |
|
|
|
@ -917,10 +828,10 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
mm_end = end - 7; |
|
|
|
|
while (s < mm_end) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movq %1, %%mm0 \n\t" |
|
|
|
|
"movq %1, %%mm1 \n\t" |
|
|
|
|
"movq %1, %%mm2 \n\t" |
|
|
|
|
PREFETCH" 32(%1) \n\t" |
|
|
|
|
"movq (%1), %%mm0 \n\t" |
|
|
|
|
"movq (%1), %%mm1 \n\t" |
|
|
|
|
"movq (%1), %%mm2 \n\t" |
|
|
|
|
"pand %2, %%mm0 \n\t" |
|
|
|
|
"pand %3, %%mm1 \n\t" |
|
|
|
|
"pand %4, %%mm2 \n\t" |
|
|
|
@ -950,9 +861,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
"movq %%mm0, %%mm6 \n\t" |
|
|
|
|
"movq %%mm3, %%mm7 \n\t" |
|
|
|
|
|
|
|
|
|
"movq 8%1, %%mm0 \n\t" |
|
|
|
|
"movq 8%1, %%mm1 \n\t" |
|
|
|
|
"movq 8%1, %%mm2 \n\t" |
|
|
|
|
"movq 8(%1), %%mm0 \n\t" |
|
|
|
|
"movq 8(%1), %%mm1 \n\t" |
|
|
|
|
"movq 8(%1), %%mm2 \n\t" |
|
|
|
|
"pand %2, %%mm0 \n\t" |
|
|
|
|
"pand %3, %%mm1 \n\t" |
|
|
|
|
"pand %4, %%mm2 \n\t" |
|
|
|
@ -979,7 +890,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
"por %%mm4, %%mm3 \n\t" |
|
|
|
|
"por %%mm5, %%mm3 \n\t" |
|
|
|
|
:"=m"(*d) |
|
|
|
|
:"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) |
|
|
|
|
:"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) |
|
|
|
|
:"memory"); |
|
|
|
|
/* borrowed 32 to 24 */ |
|
|
|
|
__asm__ volatile( |
|
|
|
@ -995,8 +906,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
|
|
|
|
|
STORE_BGR24_MMX |
|
|
|
|
|
|
|
|
|
:"=m"(*d) |
|
|
|
|
:"m"(*s) |
|
|
|
|
:: "r"(d), "m"(*s) |
|
|
|
|
:"memory"); |
|
|
|
|
d += 24; |
|
|
|
|
s += 8; |
|
|
|
@ -1028,8 +938,8 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr |
|
|
|
|
"movq %%mm0, %%mm3 \n\t" \
|
|
|
|
|
"punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \
|
|
|
|
|
"punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \
|
|
|
|
|
MOVNTQ" %%mm0, %0 \n\t" \
|
|
|
|
|
MOVNTQ" %%mm3, 8%0 \n\t" \
|
|
|
|
|
MOVNTQ" %%mm0, (%0) \n\t" \
|
|
|
|
|
MOVNTQ" %%mm3, 8(%0) \n\t" \
|
|
|
|
|
|
|
|
|
|
static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size) |
|
|
|
|
{ |
|
|
|
@ -1044,10 +954,10 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s |
|
|
|
|
mm_end = end - 3; |
|
|
|
|
while (s < mm_end) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movq %1, %%mm0 \n\t" |
|
|
|
|
"movq %1, %%mm1 \n\t" |
|
|
|
|
"movq %1, %%mm2 \n\t" |
|
|
|
|
PREFETCH" 32(%1) \n\t" |
|
|
|
|
"movq (%1), %%mm0 \n\t" |
|
|
|
|
"movq (%1), %%mm1 \n\t" |
|
|
|
|
"movq (%1), %%mm2 \n\t" |
|
|
|
|
"pand %2, %%mm0 \n\t" |
|
|
|
|
"pand %3, %%mm1 \n\t" |
|
|
|
|
"pand %4, %%mm2 \n\t" |
|
|
|
@ -1056,8 +966,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s |
|
|
|
|
"pmulhw %5, %%mm1 \n\t" |
|
|
|
|
"pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" |
|
|
|
|
PACK_RGB32 |
|
|
|
|
:"=m"(*d) |
|
|
|
|
:"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mul15_mid) |
|
|
|
|
::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid) |
|
|
|
|
:"memory"); |
|
|
|
|
d += 16; |
|
|
|
|
s += 4; |
|
|
|
@ -1087,10 +996,10 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s |
|
|
|
|
mm_end = end - 3; |
|
|
|
|
while (s < mm_end) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movq %1, %%mm0 \n\t" |
|
|
|
|
"movq %1, %%mm1 \n\t" |
|
|
|
|
"movq %1, %%mm2 \n\t" |
|
|
|
|
PREFETCH" 32(%1) \n\t" |
|
|
|
|
"movq (%1), %%mm0 \n\t" |
|
|
|
|
"movq (%1), %%mm1 \n\t" |
|
|
|
|
"movq (%1), %%mm2 \n\t" |
|
|
|
|
"pand %2, %%mm0 \n\t" |
|
|
|
|
"pand %3, %%mm1 \n\t" |
|
|
|
|
"pand %4, %%mm2 \n\t" |
|
|
|
@ -1100,8 +1009,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s |
|
|
|
|
"pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t" |
|
|
|
|
"pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" |
|
|
|
|
PACK_RGB32 |
|
|
|
|
:"=m"(*d) |
|
|
|
|
:"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid) |
|
|
|
|
::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid) |
|
|
|
|
:"memory"); |
|
|
|
|
d += 16; |
|
|
|
|
s += 4; |
|
|
|
@ -2029,8 +1937,8 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, |
|
|
|
|
int srcStride1, int srcStride2, |
|
|
|
|
int dstStride1, int dstStride2) |
|
|
|
|
{ |
|
|
|
|
x86_reg y; |
|
|
|
|
int x,w,h; |
|
|
|
|
x86_reg x, y; |
|
|
|
|
int w,h; |
|
|
|
|
w=width/2; h=height/2; |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" %0 \n\t" |
|
|
|
@ -2042,11 +1950,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, |
|
|
|
|
x=0; |
|
|
|
|
for (;x<w-31;x+=32) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movq %1, %%mm0 \n\t" |
|
|
|
|
"movq 8%1, %%mm2 \n\t" |
|
|
|
|
"movq 16%1, %%mm4 \n\t" |
|
|
|
|
"movq 24%1, %%mm6 \n\t" |
|
|
|
|
PREFETCH" 32(%1,%2) \n\t" |
|
|
|
|
"movq (%1,%2), %%mm0 \n\t" |
|
|
|
|
"movq 8(%1,%2), %%mm2 \n\t" |
|
|
|
|
"movq 16(%1,%2), %%mm4 \n\t" |
|
|
|
|
"movq 24(%1,%2), %%mm6 \n\t" |
|
|
|
|
"movq %%mm0, %%mm1 \n\t" |
|
|
|
|
"movq %%mm2, %%mm3 \n\t" |
|
|
|
|
"movq %%mm4, %%mm5 \n\t" |
|
|
|
@ -2059,16 +1967,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, |
|
|
|
|
"punpckhbw %%mm5, %%mm5 \n\t" |
|
|
|
|
"punpcklbw %%mm6, %%mm6 \n\t" |
|
|
|
|
"punpckhbw %%mm7, %%mm7 \n\t" |
|
|
|
|
MOVNTQ" %%mm0, %0 \n\t" |
|
|
|
|
MOVNTQ" %%mm1, 8%0 \n\t" |
|
|
|
|
MOVNTQ" %%mm2, 16%0 \n\t" |
|
|
|
|
MOVNTQ" %%mm3, 24%0 \n\t" |
|
|
|
|
MOVNTQ" %%mm4, 32%0 \n\t" |
|
|
|
|
MOVNTQ" %%mm5, 40%0 \n\t" |
|
|
|
|
MOVNTQ" %%mm6, 48%0 \n\t" |
|
|
|
|
MOVNTQ" %%mm7, 56%0" |
|
|
|
|
:"=m"(d[2*x]) |
|
|
|
|
:"m"(s1[x]) |
|
|
|
|
MOVNTQ" %%mm0, (%0,%2,2) \n\t" |
|
|
|
|
MOVNTQ" %%mm1, 8(%0,%2,2) \n\t" |
|
|
|
|
MOVNTQ" %%mm2, 16(%0,%2,2) \n\t" |
|
|
|
|
MOVNTQ" %%mm3, 24(%0,%2,2) \n\t" |
|
|
|
|
MOVNTQ" %%mm4, 32(%0,%2,2) \n\t" |
|
|
|
|
MOVNTQ" %%mm5, 40(%0,%2,2) \n\t" |
|
|
|
|
MOVNTQ" %%mm6, 48(%0,%2,2) \n\t" |
|
|
|
|
MOVNTQ" %%mm7, 56(%0,%2,2)" |
|
|
|
|
:: "r"(d), "r"(s1), "r"(x) |
|
|
|
|
:"memory"); |
|
|
|
|
} |
|
|
|
|
for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; |
|
|
|
@ -2079,11 +1986,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, |
|
|
|
|
x=0; |
|
|
|
|
for (;x<w-31;x+=32) { |
|
|
|
|
__asm__ volatile( |
|
|
|
|
PREFETCH" 32%1 \n\t" |
|
|
|
|
"movq %1, %%mm0 \n\t" |
|
|
|
|
"movq 8%1, %%mm2 \n\t" |
|
|
|
|
"movq 16%1, %%mm4 \n\t" |
|
|
|
|
"movq 24%1, %%mm6 \n\t" |
|
|
|
|
PREFETCH" 32(%1,%2) \n\t" |
|
|
|
|
"movq (%1,%2), %%mm0 \n\t" |
|
|
|
|
"movq 8(%1,%2), %%mm2 \n\t" |
|
|
|
|
"movq 16(%1,%2), %%mm4 \n\t" |
|
|
|
|
"movq 24(%1,%2), %%mm6 \n\t" |
|
|
|
|
"movq %%mm0, %%mm1 \n\t" |
|
|
|
|
"movq %%mm2, %%mm3 \n\t" |
|
|
|
|
"movq %%mm4, %%mm5 \n\t" |
|
|
|
@ -2096,16 +2003,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, |
|
|
|
|
"punpckhbw %%mm5, %%mm5 \n\t" |
|
|
|
|
"punpcklbw %%mm6, %%mm6 \n\t" |
|
|
|
|
"punpckhbw %%mm7, %%mm7 \n\t" |
|
|
|
|
MOVNTQ" %%mm0, %0 \n\t" |
|
|
|
|
MOVNTQ" %%mm1, 8%0 \n\t" |
|
|
|
|
MOVNTQ" %%mm2, 16%0 \n\t" |
|
|
|
|
MOVNTQ" %%mm3, 24%0 \n\t" |
|
|
|
|
MOVNTQ" %%mm4, 32%0 \n\t" |
|
|
|
|
MOVNTQ" %%mm5, 40%0 \n\t" |
|
|
|
|
MOVNTQ" %%mm6, 48%0 \n\t" |
|
|
|
|
MOVNTQ" %%mm7, 56%0" |
|
|
|
|
:"=m"(d[2*x]) |
|
|
|
|
:"m"(s2[x]) |
|
|
|
|
MOVNTQ" %%mm0, (%0,%2,2) \n\t" |
|
|
|
|
MOVNTQ" %%mm1, 8(%0,%2,2) \n\t" |
|
|
|
|
MOVNTQ" %%mm2, 16(%0,%2,2) \n\t" |
|
|
|
|
MOVNTQ" %%mm3, 24(%0,%2,2) \n\t" |
|
|
|
|
MOVNTQ" %%mm4, 32(%0,%2,2) \n\t" |
|
|
|
|
MOVNTQ" %%mm5, 40(%0,%2,2) \n\t" |
|
|
|
|
MOVNTQ" %%mm6, 48(%0,%2,2) \n\t" |
|
|
|
|
MOVNTQ" %%mm7, 56(%0,%2,2)" |
|
|
|
|
:: "r"(d), "r"(s2), "r"(x) |
|
|
|
|
:"memory"); |
|
|
|
|
} |
|
|
|
|
for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x]; |
|
|
|
|