sync with mplayer xp

- partial yvu9 support (copy only)
- rgb 15/16 -> 24/32 converters
- int->unsigned changes

Originally committed as revision 6493 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
pull/126/head
Arpi 23 years ago
parent d661d18d89
commit 0d9f3d85f6
  1. 124
      postproc/rgb2rgb.c
  2. 19
      postproc/rgb2rgb.h
  3. 690
      postproc/rgb2rgb_template.c
  4. 531
      postproc/swscale.c
  5. 2
      postproc/swscale_template.c
  6. 10
      postproc/yuv2rgb.c
  7. 14
      postproc/yuv2rgb_mlib.c
  8. 18
      postproc/yuv2rgb_template.c

@ -20,6 +20,8 @@
#define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit
#ifdef CAN_COMPILE_X86_ASM #ifdef CAN_COMPILE_X86_ASM
static const uint64_t mmx_null __attribute__((aligned(8))) = 0x0000000000000000ULL;
static const uint64_t mmx_one __attribute__((aligned(8))) = 0xFFFFFFFFFFFFFFFFULL;
static const uint64_t mask32b __attribute__((aligned(8))) = 0x000000FF000000FFULL; static const uint64_t mask32b __attribute__((aligned(8))) = 0x000000FF000000FFULL;
static const uint64_t mask32g __attribute__((aligned(8))) = 0x0000FF000000FF00ULL; static const uint64_t mask32g __attribute__((aligned(8))) = 0x0000FF000000FF00ULL;
static const uint64_t mask32r __attribute__((aligned(8))) = 0x00FF000000FF0000ULL; static const uint64_t mask32r __attribute__((aligned(8))) = 0x00FF000000FF0000ULL;
@ -35,6 +37,11 @@ static const uint64_t mask24hhhh __attribute__((aligned(8))) = 0xffffffffffff00
static const uint64_t mask15b __attribute__((aligned(8))) = 0x001F001F001F001FULL; /* 00000000 00011111 xxB */ static const uint64_t mask15b __attribute__((aligned(8))) = 0x001F001F001F001FULL; /* 00000000 00011111 xxB */
static const uint64_t mask15rg __attribute__((aligned(8))) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000 RGx */ static const uint64_t mask15rg __attribute__((aligned(8))) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000 RGx */
static const uint64_t mask15s __attribute__((aligned(8))) = 0xFFE0FFE0FFE0FFE0ULL; static const uint64_t mask15s __attribute__((aligned(8))) = 0xFFE0FFE0FFE0FFE0ULL;
static const uint64_t mask15g __attribute__((aligned(8))) = 0x03E003E003E003E0ULL;
static const uint64_t mask15r __attribute__((aligned(8))) = 0x7C007C007C007C00ULL;
#define mask16b mask15b
static const uint64_t mask16g __attribute__((aligned(8))) = 0x07E007E007E007E0ULL;
static const uint64_t mask16r __attribute__((aligned(8))) = 0xF800F800F800F800ULL;
static const uint64_t red_16mask __attribute__((aligned(8))) = 0x0000f8000000f800ULL; static const uint64_t red_16mask __attribute__((aligned(8))) = 0x0000f8000000f800ULL;
static const uint64_t green_16mask __attribute__((aligned(8)))= 0x000007e0000007e0ULL; static const uint64_t green_16mask __attribute__((aligned(8)))= 0x000007e0000007e0ULL;
static const uint64_t blue_16mask __attribute__((aligned(8))) = 0x0000001f0000001fULL; static const uint64_t blue_16mask __attribute__((aligned(8))) = 0x0000001f0000001fULL;
@ -137,10 +144,68 @@ void rgb24to32(const uint8_t *src,uint8_t *dst,unsigned src_size)
else if(gCpuCaps.hasMMX) else if(gCpuCaps.hasMMX)
rgb24to32_MMX(src, dst, src_size); rgb24to32_MMX(src, dst, src_size);
else else
#endif
rgb24to32_C(src, dst, src_size); rgb24to32_C(src, dst, src_size);
#else }
rgb24to32_C(src, dst, src_size);
/**
 * Runtime dispatcher for the 15 bpp -> 24 bpp conversion.
 *
 * Forwards (src, dst, src_size) unchanged to the first variant whose CPU
 * flag is set in gCpuCaps, trying MMX2, then 3DNow, then MMX; the plain C
 * variant is used when none is set or when CAN_COMPILE_X86_ASM is undefined.
 */
void rgb15to24(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
	// ordered by speed, fastest first
	if(gCpuCaps.hasMMX2)
		rgb15to24_MMX2(src, dst, src_size);
	else if(gCpuCaps.has3DNow)
		rgb15to24_3DNow(src, dst, src_size);
	else if(gCpuCaps.hasMMX)
		rgb15to24_MMX(src, dst, src_size);
	else
#endif
		/* Target of the dangling "else" above when asm is compiled in,
		   otherwise the only statement in the function. */
		rgb15to24_C(src, dst, src_size);
}
/**
 * Runtime dispatcher for the 16 bpp -> 24 bpp conversion.
 *
 * Hands (src, dst, src_size) to exactly one implementation: the first of
 * MMX2, 3DNow, MMX whose flag is set in gCpuCaps, or the plain C variant
 * when no flag is set or CAN_COMPILE_X86_ASM is undefined.
 */
void rgb16to24(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
	/* Candidates are tried from fastest to slowest. */
	if(gCpuCaps.hasMMX2)
	{
		rgb16to24_MMX2(src, dst, src_size);
		return;
	}
	if(gCpuCaps.has3DNow)
	{
		rgb16to24_3DNow(src, dst, src_size);
		return;
	}
	if(gCpuCaps.hasMMX)
	{
		rgb16to24_MMX(src, dst, src_size);
		return;
	}
#endif
	/* No accelerated variant was selected. */
	rgb16to24_C(src, dst, src_size);
}
/**
 * Runtime dispatcher for the 15 bpp -> 32 bpp conversion.
 *
 * Hands (src, dst, src_size) to exactly one implementation: the first of
 * MMX2, 3DNow, MMX whose flag is set in gCpuCaps, or the plain C variant
 * when no flag is set or CAN_COMPILE_X86_ASM is undefined.
 */
void rgb15to32(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
	/* Candidates are tried from fastest to slowest. */
	if(gCpuCaps.hasMMX2)
	{
		rgb15to32_MMX2(src, dst, src_size);
		return;
	}
	if(gCpuCaps.has3DNow)
	{
		rgb15to32_3DNow(src, dst, src_size);
		return;
	}
	if(gCpuCaps.hasMMX)
	{
		rgb15to32_MMX(src, dst, src_size);
		return;
	}
#endif
	/* No accelerated variant was selected. */
	rgb15to32_C(src, dst, src_size);
}
/**
 * Runtime dispatcher for the 16 bpp -> 32 bpp conversion.
 *
 * Forwards (src, dst, src_size) unchanged to the first variant whose CPU
 * flag is set in gCpuCaps, trying MMX2, then 3DNow, then MMX; the plain C
 * variant is used when none is set or when CAN_COMPILE_X86_ASM is undefined.
 */
void rgb16to32(const uint8_t *src,uint8_t *dst,unsigned src_size)
{
#ifdef CAN_COMPILE_X86_ASM
	// ordered by speed, fastest first
	if(gCpuCaps.hasMMX2)
		rgb16to32_MMX2(src, dst, src_size);
	else if(gCpuCaps.has3DNow)
		rgb16to32_3DNow(src, dst, src_size);
	else if(gCpuCaps.hasMMX)
		rgb16to32_MMX(src, dst, src_size);
	else
#endif
		/* Target of the dangling "else" above when asm is compiled in,
		   otherwise the only statement in the function. */
		rgb16to32_C(src, dst, src_size);
}
void rgb32to24(const uint8_t *src,uint8_t *dst,unsigned src_size) void rgb32to24(const uint8_t *src,uint8_t *dst,unsigned src_size)
@ -154,10 +219,8 @@ void rgb32to24(const uint8_t *src,uint8_t *dst,unsigned src_size)
else if(gCpuCaps.hasMMX) else if(gCpuCaps.hasMMX)
rgb32to24_MMX(src, dst, src_size); rgb32to24_MMX(src, dst, src_size);
else else
rgb32to24_C(src, dst, src_size);
#else
rgb32to24_C(src, dst, src_size);
#endif #endif
rgb32to24_C(src, dst, src_size);
} }
/* /*
@ -177,10 +240,8 @@ void rgb15to16(const uint8_t *src,uint8_t *dst,unsigned src_size)
else if(gCpuCaps.hasMMX) else if(gCpuCaps.hasMMX)
rgb15to16_MMX(src, dst, src_size); rgb15to16_MMX(src, dst, src_size);
else else
rgb15to16_C(src, dst, src_size);
#else
rgb15to16_C(src, dst, src_size);
#endif #endif
rgb15to16_C(src, dst, src_size);
} }
/** /**
@ -242,10 +303,8 @@ void rgb32to16(const uint8_t *src, uint8_t *dst, unsigned src_size)
else if(gCpuCaps.hasMMX) else if(gCpuCaps.hasMMX)
rgb32to16_MMX(src, dst, src_size); rgb32to16_MMX(src, dst, src_size);
else else
rgb32to16_C(src, dst, src_size);
#else
rgb32to16_C(src, dst, src_size);
#endif #endif
rgb32to16_C(src, dst, src_size);
} }
void rgb32to15(const uint8_t *src, uint8_t *dst, unsigned src_size) void rgb32to15(const uint8_t *src, uint8_t *dst, unsigned src_size)
@ -259,10 +318,8 @@ void rgb32to15(const uint8_t *src, uint8_t *dst, unsigned src_size)
else if(gCpuCaps.hasMMX) else if(gCpuCaps.hasMMX)
rgb32to15_MMX(src, dst, src_size); rgb32to15_MMX(src, dst, src_size);
else else
rgb32to15_C(src, dst, src_size);
#else
rgb32to15_C(src, dst, src_size);
#endif #endif
rgb32to15_C(src, dst, src_size);
} }
void rgb24to16(const uint8_t *src, uint8_t *dst, unsigned src_size) void rgb24to16(const uint8_t *src, uint8_t *dst, unsigned src_size)
@ -276,10 +333,8 @@ void rgb24to16(const uint8_t *src, uint8_t *dst, unsigned src_size)
else if(gCpuCaps.hasMMX) else if(gCpuCaps.hasMMX)
rgb24to16_MMX(src, dst, src_size); rgb24to16_MMX(src, dst, src_size);
else else
rgb24to16_C(src, dst, src_size);
#else
rgb24to16_C(src, dst, src_size);
#endif #endif
rgb24to16_C(src, dst, src_size);
} }
void rgb24to15(const uint8_t *src, uint8_t *dst, unsigned src_size) void rgb24to15(const uint8_t *src, uint8_t *dst, unsigned src_size)
@ -293,10 +348,8 @@ void rgb24to15(const uint8_t *src, uint8_t *dst, unsigned src_size)
else if(gCpuCaps.hasMMX) else if(gCpuCaps.hasMMX)
rgb24to15_MMX(src, dst, src_size); rgb24to15_MMX(src, dst, src_size);
else else
rgb24to15_C(src, dst, src_size);
#else
rgb24to15_C(src, dst, src_size);
#endif #endif
rgb24to15_C(src, dst, src_size);
} }
/** /**
@ -330,10 +383,8 @@ void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned int src_size)
else if(gCpuCaps.hasMMX) else if(gCpuCaps.hasMMX)
rgb32tobgr32_MMX(src, dst, src_size); rgb32tobgr32_MMX(src, dst, src_size);
else else
rgb32tobgr32_C(src, dst, src_size);
#else
rgb32tobgr32_C(src, dst, src_size);
#endif #endif
rgb32tobgr32_C(src, dst, src_size);
} }
void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size) void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
@ -347,10 +398,8 @@ void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned int src_size)
else if(gCpuCaps.hasMMX) else if(gCpuCaps.hasMMX)
rgb24tobgr24_MMX(src, dst, src_size); rgb24tobgr24_MMX(src, dst, src_size);
else else
rgb24tobgr24_C(src, dst, src_size);
#else
rgb24tobgr24_C(src, dst, src_size);
#endif #endif
rgb24tobgr24_C(src, dst, src_size);
} }
/** /**
@ -371,10 +420,8 @@ void yv12toyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc, u
else if(gCpuCaps.hasMMX) else if(gCpuCaps.hasMMX)
yv12toyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); yv12toyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
else else
yv12toyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
#else
yv12toyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
#endif #endif
yv12toyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
} }
/** /**
@ -394,10 +441,8 @@ void yuv422ptoyuy2(const uint8_t *ysrc, const uint8_t *usrc, const uint8_t *vsrc
else if(gCpuCaps.hasMMX) else if(gCpuCaps.hasMMX)
yuv422ptoyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride); yuv422ptoyuy2_MMX(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
else else
yuv422ptoyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
#else
yuv422ptoyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
#endif #endif
yuv422ptoyuy2_C(ysrc, usrc, vsrc, dst, width, height, lumStride, chromStride, dstStride);
} }
/** /**
@ -418,10 +463,8 @@ void yuy2toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
else if(gCpuCaps.hasMMX) else if(gCpuCaps.hasMMX)
yuy2toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); yuy2toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
else else
yuy2toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
#else
yuy2toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
#endif #endif
yuy2toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
} }
/** /**
@ -488,14 +531,13 @@ void rgb24toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst
else if(gCpuCaps.hasMMX) else if(gCpuCaps.hasMMX)
rgb24toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride); rgb24toyv12_MMX(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
else else
rgb24toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
#else
rgb24toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
#endif #endif
rgb24toyv12_C(src, ydst, udst, vdst, width, height, lumStride, chromStride, srcStride);
} }
void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst, void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst,
int width, int height, int src1Stride, int src2Stride, int dstStride) unsigned width, unsigned height, unsigned src1Stride,
unsigned src2Stride, unsigned dstStride)
{ {
#ifdef CAN_COMPILE_X86_ASM #ifdef CAN_COMPILE_X86_ASM
// ordered per speed fasterst first // ordered per speed fasterst first
@ -506,8 +548,6 @@ void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst,
else if(gCpuCaps.hasMMX) else if(gCpuCaps.hasMMX)
interleaveBytes_MMX(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride); interleaveBytes_MMX(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
else else
interleaveBytes_C(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
#else
interleaveBytes_C(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
#endif #endif
interleaveBytes_C(src1, src2, dst, width, height, src1Stride, src2Stride, dstStride);
} }

@ -10,12 +10,16 @@
#define RGB2RGB_INCLUDED #define RGB2RGB_INCLUDED
extern void rgb24to32(const uint8_t *src,uint8_t *dst,unsigned src_size); extern void rgb24to32(const uint8_t *src,uint8_t *dst,unsigned src_size);
extern void rgb24to16(const uint8_t *src,uint8_t *dst,unsigned src_size);
extern void rgb24to15(const uint8_t *src,uint8_t *dst,unsigned src_size);
extern void rgb32to24(const uint8_t *src,uint8_t *dst,unsigned src_size); extern void rgb32to24(const uint8_t *src,uint8_t *dst,unsigned src_size);
extern void rgb15to16(const uint8_t *src,uint8_t *dst,unsigned src_size);
extern void rgb32to16(const uint8_t *src,uint8_t *dst,unsigned src_size); extern void rgb32to16(const uint8_t *src,uint8_t *dst,unsigned src_size);
extern void rgb32to15(const uint8_t *src,uint8_t *dst,unsigned src_size); extern void rgb32to15(const uint8_t *src,uint8_t *dst,unsigned src_size);
extern void rgb24to16(const uint8_t *src,uint8_t *dst,unsigned src_size); extern void rgb15to16(const uint8_t *src,uint8_t *dst,unsigned src_size);
extern void rgb24to15(const uint8_t *src,uint8_t *dst,unsigned src_size); extern void rgb15to24(const uint8_t *src,uint8_t *dst,unsigned src_size);
extern void rgb15to32(const uint8_t *src,uint8_t *dst,unsigned src_size);
extern void rgb16to24(const uint8_t *src,uint8_t *dst,unsigned src_size);
extern void rgb16to32(const uint8_t *src,uint8_t *dst,unsigned src_size);
extern void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned src_size); extern void rgb32tobgr32(const uint8_t *src, uint8_t *dst, unsigned src_size);
extern void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned src_size); extern void rgb24tobgr24(const uint8_t *src, uint8_t *dst, unsigned src_size);
@ -39,7 +43,8 @@ extern void rgb24toyv12(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_
unsigned int lumStride, unsigned int chromStride, unsigned int srcStride); unsigned int lumStride, unsigned int chromStride, unsigned int srcStride);
extern void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst, extern void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst,
int width, int height, int src1Stride, int src2Stride, int dstStride); unsigned width, unsigned height, unsigned src1Stride,
unsigned src2Stride, unsigned dstStride);
#define MODE_RGB 0x1 #define MODE_RGB 0x1
@ -47,11 +52,11 @@ extern void interleaveBytes(uint8_t *src1, uint8_t *src2, uint8_t *dst,
typedef void (* yuv2rgb_fun) (uint8_t * image, uint8_t * py, typedef void (* yuv2rgb_fun) (uint8_t * image, uint8_t * py,
uint8_t * pu, uint8_t * pv, uint8_t * pu, uint8_t * pv,
int h_size, int v_size, unsigned h_size, unsigned v_size,
int rgb_stride, int y_stride, int uv_stride); unsigned rgb_stride, unsigned y_stride, unsigned uv_stride);
extern yuv2rgb_fun yuv2rgb; extern yuv2rgb_fun yuv2rgb;
void yuv2rgb_init (int bpp, int mode); void yuv2rgb_init (unsigned bpp, int mode);
#endif #endif

@ -8,6 +8,13 @@
* palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL) * palette & yuv & runtime cpu stuff by Michael (michaelni@gmx.at) (under GPL)
*/ */
#include <stddef.h>
#include <inttypes.h> /* for __WORDSIZE */
#ifndef __WORDSIZE
#warning You have misconfigured system and probably will lose performance!
#endif
#undef PREFETCH #undef PREFETCH
#undef MOVNTQ #undef MOVNTQ
#undef EMMS #undef EMMS
@ -56,13 +63,13 @@ static inline void RENAME(rgb24to32)(const uint8_t *src,uint8_t *dst,unsigned sr
const uint8_t *s = src; const uint8_t *s = src;
const uint8_t *end; const uint8_t *end;
#ifdef HAVE_MMX #ifdef HAVE_MMX
const uint8_t *mm_end; uint8_t *mm_end;
#endif #endif
end = s + src_size; end = s + src_size;
#ifdef HAVE_MMX #ifdef HAVE_MMX
__asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
mm_end = end - 23;
__asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory"); __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
mm_end = (uint8_t*)((((unsigned long)end)/24)*24);
while(s < mm_end) while(s < mm_end)
{ {
__asm __volatile( __asm __volatile(
@ -107,12 +114,12 @@ static inline void RENAME(rgb32to24)(const uint8_t *src,uint8_t *dst,unsigned sr
const uint8_t *s = src; const uint8_t *s = src;
const uint8_t *end; const uint8_t *end;
#ifdef HAVE_MMX #ifdef HAVE_MMX
const uint8_t *mm_end; uint8_t *mm_end;
#endif #endif
end = s + src_size; end = s + src_size;
#ifdef HAVE_MMX #ifdef HAVE_MMX
__asm __volatile(PREFETCH" %0"::"m"(*s):"memory"); __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
mm_end = end - 31; mm_end = (uint8_t*)((((unsigned long)end)/32)*32);
while(s < mm_end) while(s < mm_end)
{ {
__asm __volatile( __asm __volatile(
@ -186,15 +193,16 @@ static inline void RENAME(rgb32to24)(const uint8_t *src,uint8_t *dst,unsigned sr
*/ */
static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned src_size) static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned src_size)
{ {
register const uint8_t* s=src;
register uint8_t* d=dst;
register const uint8_t *end;
uint8_t *mm_end;
end = s + src_size;
#ifdef HAVE_MMX #ifdef HAVE_MMX
register int offs=15-src_size; __asm __volatile(PREFETCH" %0"::"m"(*s));
register const char* s=src-offs; __asm __volatile("movq %0, %%mm4"::"m"(mask15s));
register char* d=dst-offs; mm_end = (uint8_t*)((((unsigned long)end)/16)*16);
__asm __volatile(PREFETCH" %0"::"m"(*(s+offs))); while(s<mm_end)
__asm __volatile(
"movq %0, %%mm4\n\t"
::"m"(mask15s));
while(offs<0)
{ {
__asm __volatile( __asm __volatile(
PREFETCH" 32%1\n\t" PREFETCH" 32%1\n\t"
@ -208,40 +216,28 @@ static inline void RENAME(rgb15to16)(const uint8_t *src,uint8_t *dst,unsigned sr
"paddw %%mm3, %%mm2\n\t" "paddw %%mm3, %%mm2\n\t"
MOVNTQ" %%mm0, %0\n\t" MOVNTQ" %%mm0, %0\n\t"
MOVNTQ" %%mm2, 8%0" MOVNTQ" %%mm2, 8%0"
:"=m"(*(d+offs)) :"=m"(*d)
:"m"(*(s+offs)) :"m"(*s)
); );
offs+=16; d+=16;
s+=16;
} }
__asm __volatile(SFENCE:::"memory"); __asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory"); __asm __volatile(EMMS:::"memory");
#else
#if 0
const uint16_t *s1=( uint16_t * )src;
uint16_t *d1=( uint16_t * )dst;
uint16_t *e=((uint8_t *)s1)+src_size;
while( s1<e ){
register int x=*( s1++ );
/* rrrrrggggggbbbbb
0rrrrrgggggbbbbb
0111 1111 1110 0000=0x7FE0
00000000000001 1111=0x001F */
*( d1++ )=( x&0x001F )|( ( x&0x7FE0 )<<1 );
}
#else
const unsigned *s1=( unsigned * )src;
unsigned *d1=( unsigned * )dst;
int i;
int size= src_size>>2;
for(i=0; i<size; i++)
{
register int x= s1[i];
// d1[i] = x + (x&0x7FE07FE0); //faster but need msbit =0 which might not allways be true
d1[i] = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
}
#endif
#endif #endif
mm_end = (uint8_t*)((((unsigned long)end)/4)*4);
while(s < mm_end)
{
register unsigned x= *((uint32_t *)s);
*((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
d+=4;
s+=4;
}
if(s < end)
{
register unsigned short x= *((uint16_t *)s);
*((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
}
} }
static inline void RENAME(bgr24torgb24)(const uint8_t *src, uint8_t *dst, unsigned src_size) static inline void RENAME(bgr24torgb24)(const uint8_t *src, uint8_t *dst, unsigned src_size)
@ -257,17 +253,20 @@ static inline void RENAME(bgr24torgb24)(const uint8_t *src, uint8_t *dst, unsign
static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned src_size) static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{ {
#ifdef HAVE_MMX
const uint8_t *s = src; const uint8_t *s = src;
const uint8_t *end,*mm_end; const uint8_t *end;
#ifdef HAVE_MMX
const uint8_t *mm_end;
#endif
uint16_t *d = (uint16_t *)dst; uint16_t *d = (uint16_t *)dst;
end = s + src_size; end = s + src_size;
mm_end = end - 15; #ifdef HAVE_MMX
__asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
__asm __volatile( __asm __volatile(
"movq %0, %%mm7\n\t" "movq %0, %%mm7\n\t"
"movq %1, %%mm6\n\t" "movq %1, %%mm6\n\t"
::"m"(red_16mask),"m"(green_16mask)); ::"m"(red_16mask),"m"(green_16mask));
mm_end = (uint8_t*)((((unsigned long)end)/16)*16);
while(s < mm_end) while(s < mm_end)
{ {
__asm __volatile( __asm __volatile(
@ -303,43 +302,35 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, unsigned
d += 4; d += 4;
s += 16; s += 16;
} }
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#endif
while(s < end) while(s < end)
{ {
const int b= *s++; const int b= *s++;
const int g= *s++; const int g= *s++;
const int r= *s++; const int r= *s++;
s++;
*d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
s++;
} }
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#else
unsigned j,i,num_pixels=src_size/4;
uint16_t *d = (uint16_t *)dst;
for(i=0,j=0; j<num_pixels; i+=4,j++)
{
const int b= src[i+0];
const int g= src[i+1];
const int r= src[i+2];
d[j]= (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
}
#endif
} }
static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned src_size) static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{ {
#ifdef HAVE_MMX
const uint8_t *s = src; const uint8_t *s = src;
const uint8_t *end,*mm_end; const uint8_t *end;
#ifdef HAVE_MMX
const uint8_t *mm_end;
#endif
uint16_t *d = (uint16_t *)dst; uint16_t *d = (uint16_t *)dst;
end = s + src_size; end = s + src_size;
mm_end = end - 15; #ifdef HAVE_MMX
__asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
__asm __volatile( __asm __volatile(
"movq %0, %%mm7\n\t" "movq %0, %%mm7\n\t"
"movq %1, %%mm6\n\t" "movq %1, %%mm6\n\t"
::"m"(red_15mask),"m"(green_15mask)); ::"m"(red_15mask),"m"(green_15mask));
mm_end = (uint8_t*)((((unsigned long)end)/16)*16);
while(s < mm_end) while(s < mm_end)
{ {
__asm __volatile( __asm __volatile(
@ -375,43 +366,35 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, unsigned
d += 4; d += 4;
s += 16; s += 16;
} }
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#endif
while(s < end) while(s < end)
{ {
const int b= *s++; const int b= *s++;
const int g= *s++; const int g= *s++;
const int r= *s++; const int r= *s++;
s++;
*d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
s++;
} }
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#else
unsigned j,i,num_pixels=src_size/4;
uint16_t *d = (uint16_t *)dst;
for(i=0,j=0; j<num_pixels; i+=4,j++)
{
const int b= src[i+0];
const int g= src[i+1];
const int r= src[i+2];
d[j]= (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
}
#endif
} }
static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, unsigned src_size) static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{ {
#ifdef HAVE_MMX
const uint8_t *s = src; const uint8_t *s = src;
const uint8_t *end,*mm_end; const uint8_t *end;
#ifdef HAVE_MMX
const uint8_t *mm_end;
#endif
uint16_t *d = (uint16_t *)dst; uint16_t *d = (uint16_t *)dst;
end = s + src_size; end = s + src_size;
mm_end = end - 11; #ifdef HAVE_MMX
__asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
__asm __volatile( __asm __volatile(
"movq %0, %%mm7\n\t" "movq %0, %%mm7\n\t"
"movq %1, %%mm6\n\t" "movq %1, %%mm6\n\t"
::"m"(red_16mask),"m"(green_16mask)); ::"m"(red_16mask),"m"(green_16mask));
mm_end = (uint8_t*)((((unsigned long)end)/16)*16);
while(s < mm_end) while(s < mm_end)
{ {
__asm __volatile( __asm __volatile(
@ -447,6 +430,9 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, unsigned
d += 4; d += 4;
s += 12; s += 12;
} }
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#endif
while(s < end) while(s < end)
{ {
const int b= *s++; const int b= *s++;
@ -454,35 +440,24 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, unsigned
const int r= *s++; const int r= *s++;
*d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
} }
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#else
unsigned j,i,num_pixels=src_size/3;
uint16_t *d = (uint16_t *)dst;
for(i=0,j=0; j<num_pixels; i+=3,j++)
{
const int b= src[i+0];
const int g= src[i+1];
const int r= src[i+2];
d[j]= (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
}
#endif
} }
static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, unsigned src_size) static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{ {
#ifdef HAVE_MMX
const uint8_t *s = src; const uint8_t *s = src;
const uint8_t *end,*mm_end; const uint8_t *end;
#ifdef HAVE_MMX
const uint8_t *mm_end;
#endif
uint16_t *d = (uint16_t *)dst; uint16_t *d = (uint16_t *)dst;
end = s + src_size; end = s + src_size;
mm_end = end -11; #ifdef HAVE_MMX
__asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
__asm __volatile( __asm __volatile(
"movq %0, %%mm7\n\t" "movq %0, %%mm7\n\t"
"movq %1, %%mm6\n\t" "movq %1, %%mm6\n\t"
::"m"(red_15mask),"m"(green_15mask)); ::"m"(red_15mask),"m"(green_15mask));
mm_end = (uint8_t*)((((unsigned long)end)/16)*16);
while(s < mm_end) while(s < mm_end)
{ {
__asm __volatile( __asm __volatile(
@ -518,6 +493,9 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, unsigned
d += 4; d += 4;
s += 12; s += 12;
} }
__asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory");
#endif
while(s < end) while(s < end)
{ {
const int b= *s++; const int b= *s++;
@ -525,25 +503,448 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, unsigned
const int r= *s++; const int r= *s++;
*d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
} }
}
/*
This code uses a less accurate approximation: the input value is simply
left-shifted and the low-order bits are filled with zeroes. This method
improves PNG compression, but it cannot reproduce white exactly, since it
does not generate an all-ones maximum value; the net effect is to darken
the image slightly.
A better method would be "left bit replication":
4 3 2 1 0
---------
1 1 0 1 1
7 6 5 4 3 2 1 0
----------------
1 1 0 1 1 1 1 0
|=======| |===|
| Leftmost Bits Repeated to Fill Open Bits
|
Original Bits
*/
/**
 * Expands 15 bpp pixels (5 bits each: blue in bits 0-4, green in bits 5-9,
 * red in bits 10-14) to 3 bytes per pixel, written in the order blue,
 * green, red. Each component is shifted into the top 5 bits of its byte;
 * the low 3 bits are left zero.
 *
 * @param src       source pixels, read as 16-bit words
 * @param dst       destination; receives 3 bytes for every source pixel
 * @param src_size  size of the source in bytes; an odd trailing byte is
 *                  ignored (src_size/2 pixels are converted)
 */
static inline void RENAME(rgb15to24)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
	const uint16_t *end;
#ifdef HAVE_MMX
	const uint16_t *mm_end;
#endif
	uint8_t *d = (uint8_t *)dst;
	const uint16_t *s = (const uint16_t *)src;
	end = s + src_size/2;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
	/* One pass of the loop below loads 8 pixels (16 bytes) and stores 24
	   bytes, so it may only run while at least 8 whole pixels remain.
	   The bound is derived from the pixel count, not from the address of
	   'end': rounding the address could let the last pass read beyond
	   src and write beyond dst. */
	mm_end = s + ((src_size/2) & ~7U);
	while(s < mm_end)
	{
		/* Unpack two groups of 4 pixels into 32-bit-per-pixel form.
		   Nothing is stored here: the first group is parked in
		   mm6/mm7 and the second is left in mm0/mm3 for the next asm
		   statement (this relies on the MMX registers surviving
		   between the two statements). */
		__asm __volatile(
			PREFETCH" 32%1\n\t"
			"movq %1, %%mm0\n\t"
			"movq %1, %%mm1\n\t"
			"movq %1, %%mm2\n\t"
			"pand %2, %%mm0\n\t"
			"pand %3, %%mm1\n\t"
			"pand %4, %%mm2\n\t"
			"psllq $3, %%mm0\n\t"
			"psrlq $2, %%mm1\n\t"
			"psrlq $7, %%mm2\n\t"
			"movq %%mm0, %%mm3\n\t"
			"movq %%mm1, %%mm4\n\t"
			"movq %%mm2, %%mm5\n\t"
			"punpcklwd %5, %%mm0\n\t"
			"punpcklwd %5, %%mm1\n\t"
			"punpcklwd %5, %%mm2\n\t"
			"punpckhwd %5, %%mm3\n\t"
			"punpckhwd %5, %%mm4\n\t"
			"punpckhwd %5, %%mm5\n\t"
			"psllq $8, %%mm1\n\t"
			"psllq $16, %%mm2\n\t"
			"por %%mm1, %%mm0\n\t"
			"por %%mm2, %%mm0\n\t"
			"psllq $8, %%mm4\n\t"
			"psllq $16, %%mm5\n\t"
			"por %%mm4, %%mm3\n\t"
			"por %%mm5, %%mm3\n\t"

			"movq %%mm0, %%mm6\n\t"
			"movq %%mm3, %%mm7\n\t"

			"movq 8%1, %%mm0\n\t"
			"movq 8%1, %%mm1\n\t"
			"movq 8%1, %%mm2\n\t"
			"pand %2, %%mm0\n\t"
			"pand %3, %%mm1\n\t"
			"pand %4, %%mm2\n\t"
			"psllq $3, %%mm0\n\t"
			"psrlq $2, %%mm1\n\t"
			"psrlq $7, %%mm2\n\t"
			"movq %%mm0, %%mm3\n\t"
			"movq %%mm1, %%mm4\n\t"
			"movq %%mm2, %%mm5\n\t"
			"punpcklwd %5, %%mm0\n\t"
			"punpcklwd %5, %%mm1\n\t"
			"punpcklwd %5, %%mm2\n\t"
			"punpckhwd %5, %%mm3\n\t"
			"punpckhwd %5, %%mm4\n\t"
			"punpckhwd %5, %%mm5\n\t"
			"psllq $8, %%mm1\n\t"
			"psllq $16, %%mm2\n\t"
			"por %%mm1, %%mm0\n\t"
			"por %%mm2, %%mm0\n\t"
			"psllq $8, %%mm4\n\t"
			"psllq $16, %%mm5\n\t"
			"por %%mm4, %%mm3\n\t"
			"por %%mm5, %%mm3\n\t"
			:"=m"(*d)
			:"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
			:"memory");
		/* Borrowed 32 to 24 */
		/* Squeeze the unused fourth byte out of each of the 8 unpacked
		   pixels and store the resulting 24 bytes. */
		__asm __volatile(
			"movq %%mm0, %%mm4\n\t"
			"movq %%mm3, %%mm5\n\t"
			"movq %%mm6, %%mm0\n\t"
			"movq %%mm7, %%mm1\n\t"

			"movq %%mm4, %%mm6\n\t"
			"movq %%mm5, %%mm7\n\t"
			"movq %%mm0, %%mm2\n\t"
			"movq %%mm1, %%mm3\n\t"

			"psrlq $8, %%mm2\n\t"
			"psrlq $8, %%mm3\n\t"
			"psrlq $8, %%mm6\n\t"
			"psrlq $8, %%mm7\n\t"
			"pand %2, %%mm0\n\t"
			"pand %2, %%mm1\n\t"
			"pand %2, %%mm4\n\t"
			"pand %2, %%mm5\n\t"
			"pand %3, %%mm2\n\t"
			"pand %3, %%mm3\n\t"
			"pand %3, %%mm6\n\t"
			"pand %3, %%mm7\n\t"
			"por %%mm2, %%mm0\n\t"
			"por %%mm3, %%mm1\n\t"
			"por %%mm6, %%mm4\n\t"
			"por %%mm7, %%mm5\n\t"

			"movq %%mm1, %%mm2\n\t"
			"movq %%mm4, %%mm3\n\t"
			"psllq $48, %%mm2\n\t"
			"psllq $32, %%mm3\n\t"
			"pand %4, %%mm2\n\t"
			"pand %5, %%mm3\n\t"
			"por %%mm2, %%mm0\n\t"
			"psrlq $16, %%mm1\n\t"
			"psrlq $32, %%mm4\n\t"
			"psllq $16, %%mm5\n\t"
			"por %%mm3, %%mm1\n\t"
			"pand %6, %%mm5\n\t"
			"por %%mm5, %%mm4\n\t"

			MOVNTQ" %%mm0, %0\n\t"
			MOVNTQ" %%mm1, 8%0\n\t"
			MOVNTQ" %%mm4, 16%0"

			:"=m"(*d)
			:"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
			:"memory");
		d += 24;
		s += 8;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* Scalar path: the whole input without MMX, otherwise the 0..7 pixels
	   the loop above left over. */
	while(s < end)
	{
		register uint16_t bgr;
		bgr = *s++;
		*d++ = (bgr&0x1F)<<3;
		*d++ = (bgr&0x3E0)>>2;
		*d++ = (bgr&0x7C00)>>7;
	}
}
/**
 * Expands 16 bpp pixels (blue in bits 0-4, green in bits 5-10, red in bits
 * 11-15) to 3 bytes per pixel, written in the order blue, green, red.
 * Each component is shifted into the top bits of its byte; the remaining
 * low bits (3 for blue/red, 2 for green) are left zero.
 *
 * @param src       source pixels, read as 16-bit words
 * @param dst       destination; receives 3 bytes for every source pixel
 * @param src_size  size of the source in bytes; an odd trailing byte is
 *                  ignored (src_size/2 pixels are converted)
 */
static inline void RENAME(rgb16to24)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
	const uint16_t *end;
#ifdef HAVE_MMX
	const uint16_t *mm_end;
#endif
	uint8_t *d = (uint8_t *)dst;
	const uint16_t *s = (const uint16_t *)src;
	end = s + src_size/2;
#ifdef HAVE_MMX
	__asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
	/* One pass of the loop below loads 8 pixels (16 bytes) and stores 24
	   bytes, so it may only run while at least 8 whole pixels remain.
	   The bound is derived from the pixel count, not from the address of
	   'end': rounding the address could let the last pass read beyond
	   src and write beyond dst. */
	mm_end = s + ((src_size/2) & ~7U);
	while(s < mm_end)
	{
		/* Unpack two groups of 4 pixels into 32-bit-per-pixel form.
		   Nothing is stored here: the first group is parked in
		   mm6/mm7 and the second is left in mm0/mm3 for the next asm
		   statement (this relies on the MMX registers surviving
		   between the two statements). */
		__asm __volatile(
			PREFETCH" 32%1\n\t"
			"movq %1, %%mm0\n\t"
			"movq %1, %%mm1\n\t"
			"movq %1, %%mm2\n\t"
			"pand %2, %%mm0\n\t"
			"pand %3, %%mm1\n\t"
			"pand %4, %%mm2\n\t"
			"psllq $3, %%mm0\n\t"
			"psrlq $3, %%mm1\n\t"
			"psrlq $8, %%mm2\n\t"
			"movq %%mm0, %%mm3\n\t"
			"movq %%mm1, %%mm4\n\t"
			"movq %%mm2, %%mm5\n\t"
			"punpcklwd %5, %%mm0\n\t"
			"punpcklwd %5, %%mm1\n\t"
			"punpcklwd %5, %%mm2\n\t"
			"punpckhwd %5, %%mm3\n\t"
			"punpckhwd %5, %%mm4\n\t"
			"punpckhwd %5, %%mm5\n\t"
			"psllq $8, %%mm1\n\t"
			"psllq $16, %%mm2\n\t"
			"por %%mm1, %%mm0\n\t"
			"por %%mm2, %%mm0\n\t"
			"psllq $8, %%mm4\n\t"
			"psllq $16, %%mm5\n\t"
			"por %%mm4, %%mm3\n\t"
			"por %%mm5, %%mm3\n\t"

			"movq %%mm0, %%mm6\n\t"
			"movq %%mm3, %%mm7\n\t"

			"movq 8%1, %%mm0\n\t"
			"movq 8%1, %%mm1\n\t"
			"movq 8%1, %%mm2\n\t"
			"pand %2, %%mm0\n\t"
			"pand %3, %%mm1\n\t"
			"pand %4, %%mm2\n\t"
			"psllq $3, %%mm0\n\t"
			"psrlq $3, %%mm1\n\t"
			"psrlq $8, %%mm2\n\t"
			"movq %%mm0, %%mm3\n\t"
			"movq %%mm1, %%mm4\n\t"
			"movq %%mm2, %%mm5\n\t"
			"punpcklwd %5, %%mm0\n\t"
			"punpcklwd %5, %%mm1\n\t"
			"punpcklwd %5, %%mm2\n\t"
			"punpckhwd %5, %%mm3\n\t"
			"punpckhwd %5, %%mm4\n\t"
			"punpckhwd %5, %%mm5\n\t"
			"psllq $8, %%mm1\n\t"
			"psllq $16, %%mm2\n\t"
			"por %%mm1, %%mm0\n\t"
			"por %%mm2, %%mm0\n\t"
			"psllq $8, %%mm4\n\t"
			"psllq $16, %%mm5\n\t"
			"por %%mm4, %%mm3\n\t"
			"por %%mm5, %%mm3\n\t"
			:"=m"(*d)
			:"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
			:"memory");
		/* Borrowed 32 to 24 */
		/* Squeeze the unused fourth byte out of each of the 8 unpacked
		   pixels and store the resulting 24 bytes. */
		__asm __volatile(
			"movq %%mm0, %%mm4\n\t"
			"movq %%mm3, %%mm5\n\t"
			"movq %%mm6, %%mm0\n\t"
			"movq %%mm7, %%mm1\n\t"

			"movq %%mm4, %%mm6\n\t"
			"movq %%mm5, %%mm7\n\t"
			"movq %%mm0, %%mm2\n\t"
			"movq %%mm1, %%mm3\n\t"

			"psrlq $8, %%mm2\n\t"
			"psrlq $8, %%mm3\n\t"
			"psrlq $8, %%mm6\n\t"
			"psrlq $8, %%mm7\n\t"
			"pand %2, %%mm0\n\t"
			"pand %2, %%mm1\n\t"
			"pand %2, %%mm4\n\t"
			"pand %2, %%mm5\n\t"
			"pand %3, %%mm2\n\t"
			"pand %3, %%mm3\n\t"
			"pand %3, %%mm6\n\t"
			"pand %3, %%mm7\n\t"
			"por %%mm2, %%mm0\n\t"
			"por %%mm3, %%mm1\n\t"
			"por %%mm6, %%mm4\n\t"
			"por %%mm7, %%mm5\n\t"

			"movq %%mm1, %%mm2\n\t"
			"movq %%mm4, %%mm3\n\t"
			"psllq $48, %%mm2\n\t"
			"psllq $32, %%mm3\n\t"
			"pand %4, %%mm2\n\t"
			"pand %5, %%mm3\n\t"
			"por %%mm2, %%mm0\n\t"
			"psrlq $16, %%mm1\n\t"
			"psrlq $32, %%mm4\n\t"
			"psllq $16, %%mm5\n\t"
			"por %%mm3, %%mm1\n\t"
			"pand %6, %%mm5\n\t"
			"por %%mm5, %%mm4\n\t"

			MOVNTQ" %%mm0, %0\n\t"
			MOVNTQ" %%mm1, 8%0\n\t"
			MOVNTQ" %%mm4, 16%0"

			:"=m"(*d)
			:"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
			:"memory");
		d += 24;
		s += 8;
	}
	__asm __volatile(SFENCE:::"memory");
	__asm __volatile(EMMS:::"memory");
#endif
	/* Scalar path: the whole input without MMX, otherwise the 0..7 pixels
	   the loop above left over. */
	while(s < end)
	{
		register uint16_t bgr;
		bgr = *s++;
		*d++ = (bgr&0x1F)<<3;
		*d++ = (bgr&0x7E0)>>3;
		*d++ = (bgr&0xF800)>>8;
	}
}
/**
 * Expand packed 15-bit pixels into 32-bit pixels.
 *
 * The input is read as native-endian uint16_t values. For every pixel four
 * bytes are stored, in this order: (v & 0x1F) << 3, (v & 0x3E0) >> 2,
 * (v & 0x7C00) >> 7, 0. Each 5-bit channel therefore lands in the top five
 * bits of its byte and the low three bits stay zero.
 *
 * @param src       input buffer, reinterpreted as an array of uint16_t
 * @param dst       output buffer; receives 4 bytes per input pixel
 * @param src_size  input size in bytes; a trailing odd byte is ignored
 */
static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
    const uint16_t *end;
#ifdef HAVE_MMX
    const uint16_t *mm_end;
#endif
    uint8_t *d = (uint8_t *)dst;
    const uint16_t *s = (const uint16_t *)src;
    end = s + src_size/2;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
    /* mm7 is the zero register used by the punpck* widening steps below. */
    __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory");
    /*
     * Every iteration loads 4 pixels (8 bytes) and stores 16 bytes, so the
     * vector loop may only cover a whole number of 4-pixel groups.  Bounding
     * it by the rounded-down address of `end` let the final iteration run
     * past both buffers whenever the pixel count was not a multiple of 4.
     */
    mm_end = s + ((src_size/2) & ~3U);
    while(s < mm_end)
    {
        /* mask15b/mask15g/mask15r come from the including file; presumably
           they select the same bit fields as the scalar masks further down. */
        __asm __volatile(
            PREFETCH" 32%1\n\t"
            /* three copies of the same 4 pixels, one per channel */
            "movq %1, %%mm0\n\t"
            "movq %1, %%mm1\n\t"
            "movq %1, %%mm2\n\t"
            "pand %2, %%mm0\n\t"
            "pand %3, %%mm1\n\t"
            "pand %4, %%mm2\n\t"
            /* move every channel into bits 3..7 of its 16-bit lane */
            "psllq $3, %%mm0\n\t"
            "psrlq $2, %%mm1\n\t"
            "psrlq $7, %%mm2\n\t"
            "movq %%mm0, %%mm3\n\t"
            "movq %%mm1, %%mm4\n\t"
            "movq %%mm2, %%mm5\n\t"
            /* widen 16-bit lanes to 32-bit: low pair in mm0-2, high pair in mm3-5 */
            "punpcklwd %%mm7, %%mm0\n\t"
            "punpcklwd %%mm7, %%mm1\n\t"
            "punpcklwd %%mm7, %%mm2\n\t"
            "punpckhwd %%mm7, %%mm3\n\t"
            "punpckhwd %%mm7, %%mm4\n\t"
            "punpckhwd %%mm7, %%mm5\n\t"
            /* merge the channels into bytes 0, 1 and 2 of each 32-bit pixel */
            "psllq $8, %%mm1\n\t"
            "psllq $16, %%mm2\n\t"
            "por %%mm1, %%mm0\n\t"
            "por %%mm2, %%mm0\n\t"
            "psllq $8, %%mm4\n\t"
            "psllq $16, %%mm5\n\t"
            "por %%mm4, %%mm3\n\t"
            "por %%mm5, %%mm3\n\t"
            MOVNTQ" %%mm0, %0\n\t"
            MOVNTQ" %%mm3, 8%0\n\t"
            :"=m"(*d)
            :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
            :"memory");
        d += 16;
        s += 4;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: whole input without MMX, otherwise the last 0..3 pixels. */
    while(s < end)
    {
        register uint16_t bgr;
        bgr = *s++;
        *d++ = (bgr&0x1F)<<3;
        *d++ = (bgr&0x3E0)>>2;
        *d++ = (bgr&0x7C00)>>7;
        *d++ = 0;
    }
}
/**
 * Expand packed 16-bit (5-6-5) pixels into 32-bit pixels.
 *
 * The input is read as native-endian uint16_t values. For every pixel four
 * bytes are stored, in this order: (v & 0x1F) << 3, (v & 0x7E0) >> 3,
 * (v & 0xF800) >> 8, 0. Each channel therefore lands in the top bits of its
 * byte and the remaining low bits stay zero.
 *
 * @param src       input buffer, reinterpreted as an array of uint16_t
 * @param dst       output buffer; receives 4 bytes per input pixel
 * @param src_size  input size in bytes; a trailing odd byte is ignored
 */
static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, unsigned src_size)
{
    const uint16_t *end;
#ifdef HAVE_MMX
    const uint16_t *mm_end;
#endif
    uint8_t *d = (uint8_t *)dst;
    const uint16_t *s = (const uint16_t *)src; /* keep the const qualifier of src */
    end = s + src_size/2;
#ifdef HAVE_MMX
    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
    /* mm7 is the zero register used by the punpck* widening steps below. */
    __asm __volatile("pxor %%mm7,%%mm7\n\t":::"memory");
    /*
     * Every iteration loads 4 pixels (8 bytes) and stores 16 bytes, so the
     * vector loop may only cover a whole number of 4-pixel groups.  Bounding
     * it by the rounded-down address of `end` let the final iteration run
     * past both buffers whenever the pixel count was not a multiple of 4.
     */
    mm_end = s + ((src_size/2) & ~3U);
    while(s < mm_end)
    {
        /* mask16b/mask16g/mask16r come from the including file; presumably
           they select the same bit fields as the scalar masks further down. */
        __asm __volatile(
            PREFETCH" 32%1\n\t"
            /* three copies of the same 4 pixels, one per channel */
            "movq %1, %%mm0\n\t"
            "movq %1, %%mm1\n\t"
            "movq %1, %%mm2\n\t"
            "pand %2, %%mm0\n\t"
            "pand %3, %%mm1\n\t"
            "pand %4, %%mm2\n\t"
            /* move every channel to the top of the low byte of its 16-bit lane */
            "psllq $3, %%mm0\n\t"
            "psrlq $3, %%mm1\n\t"
            "psrlq $8, %%mm2\n\t"
            "movq %%mm0, %%mm3\n\t"
            "movq %%mm1, %%mm4\n\t"
            "movq %%mm2, %%mm5\n\t"
            /* widen 16-bit lanes to 32-bit: low pair in mm0-2, high pair in mm3-5 */
            "punpcklwd %%mm7, %%mm0\n\t"
            "punpcklwd %%mm7, %%mm1\n\t"
            "punpcklwd %%mm7, %%mm2\n\t"
            "punpckhwd %%mm7, %%mm3\n\t"
            "punpckhwd %%mm7, %%mm4\n\t"
            "punpckhwd %%mm7, %%mm5\n\t"
            /* merge the channels into bytes 0, 1 and 2 of each 32-bit pixel */
            "psllq $8, %%mm1\n\t"
            "psllq $16, %%mm2\n\t"
            "por %%mm1, %%mm0\n\t"
            "por %%mm2, %%mm0\n\t"
            "psllq $8, %%mm4\n\t"
            "psllq $16, %%mm5\n\t"
            "por %%mm4, %%mm3\n\t"
            "por %%mm5, %%mm3\n\t"
            MOVNTQ" %%mm0, %0\n\t"
            MOVNTQ" %%mm3, 8%0\n\t"
            :"=m"(*d)
            :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
            :"memory");
        d += 16;
        s += 4;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
#endif
    /* Scalar path: whole input without MMX, otherwise the last 0..3 pixels. */
    while(s < end)
    {
        register uint16_t bgr;
        bgr = *s++;
        *d++ = (bgr&0x1F)<<3;
        *d++ = (bgr&0x7E0)>>3;
        *d++ = (bgr&0xF800)>>8;
        *d++ = 0;
    }
}
static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsigned int src_size) static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{ {
#ifdef HAVE_MMX #ifdef HAVE_MMX
/* TODO: unroll this loop */
asm volatile ( asm volatile (
"xorl %%eax, %%eax \n\t" "xorl %%eax, %%eax \n\t"
".balign 16 \n\t" ".balign 16 \n\t"
@ -554,9 +955,9 @@ static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsign
"movq %%mm0, %%mm2 \n\t" "movq %%mm0, %%mm2 \n\t"
"pslld $16, %%mm0 \n\t" "pslld $16, %%mm0 \n\t"
"psrld $16, %%mm1 \n\t" "psrld $16, %%mm1 \n\t"
"pand "MANGLE(mask32r)", %%mm0 \n\t" "pand "MANGLE(mask32r)", %%mm0 \n\t"
"pand "MANGLE(mask32g)", %%mm2 \n\t" "pand "MANGLE(mask32g)", %%mm2 \n\t"
"pand "MANGLE(mask32b)", %%mm1 \n\t" "pand "MANGLE(mask32b)", %%mm1 \n\t"
"por %%mm0, %%mm2 \n\t" "por %%mm0, %%mm2 \n\t"
"por %%mm1, %%mm2 \n\t" "por %%mm1, %%mm2 \n\t"
MOVNTQ" %%mm2, (%1, %%eax) \n\t" MOVNTQ" %%mm2, (%1, %%eax) \n\t"
@ -570,8 +971,8 @@ static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsign
__asm __volatile(SFENCE:::"memory"); __asm __volatile(SFENCE:::"memory");
__asm __volatile(EMMS:::"memory"); __asm __volatile(EMMS:::"memory");
#else #else
int i; unsigned i;
int num_pixels= src_size >> 2; unsigned num_pixels = src_size >> 2;
for(i=0; i<num_pixels; i++) for(i=0; i<num_pixels; i++)
{ {
dst[4*i + 0] = src[4*i + 2]; dst[4*i + 0] = src[4*i + 2];
@ -583,7 +984,7 @@ static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, unsign
static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsigned int src_size) static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsigned int src_size)
{ {
int i; unsigned i;
#ifdef HAVE_MMX #ifdef HAVE_MMX
int mmx_size= 23 - src_size; int mmx_size= 23 - src_size;
asm volatile ( asm volatile (
@ -631,15 +1032,16 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, unsign
__asm __volatile(EMMS:::"memory"); __asm __volatile(EMMS:::"memory");
if(mmx_size==23) return; //finihsed, was multiple of 8 if(mmx_size==23) return; //finihsed, was multiple of 8
src+= src_size; src+= src_size;
dst+= src_size; dst+= src_size;
src_size= 23 - mmx_size; src_size= 23-mmx_size;
src-= src_size; src-= src_size;
dst-= src_size; dst-= src_size;
#endif #endif
for(i=0; i<src_size; i+=3) for(i=0; i<src_size; i+=3)
{ {
register int x; register uint8_t x;
x = src[i + 2]; x = src[i + 2];
dst[i + 1] = src[i + 1]; dst[i + 1] = src[i + 1];
dst[i + 2] = src[i + 0]; dst[i + 2] = src[i + 0];
@ -651,8 +1053,8 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
unsigned int width, unsigned int height, unsigned int width, unsigned int height,
unsigned int lumStride, unsigned int chromStride, unsigned int dstStride, int vertLumPerChroma) unsigned int lumStride, unsigned int chromStride, unsigned int dstStride, int vertLumPerChroma)
{ {
int y; unsigned y;
const int chromWidth= width>>1; const unsigned chromWidth= width>>1;
for(y=0; y<height; y++) for(y=0; y<height; y++)
{ {
#ifdef HAVE_MMX #ifdef HAVE_MMX
@ -691,14 +1093,33 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
: "%eax" : "%eax"
); );
#else #else
#if __WORDSIZE >= 64
int i; int i;
for(i=0; i<chromWidth; i++) uint64_t *ldst = (uint64_t *) dst;
{ const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
dst[4*i+0] = ysrc[2*i+0]; for(i = 0; i < chromWidth; i += 2){
dst[4*i+1] = usrc[i]; uint64_t k, l;
dst[4*i+2] = ysrc[2*i+1]; k = yc[0] + (uc[0] << 8) +
dst[4*i+3] = vsrc[i]; (yc[1] << 16) + (vc[0] << 24);
l = yc[2] + (uc[1] << 8) +
(yc[3] << 16) + (vc[1] << 24);
*ldst++ = k + (l << 32);
yc += 4;
uc += 2;
vc += 2;
} }
#else
int i, *idst = (int32_t *) dst;
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
for(i = 0; i < chromWidth; i++){
*idst++ = yc[0] + (uc[0] << 8) +
(yc[1] << 16) + (vc[0] << 24);
yc += 2;
uc++;
vc++;
}
#endif
#endif #endif
if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) ) if((y&(vertLumPerChroma-1))==(vertLumPerChroma-1) )
{ {
@ -748,8 +1169,8 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
unsigned int width, unsigned int height, unsigned int width, unsigned int height,
unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) unsigned int lumStride, unsigned int chromStride, unsigned int srcStride)
{ {
int y; unsigned y;
const int chromWidth= width>>1; const unsigned chromWidth= width>>1;
for(y=0; y<height; y+=2) for(y=0; y<height; y+=2)
{ {
#ifdef HAVE_MMX #ifdef HAVE_MMX
@ -835,7 +1256,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
: "memory", "%eax" : "memory", "%eax"
); );
#else #else
int i; unsigned i;
for(i=0; i<chromWidth; i++) for(i=0; i<chromWidth; i++)
{ {
ydst[2*i+0] = src[4*i+0]; ydst[2*i+0] = src[4*i+0];
@ -884,8 +1305,8 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
unsigned int width, unsigned int height, unsigned int width, unsigned int height,
unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) unsigned int lumStride, unsigned int chromStride, unsigned int srcStride)
{ {
int y; unsigned y;
const int chromWidth= width>>1; const unsigned chromWidth= width>>1;
for(y=0; y<height; y+=2) for(y=0; y<height; y+=2)
{ {
#ifdef HAVE_MMX #ifdef HAVE_MMX
@ -971,7 +1392,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
: "memory", "%eax" : "memory", "%eax"
); );
#else #else
int i; unsigned i;
for(i=0; i<chromWidth; i++) for(i=0; i<chromWidth; i++)
{ {
udst[i] = src[4*i+0]; udst[i] = src[4*i+0];
@ -1010,12 +1431,12 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
unsigned int width, unsigned int height, unsigned int width, unsigned int height,
unsigned int lumStride, unsigned int chromStride, unsigned int srcStride) unsigned int lumStride, unsigned int chromStride, unsigned int srcStride)
{ {
int y; unsigned y;
const int chromWidth= width>>1; const unsigned chromWidth= width>>1;
#ifdef HAVE_MMX #ifdef HAVE_MMX
for(y=0; y<height-2; y+=2) for(y=0; y<height-2; y+=2)
{ {
int i; unsigned i;
for(i=0; i<2; i++) for(i=0; i<2; i++)
{ {
asm volatile( asm volatile(
@ -1254,7 +1675,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
#endif #endif
for(; y<height; y+=2) for(; y<height; y+=2)
{ {
int i; unsigned i;
for(i=0; i<chromWidth; i++) for(i=0; i<chromWidth; i++)
{ {
unsigned int b= src[6*i+0]; unsigned int b= src[6*i+0];
@ -1304,12 +1725,13 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
} }
void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest,
int width, int height, int src1Stride, int src2Stride, int dstStride){ unsigned width, unsigned height, unsigned src1Stride,
int h; unsigned src2Stride, unsigned dstStride){
unsigned h;
for(h=0; h < height; h++) for(h=0; h < height; h++)
{ {
int w; unsigned w;
#ifdef HAVE_MMX #ifdef HAVE_MMX
#ifdef HAVE_SSE2 #ifdef HAVE_SSE2

@ -65,6 +65,14 @@ untested special converters
#include "rgb2rgb.h" #include "rgb2rgb.h"
#include "../libvo/fastmemcpy.h" #include "../libvo/fastmemcpy.h"
#include "../mp_msg.h" #include "../mp_msg.h"
/*
 * Shorthand logging macros: each forwards its printf-style arguments to
 * mp_msg() with the MSGT_SWS module tag and the level named by the macro.
 */
#define MSG_FATAL(...) mp_msg(MSGT_SWS, MSGL_FATAL, ##__VA_ARGS__)
#define MSG_ERR(...)   mp_msg(MSGT_SWS, MSGL_ERR,   ##__VA_ARGS__)
#define MSG_WARN(...)  mp_msg(MSGT_SWS, MSGL_WARN,  ##__VA_ARGS__)
#define MSG_INFO(...)  mp_msg(MSGT_SWS, MSGL_INFO,  ##__VA_ARGS__)
#define MSG_V(...)     mp_msg(MSGT_SWS, MSGL_V,     ##__VA_ARGS__)
#define MSG_DBG2(...)  mp_msg(MSGT_SWS, MSGL_DBG2,  ##__VA_ARGS__)
#undef MOVNTQ #undef MOVNTQ
#undef PAVGB #undef PAVGB
@ -92,19 +100,26 @@ untested special converters
#endif #endif
//FIXME replace this with something faster //FIXME replace this with something faster
#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420) #define isBGR(x) ((x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15)
#define isYUV(x) ((x)==IMGFMT_YUY2 || isPlanarYUV(x)) #define isRGB(x) ((x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24|| (x)==IMGFMT_RGB16|| (x)==IMGFMT_RGB15)
#define isHalfChrV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420) #define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV|| (x)==IMGFMT_YVU9 || (x)==IMGFMT_IF09)
#define isYUV(x) (!(isBGR(x) || isRGB(x)))
#define isHalfChrV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
#define isHalfChrH(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_YV12 || (x)==IMGFMT_I420) #define isHalfChrH(x) ((x)==IMGFMT_YUY2 || (x)==IMGFMT_YV12 || (x)==IMGFMT_I420)
#define isPacked(x) ((x)==IMGFMT_YUY2 || ((x)&IMGFMT_BGR_MASK)==IMGFMT_BGR || ((x)&IMGFMT_RGB_MASK)==IMGFMT_RGB) #define isPacked(x) (isYUV(x) && !isPlanarYUV(x))
#define isGray(x) ((x)==IMGFMT_Y800) #define isGray(x) ((x)==IMGFMT_Y800) /* Behaviour the same as PACKED but it's PLANAR */
#define isSupportedIn(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_YUY2 \ #define isSupportedIn(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_YUY2 \
|| (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\ || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\
|| (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\ || (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\
|| (x)==IMGFMT_Y800) || (x)==IMGFMT_Y800)
#define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 \ #define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 \
|| (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15) || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15)
#define isBGR(x) ((x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15) #define isSupportedUnscaledIn(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_YUY2 || (x)==IMGFMT_NV12 \
|| (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15\
|| (x)==IMGFMT_RGB32|| (x)==IMGFMT_RGB24\
|| (x)==IMGFMT_Y800)
#define isSupportedUnscaledOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x) == IMGFMT_YUY2 \
|| (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15)
#define RGB2YUV_SHIFT 16 #define RGB2YUV_SHIFT 16
#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
@ -751,7 +766,6 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
if (flags&SWS_BICUBIC) filterSize= 4; if (flags&SWS_BICUBIC) filterSize= 4;
else if(flags&SWS_X ) filterSize= 4; else if(flags&SWS_X ) filterSize= 4;
else filterSize= 2; // SWS_BILINEAR / SWS_AREA else filterSize= 2; // SWS_BILINEAR / SWS_AREA
// printf("%d %d %d\n", filterSize, srcW, dstW);
filter= (double*)memalign(8, dstW*sizeof(double)*filterSize); filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
xDstInSrc= xInc/2 - 0x8000; xDstInSrc= xInc/2 - 0x8000;
@ -780,12 +794,10 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
y4 = ( -1.0*d + 1.0*d*d*d)/6.0; y4 = ( -1.0*d + 1.0*d*d*d)/6.0;
} }
// printf("%d %d %d \n", coeff, (int)d, xDstInSrc);
filter[i*filterSize + 0]= y1; filter[i*filterSize + 0]= y1;
filter[i*filterSize + 1]= y2; filter[i*filterSize + 1]= y2;
filter[i*filterSize + 2]= y3; filter[i*filterSize + 2]= y3;
filter[i*filterSize + 3]= y4; filter[i*filterSize + 3]= y4;
// printf("%1.3f %1.3f %1.3f %1.3f %1.3f\n",d , y1, y2, y3, y4);
} }
else else
{ {
@ -795,7 +807,6 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16); double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16);
double coeff= 1.0 - d; double coeff= 1.0 - d;
if(coeff<0) coeff=0; if(coeff<0) coeff=0;
// printf("%d %d %d \n", coeff, (int)d, xDstInSrc);
filter[i*filterSize + j]= coeff; filter[i*filterSize + j]= coeff;
xx++; xx++;
} }
@ -812,7 +823,6 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
else if(flags&SWS_X) filterSize= (int)ceil(1 + 4.0*srcW / (double)dstW); else if(flags&SWS_X) filterSize= (int)ceil(1 + 4.0*srcW / (double)dstW);
else if(flags&SWS_AREA) filterSize= (int)ceil(1 + 1.0*srcW / (double)dstW); else if(flags&SWS_AREA) filterSize= (int)ceil(1 + 1.0*srcW / (double)dstW);
else /* BILINEAR */ filterSize= (int)ceil(1 + 2.0*srcW / (double)dstW); else /* BILINEAR */ filterSize= (int)ceil(1 + 2.0*srcW / (double)dstW);
// printf("%d %d %d\n", *filterSize, srcW, dstW);
filter= (double*)memalign(8, dstW*sizeof(double)*filterSize); filter= (double*)memalign(8, dstW*sizeof(double)*filterSize);
xDstInSrc= xInc/2 - 0x8000; xDstInSrc= xInc/2 - 0x8000;
@ -849,7 +859,6 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
coeff= 1.0 - d; coeff= 1.0 - d;
if(coeff<0) coeff=0; if(coeff<0) coeff=0;
} }
// printf("%1.3f %2.3f %d \n", coeff, d, xDstInSrc);
filter[i*filterSize + j]= coeff; filter[i*filterSize + j]= coeff;
xx++; xx++;
} }
@ -940,7 +949,7 @@ static inline void initFilter(int16_t **outFilter, int16_t **filterPos, int *out
*outFilterSize= filterSize; *outFilterSize= filterSize;
if(flags&SWS_PRINT_INFO) if(flags&SWS_PRINT_INFO)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize); MSG_INFO("SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
/* try to reduce the filter-size (step2 reduce it) */ /* try to reduce the filter-size (step2 reduce it) */
for(i=0; i<dstW; i++) for(i=0; i<dstW; i++)
{ {
@ -1254,6 +1263,32 @@ cpuCaps= gCpuCaps;
#endif //!RUNTIME_CPUDETECT #endif //!RUNTIME_CPUDETECT
} }
/**
 * Unscaled converter from planar 4:2:0 input (YV12 or I420) to a layout with
 * one luma plane and one byte-interleaved chroma plane.
 *
 * The luma plane is copied as is; the two source chroma planes are merged
 * with interleaveBytes(). The order in which the two chroma planes are fed
 * to interleaveBytes() depends on c->srcFormat.
 *
 * @param c          scaler context; srcW and srcFormat are read
 * @param src        source plane pointers (index 0 luma, 1 and 2 chroma)
 * @param srcStride  source strides in bytes, one per plane
 * @param srcSliceY  first luma row of the slice
 * @param srcSliceH  number of luma rows in the slice
 * @param dstParam   destination planes (index 0 luma, 1 interleaved chroma)
 * @param dstStride  destination strides in bytes
 */
static void PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                                int srcSliceH, uint8_t* dstParam[], int dstStride[]){
    uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
    /* 4:2:0 input: each chroma plane has half the luma width and height
       (rounded up), so only that much may be read from src[1] / src[2].
       Passing the luma dimensions read four times the available chroma. */
    const int chromW= (c->srcW+1)>>1;
    const int chromH= (srcSliceH+1)>>1;

    /* Copy Y plane */
    if(dstStride[0]==srcStride[0])
        memcpy(dst, src[0], srcSliceH*dstStride[0]);
    else
    {
        int i;
        uint8_t *srcPtr= src[0];
        uint8_t *dstPtr= dst;
        for(i=0; i<srcSliceH; i++)
        {
            /* copy only the visible pixels: copying srcStride[0] bytes
               overruns the destination row when dstStride[0] is smaller */
            memcpy(dstPtr, srcPtr, c->srcW);
            srcPtr+= srcStride[0];
            dstPtr+= dstStride[0];
        }
    }

    /* NOTE(review): the chroma start offset (dstStride[1]*srcSliceY) and the
       row stride handed to interleaveBytes (dstStride[0]) are kept exactly as
       before; they are only mutually consistent if callers pass
       dstStride[1] == dstStride[0]/2 or srcSliceY == 0 - confirm the
       convention used for the interleaved plane. */
    dst = dstParam[1] + dstStride[1]*srcSliceY;
    /* interleaveBytes is assumed to take `width` bytes from each source row
       and emit 2*width bytes per destination row - verify against its body. */
    if(c->srcFormat==IMGFMT_YV12)
        interleaveBytes( src[1],src[2],dst,chromW,chromH,srcStride[1],srcStride[2],dstStride[0] );
    else /* I420 & IYUV */
        interleaveBytes( src[2],src[1],dst,chromW,chromH,srcStride[2],srcStride[1],dstStride[0] );
}
/* Warper functions for yuv2bgr */ /* Warper functions for yuv2bgr */
static void planarYuvToBgr(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static void planarYuvToBgr(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dstParam[], int dstStride[]){ int srcSliceH, uint8_t* dstParam[], int dstStride[]){
@ -1265,6 +1300,16 @@ static void planarYuvToBgr(SwsContext *c, uint8_t* src[], int srcStride[], int s
yuv2rgb( dst,src[0],src[2],src[1],c->srcW,srcSliceH,dstStride[0],srcStride[0],srcStride[1] ); yuv2rgb( dst,src[0],src[2],src[1],c->srcW,srcSliceH,dstStride[0],srcStride[0],srcStride[1] );
} }
/**
 * Unscaled converter from planar input to a single packed plane via
 * yv12toyuy2().
 *
 * For IMGFMT_YV12 the chroma planes are passed as src[1], src[2]; for any
 * other source format they are passed swapped (src[2], src[1]).
 * The output starts at row srcSliceY of dstParam[0].
 */
static void Planar2PackedWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                                 int srcSliceH, uint8_t* dstParam[], int dstStride[])
{
    const int swapChroma = (c->srcFormat != IMGFMT_YV12); /* I420 & IYUV */
    uint8_t *chromaA = swapChroma ? src[2] : src[1];
    uint8_t *chromaB = swapChroma ? src[1] : src[2];
    uint8_t *packed  = dstParam[0] + dstStride[0]*srcSliceY;

    yv12toyuy2( src[0],chromaA,chromaB,packed,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] );
}
static void bgr24to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static void bgr24to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[]){
@ -1285,6 +1330,46 @@ static void bgr24to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int
} }
} }
/**
 * Unscaled 24 bpp -> 16 bpp slice converter built on rgb24to16().
 *
 * If the strides are in the exact 3:2 ratio of the pixel sizes, the whole
 * slice (srcSliceH*srcStride[0] bytes) is converted in one call; otherwise
 * each row is converted separately using c->srcW*3 input bytes.
 * Output begins at row srcSliceY of dst[0].
 */
static void bgr24to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const int inPitch  = srcStride[0];
    const int outPitch = dstStride[0];
    uint8_t *in  = src[0];
    uint8_t *out = dst[0] + outPitch*srcSliceY;
    int rowsLeft;

    if(outPitch*3 == inPitch*2)
    {
        rgb24to16(in, out, srcSliceH*inPitch);
        return;
    }

    for(rowsLeft = srcSliceH; rowsLeft > 0; rowsLeft--)
    {
        rgb24to16(in, out, c->srcW*3);
        in  += inPitch;
        out += outPitch;
    }
}
/**
 * Unscaled 24 bpp -> 15 bpp slice converter built on rgb24to15().
 *
 * If the strides are in the exact 3:2 ratio of the pixel sizes, the whole
 * slice (srcSliceH*srcStride[0] bytes) is converted in one call; otherwise
 * each row is converted separately using c->srcW*3 input bytes.
 * Output begins at row srcSliceY of dst[0].
 */
static void bgr24to15Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const int inPitch  = srcStride[0];
    const int outPitch = dstStride[0];
    uint8_t *in  = src[0];
    uint8_t *out = dst[0] + outPitch*srcSliceY;
    int rowsLeft;

    if(outPitch*3 == inPitch*2)
    {
        rgb24to15(in, out, srcSliceH*inPitch);
        return;
    }

    for(rowsLeft = srcSliceH; rowsLeft > 0; rowsLeft--)
    {
        rgb24to15(in, out, c->srcW*3);
        in  += inPitch;
        out += outPitch;
    }
}
static void bgr32to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static void bgr32to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[]){
@ -1305,6 +1390,46 @@ static void bgr32to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int
} }
} }
/**
 * Unscaled 32 bpp -> 16 bpp slice converter built on rgb32to16().
 *
 * If the strides are in the exact 4:2 ratio of the pixel sizes, the whole
 * slice (srcSliceH*srcStride[0] bytes) is converted in one call; otherwise
 * each row is converted separately using c->srcW<<2 input bytes.
 * Output begins at row srcSliceY of dst[0].
 */
static void bgr32to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const int inPitch  = srcStride[0];
    const int outPitch = dstStride[0];
    uint8_t *in  = src[0];
    uint8_t *out = dst[0] + outPitch*srcSliceY;
    int rowsLeft;

    if(outPitch*4 == inPitch*2)
    {
        rgb32to16(in, out, srcSliceH*inPitch);
        return;
    }

    for(rowsLeft = srcSliceH; rowsLeft > 0; rowsLeft--)
    {
        rgb32to16(in, out, c->srcW<<2);
        in  += inPitch;
        out += outPitch;
    }
}
/**
 * Unscaled 32 bpp -> 15 bpp slice converter built on rgb32to15().
 *
 * If the strides are in the exact 4:2 ratio of the pixel sizes, the whole
 * slice (srcSliceH*srcStride[0] bytes) is converted in one call; otherwise
 * each row is converted separately using c->srcW<<2 input bytes.
 * Output begins at row srcSliceY of dst[0].
 */
static void bgr32to15Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const int inPitch  = srcStride[0];
    const int outPitch = dstStride[0];
    uint8_t *in  = src[0];
    uint8_t *out = dst[0] + outPitch*srcSliceY;
    int rowsLeft;

    if(outPitch*4 == inPitch*2)
    {
        rgb32to15(in, out, srcSliceH*inPitch);
        return;
    }

    for(rowsLeft = srcSliceH; rowsLeft > 0; rowsLeft--)
    {
        rgb32to15(in, out, c->srcW<<2);
        in  += inPitch;
        out += outPitch;
    }
}
static void bgr15to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static void bgr15to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[]){
@ -1325,6 +1450,86 @@ static void bgr15to16Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int
} }
} }
/**
 * Unscaled 15 bpp -> 24 bpp slice converter built on rgb15to24().
 *
 * If the strides are in the exact 2:3 ratio of the pixel sizes, the whole
 * slice (srcSliceH*srcStride[0] bytes) is converted in one call; otherwise
 * each row is converted separately using c->srcW<<1 input bytes.
 * Output begins at row srcSliceY of dst[0].
 */
static void bgr15to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const int inPitch  = srcStride[0];
    const int outPitch = dstStride[0];
    uint8_t *in  = src[0];
    uint8_t *out = dst[0] + outPitch*srcSliceY;
    int rowsLeft;

    if(outPitch*2 == inPitch*3)
    {
        rgb15to24(in, out, srcSliceH*inPitch);
        return;
    }

    for(rowsLeft = srcSliceH; rowsLeft > 0; rowsLeft--)
    {
        rgb15to24(in, out, c->srcW<<1);
        in  += inPitch;
        out += outPitch;
    }
}
/**
 * Unscaled 15 bpp -> 32 bpp slice converter built on rgb15to32().
 *
 * If the strides are in the exact 2:4 ratio of the pixel sizes, the whole
 * slice (srcSliceH*srcStride[0] bytes) is converted in one call; otherwise
 * each row is converted separately using c->srcW<<1 input bytes.
 * Output begins at row srcSliceY of dst[0].
 */
static void bgr15to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const int inPitch  = srcStride[0];
    const int outPitch = dstStride[0];
    uint8_t *in  = src[0];
    uint8_t *out = dst[0] + outPitch*srcSliceY;
    int rowsLeft;

    if(outPitch*2 == inPitch*4)
    {
        rgb15to32(in, out, srcSliceH*inPitch);
        return;
    }

    for(rowsLeft = srcSliceH; rowsLeft > 0; rowsLeft--)
    {
        rgb15to32(in, out, c->srcW<<1);
        in  += inPitch;
        out += outPitch;
    }
}
/**
 * Unscaled 16 bpp -> 24 bpp slice converter built on rgb16to24().
 *
 * If the strides are in the exact 2:3 ratio of the pixel sizes, the whole
 * slice (srcSliceH*srcStride[0] bytes) is converted in one call; otherwise
 * each row is converted separately using c->srcW<<1 input bytes.
 * Output begins at row srcSliceY of dst[0].
 */
static void bgr16to24Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const int inPitch  = srcStride[0];
    const int outPitch = dstStride[0];
    uint8_t *in  = src[0];
    uint8_t *out = dst[0] + outPitch*srcSliceY;
    int rowsLeft;

    if(outPitch*2 == inPitch*3)
    {
        rgb16to24(in, out, srcSliceH*inPitch);
        return;
    }

    for(rowsLeft = srcSliceH; rowsLeft > 0; rowsLeft--)
    {
        rgb16to24(in, out, c->srcW<<1);
        in  += inPitch;
        out += outPitch;
    }
}
/**
 * Unscaled 16 bpp -> 32 bpp slice converter built on rgb16to32().
 *
 * If the strides are in the exact 2:4 ratio of the pixel sizes, the whole
 * slice (srcSliceH*srcStride[0] bytes) is converted in one call; otherwise
 * each row is converted separately using c->srcW<<1 input bytes.
 * Output begins at row srcSliceY of dst[0].
 */
static void bgr16to32Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
                             int srcSliceH, uint8_t* dst[], int dstStride[])
{
    const int inPitch  = srcStride[0];
    const int outPitch = dstStride[0];
    uint8_t *in  = src[0];
    uint8_t *out = dst[0] + outPitch*srcSliceY;
    int rowsLeft;

    if(outPitch*2 == inPitch*4)
    {
        rgb16to32(in, out, srcSliceH*inPitch);
        return;
    }

    for(rowsLeft = srcSliceH; rowsLeft > 0; rowsLeft--)
    {
        rgb16to32(in, out, c->srcW<<1);
        in  += inPitch;
        out += outPitch;
    }
}
static void bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static void bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[]){
@ -1346,21 +1551,25 @@ static void simpleCopy(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[],
uint8_t *src[3]; uint8_t *src[3];
uint8_t *dst[3]; uint8_t *dst[3];
if(c->srcFormat == IMGFMT_I420){ if(isPlanarYUV(c->srcFormat))
{
if(c->srcFormat == IMGFMT_I420 || c->srcFormat == IMGFMT_IYUV){
src[0]= srcParam[0]; src[0]= srcParam[0];
src[1]= srcParam[2]; src[1]= srcParam[2];
src[2]= srcParam[1]; src[2]= srcParam[1];
srcStride[0]= srcStrideParam[0]; srcStride[0]= srcStrideParam[0];
srcStride[1]= srcStrideParam[2]; srcStride[1]= srcStrideParam[2];
srcStride[2]= srcStrideParam[1]; srcStride[2]= srcStrideParam[1];
} }
else if(c->srcFormat==IMGFMT_YV12){ else
{
src[0]= srcParam[0]; src[0]= srcParam[0];
src[1]= srcParam[1]; src[1]= srcParam[1];
src[2]= srcParam[2]; src[2]= srcParam[2];
srcStride[0]= srcStrideParam[0]; srcStride[0]= srcStrideParam[0];
srcStride[1]= srcStrideParam[1]; srcStride[1]= srcStrideParam[1];
srcStride[2]= srcStrideParam[2]; srcStride[2]= srcStrideParam[2];
}
} }
else if(isPacked(c->srcFormat) || isGray(c->srcFormat)){ else if(isPacked(c->srcFormat) || isGray(c->srcFormat)){
src[0]= srcParam[0]; src[0]= srcParam[0];
@ -1371,7 +1580,7 @@ static void simpleCopy(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[],
srcStride[2]= 0; srcStride[2]= 0;
} }
if(c->dstFormat == IMGFMT_I420){ if(c->dstFormat == IMGFMT_I420 || c->dstFormat == IMGFMT_IYUV){
dst[0]= dstParam[0]; dst[0]= dstParam[0];
dst[1]= dstParam[2]; dst[1]= dstParam[2];
dst[2]= dstParam[1]; dst[2]= dstParam[1];
@ -1411,9 +1620,21 @@ static void simpleCopy(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[],
int plane; int plane;
for(plane=0; plane<3; plane++) for(plane=0; plane<3; plane++)
{ {
int length= plane==0 ? c->srcW : ((c->srcW+1)>>1); int length;
int y= plane==0 ? srcSliceY: ((srcSliceY+1)>>1); int y;
int height= plane==0 ? srcSliceH: ((srcSliceH+1)>>1); int height;
if(c->srcFormat == IMGFMT_YVU9 || c->srcFormat == IMGFMT_IF09)
{
length= plane==0 ? c->srcW : ((c->srcW+1)>>2);
y= plane==0 ? srcSliceY: ((srcSliceY+1)>>2);
height= plane==0 ? srcSliceH: ((srcSliceH+1)>>2);
}
else
{
length= plane==0 ? c->srcW : ((c->srcW+1)>>1);
y= plane==0 ? srcSliceY: ((srcSliceY+1)>>1);
height= plane==0 ? srcSliceH: ((srcSliceH+1)>>1);
}
if(dstStride[plane]==srcStride[plane]) if(dstStride[plane]==srcStride[plane])
memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]); memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
@ -1433,12 +1654,23 @@ static void simpleCopy(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[],
} }
} }
/**
 * Map alias format codes onto one canonical code so later checks only need
 * to test a single value.
 *
 * @param fourcc  image format code
 * @return IMGFMT_I420 for IMGFMT_IYUV, IMGFMT_Y800 for IMGFMT_Y8,
 *         otherwise fourcc unchanged
 */
static uint32_t remove_dup_fourcc(uint32_t fourcc)
{
    if(fourcc == IMGFMT_IYUV)
        return IMGFMT_I420;
    if(fourcc == IMGFMT_Y8)
        return IMGFMT_Y800;
    return fourcc;
}
SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags, SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags,
SwsFilter *srcFilter, SwsFilter *dstFilter){ SwsFilter *srcFilter, SwsFilter *dstFilter){
SwsContext *c; SwsContext *c;
int i; int i;
int usesFilter; int usesFilter;
int simple_copy, unscaled_copy;
SwsFilter dummyFilter= {NULL, NULL, NULL, NULL}; SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
#ifdef ARCH_X86 #ifdef ARCH_X86
@ -1449,25 +1681,44 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
if(swScale==NULL) globalInit(); if(swScale==NULL) globalInit();
/* avoid dupplicate Formats, so we dont need to check to much */ /* avoid dupplicate Formats, so we dont need to check to much */
if(srcFormat==IMGFMT_IYUV) srcFormat=IMGFMT_I420; srcFormat = remove_dup_fourcc(srcFormat);
if(srcFormat==IMGFMT_Y8) srcFormat=IMGFMT_Y800; dstFormat = remove_dup_fourcc(dstFormat);
if(dstFormat==IMGFMT_Y8) dstFormat=IMGFMT_Y800; /* don't refuse this beauty */
unscaled_copy = (srcW == dstW && srcH == dstH);
if(!isSupportedIn(srcFormat)) simple_copy = (srcW == dstW && srcH == dstH && srcFormat == dstFormat);
{ if(!simple_copy)
mp_msg(MSGT_SWS,MSGL_ERR,"swScaler: %s is not supported as input format\n", vo_format_name(srcFormat));
return NULL;
}
if(!isSupportedOut(dstFormat))
{ {
mp_msg(MSGT_SWS,MSGL_ERR,"swScaler: %s is not supported as output format\n", vo_format_name(dstFormat)); if(unscaled_copy)
return NULL; {
if(!isSupportedUnscaledIn(srcFormat))
{
MSG_ERR("swScaler: %s is not supported as input format\n", vo_format_name(srcFormat));
return NULL;
}
if(!isSupportedUnscaledOut(dstFormat))
{
MSG_ERR("swScaler: %s is not supported as output format\n", vo_format_name(dstFormat));
return NULL;
}
}
else
{
if(!isSupportedIn(srcFormat))
{
MSG_ERR("swScaler: %s is not supported as input format\n", vo_format_name(srcFormat));
return NULL;
}
if(!isSupportedOut(dstFormat))
{
MSG_ERR("swScaler: %s is not supported as output format\n", vo_format_name(dstFormat));
return NULL;
}
}
} }
/* sanity check */ /* sanity check */
if(srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code if(srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
{ {
mp_msg(MSGT_SWS,MSGL_ERR,"swScaler: %dx%d -> %dx%d is invalid scaling dimension\n", MSG_ERR("swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
srcW, srcH, dstW, dstH); srcW, srcH, dstW, dstH);
return NULL; return NULL;
} }
@ -1501,6 +1752,26 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
/* unscaled special Cases */ /* unscaled special Cases */
if(srcW==dstW && srcH==dstH && !usesFilter) if(srcW==dstW && srcH==dstH && !usesFilter)
{ {
/* yv12_to_nv12 */
if((srcFormat == IMGFMT_YV12||srcFormat==IMGFMT_I420)&&dstFormat == IMGFMT_NV12)
{
c->swScale= PlanarToNV12Wrapper;
if(flags&SWS_PRINT_INFO)
MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
vo_format_name(srcFormat), vo_format_name(dstFormat));
return c;
}
/* yv12_to_yuy2 */
if((srcFormat == IMGFMT_YV12||srcFormat==IMGFMT_I420)&&dstFormat == IMGFMT_YUY2)
{
c->swScale= Planar2PackedWrapper;
if(flags&SWS_PRINT_INFO)
MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
vo_format_name(srcFormat), vo_format_name(dstFormat));
return c;
}
/* yuv2bgr */ /* yuv2bgr */
if(isPlanarYUV(srcFormat) && isBGR(dstFormat)) if(isPlanarYUV(srcFormat) && isBGR(dstFormat))
{ {
@ -1516,7 +1787,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
c->swScale= planarYuvToBgr; c->swScale= planarYuvToBgr;
if(flags&SWS_PRINT_INFO) if(flags&SWS_PRINT_INFO)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
vo_format_name(srcFormat), vo_format_name(dstFormat)); vo_format_name(srcFormat), vo_format_name(dstFormat));
return c; return c;
} }
@ -1527,7 +1798,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
c->swScale= simpleCopy; c->swScale= simpleCopy;
if(flags&SWS_PRINT_INFO) if(flags&SWS_PRINT_INFO)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
vo_format_name(srcFormat), vo_format_name(dstFormat)); vo_format_name(srcFormat), vo_format_name(dstFormat));
return c; return c;
} }
@ -1539,7 +1810,31 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
c->swScale= bgr32to24Wrapper; c->swScale= bgr32to24Wrapper;
if(flags&SWS_PRINT_INFO) if(flags&SWS_PRINT_INFO)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
vo_format_name(srcFormat), vo_format_name(dstFormat));
return c;
}
/* bgr32to16 & rgb32to16*/
if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR16)
||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB16))
{
c->swScale= bgr32to16Wrapper;
if(flags&SWS_PRINT_INFO)
MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
vo_format_name(srcFormat), vo_format_name(dstFormat));
return c;
}
/* bgr32to15 & rgb32to15*/
if((srcFormat==IMGFMT_BGR32 && dstFormat==IMGFMT_BGR15)
||(srcFormat==IMGFMT_RGB32 && dstFormat==IMGFMT_RGB15))
{
c->swScale= bgr32to15Wrapper;
if(flags&SWS_PRINT_INFO)
MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
vo_format_name(srcFormat), vo_format_name(dstFormat)); vo_format_name(srcFormat), vo_format_name(dstFormat));
return c; return c;
} }
@ -1551,7 +1846,31 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
c->swScale= bgr24to32Wrapper; c->swScale= bgr24to32Wrapper;
if(flags&SWS_PRINT_INFO) if(flags&SWS_PRINT_INFO)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
vo_format_name(srcFormat), vo_format_name(dstFormat));
return c;
}
/* bgr24to16 & rgb24to16*/
if((srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_BGR16)
||(srcFormat==IMGFMT_RGB24 && dstFormat==IMGFMT_RGB16))
{
c->swScale= bgr24to16Wrapper;
if(flags&SWS_PRINT_INFO)
MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
vo_format_name(srcFormat), vo_format_name(dstFormat));
return c;
}
/* bgr24to15 & rgb24to15*/
if((srcFormat==IMGFMT_BGR24 && dstFormat==IMGFMT_BGR15)
||(srcFormat==IMGFMT_RGB24 && dstFormat==IMGFMT_RGB15))
{
c->swScale= bgr24to15Wrapper;
if(flags&SWS_PRINT_INFO)
MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
vo_format_name(srcFormat), vo_format_name(dstFormat)); vo_format_name(srcFormat), vo_format_name(dstFormat));
return c; return c;
} }
@ -1562,7 +1881,55 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
c->swScale= bgr15to16Wrapper; c->swScale= bgr15to16Wrapper;
if(flags&SWS_PRINT_INFO) if(flags&SWS_PRINT_INFO)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
vo_format_name(srcFormat), vo_format_name(dstFormat));
return c;
}
/* bgr15to24 */
if((srcFormat==IMGFMT_BGR15 && dstFormat==IMGFMT_BGR24)
||(srcFormat==IMGFMT_RGB15 && dstFormat==IMGFMT_RGB24))
{
c->swScale= bgr15to24Wrapper;
if(flags&SWS_PRINT_INFO)
MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
vo_format_name(srcFormat), vo_format_name(dstFormat));
return c;
}
/* bgr15to32 */
if((srcFormat==IMGFMT_BGR15 && dstFormat==IMGFMT_BGR32)
||(srcFormat==IMGFMT_RGB15 && dstFormat==IMGFMT_RGB32))
{
c->swScale= bgr15to32Wrapper;
if(flags&SWS_PRINT_INFO)
MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
vo_format_name(srcFormat), vo_format_name(dstFormat));
return c;
}
/* bgr16to24 */
if((srcFormat==IMGFMT_BGR16 && dstFormat==IMGFMT_BGR24)
||(srcFormat==IMGFMT_RGB16 && dstFormat==IMGFMT_RGB24))
{
c->swScale= bgr16to24Wrapper;
if(flags&SWS_PRINT_INFO)
MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
vo_format_name(srcFormat), vo_format_name(dstFormat));
return c;
}
/* bgr16to32 */
if((srcFormat==IMGFMT_BGR16 && dstFormat==IMGFMT_BGR32)
||(srcFormat==IMGFMT_RGB16 && dstFormat==IMGFMT_RGB32))
{
c->swScale= bgr16to32Wrapper;
if(flags&SWS_PRINT_INFO)
MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
vo_format_name(srcFormat), vo_format_name(dstFormat)); vo_format_name(srcFormat), vo_format_name(dstFormat));
return c; return c;
} }
@ -1573,7 +1940,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
c->swScale= bgr24toyv12Wrapper; c->swScale= bgr24toyv12Wrapper;
if(flags&SWS_PRINT_INFO) if(flags&SWS_PRINT_INFO)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using unscaled %s -> %s special converter\n", MSG_INFO("SwScaler: using unscaled %s -> %s special converter\n",
vo_format_name(srcFormat), vo_format_name(dstFormat)); vo_format_name(srcFormat), vo_format_name(dstFormat));
return c; return c;
} }
@ -1585,7 +1952,7 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
if(!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) if(!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
{ {
if(flags&SWS_PRINT_INFO) if(flags&SWS_PRINT_INFO)
mp_msg(MSGT_SWS,MSGL_WARN,"SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n"); MSG_INFO("SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n");
} }
} }
else else
@ -1723,33 +2090,35 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
char *dither= ""; char *dither= "";
#endif #endif
if(flags&SWS_FAST_BILINEAR) if(flags&SWS_FAST_BILINEAR)
mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: FAST_BILINEAR scaler, "); MSG_INFO("\nSwScaler: FAST_BILINEAR scaler, ");
else if(flags&SWS_BILINEAR) else if(flags&SWS_BILINEAR)
mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: BILINEAR scaler, "); MSG_INFO("\nSwScaler: BILINEAR scaler, ");
else if(flags&SWS_BICUBIC) else if(flags&SWS_BICUBIC)
mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: BICUBIC scaler, "); MSG_INFO("\nSwScaler: BICUBIC scaler, ");
else if(flags&SWS_X) else if(flags&SWS_X)
mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: Experimental scaler, "); MSG_INFO("\nSwScaler: Experimental scaler, ");
else if(flags&SWS_POINT) else if(flags&SWS_POINT)
mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: Nearest Neighbor / POINT scaler, "); MSG_INFO("\nSwScaler: Nearest Neighbor / POINT scaler, ");
else if(flags&SWS_AREA) else if(flags&SWS_AREA)
mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: Area Averageing scaler, "); MSG_INFO("\nSwScaler: Area Averageing scaler, ");
else else
mp_msg(MSGT_SWS,MSGL_INFO,"SwScaler: ehh flags invalid?! "); MSG_INFO("\nSwScaler: ehh flags invalid?! ");
mp_msg(MSGT_SWS,MSGL_INFO,"%dx%d %s -> %dx%d%s %s ", if(dstFormat==IMGFMT_BGR15 || dstFormat==IMGFMT_BGR16)
srcW,srcH, vo_format_name(srcFormat), dstW,dstH, MSG_INFO("from %s to%s %s ",
(dstFormat==IMGFMT_BGR15 || dstFormat==IMGFMT_BGR16) ? vo_format_name(srcFormat), dither, vo_format_name(dstFormat));
dither : "", vo_format_name(dstFormat)); else
MSG_INFO("from %s to %s ",
vo_format_name(srcFormat), vo_format_name(dstFormat));
if(cpuCaps.hasMMX2) if(cpuCaps.hasMMX2)
mp_msg(MSGT_SWS,MSGL_INFO,"using MMX2\n"); MSG_INFO("using MMX2\n");
else if(cpuCaps.has3DNow) else if(cpuCaps.has3DNow)
mp_msg(MSGT_SWS,MSGL_INFO,"using 3DNOW\n"); MSG_INFO("using 3DNOW\n");
else if(cpuCaps.hasMMX) else if(cpuCaps.hasMMX)
mp_msg(MSGT_SWS,MSGL_INFO,"using MMX\n"); MSG_INFO("using MMX\n");
else else
mp_msg(MSGT_SWS,MSGL_INFO,"using C\n"); MSG_INFO("using C\n");
} }
if((flags & SWS_PRINT_INFO) && verbose) if((flags & SWS_PRINT_INFO) && verbose)
@ -1757,70 +2126,70 @@ SwsContext *getSwsContext(int srcW, int srcH, int srcFormat, int dstW, int dstH,
if(cpuCaps.hasMMX) if(cpuCaps.hasMMX)
{ {
if(c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR)) if(c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n"); MSG_V("SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
else else
{ {
if(c->hLumFilterSize==4) if(c->hLumFilterSize==4)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n"); MSG_V("SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n");
else if(c->hLumFilterSize==8) else if(c->hLumFilterSize==8)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n"); MSG_V("SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n");
else else
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n"); MSG_V("SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n");
if(c->hChrFilterSize==4) if(c->hChrFilterSize==4)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n"); MSG_V("SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n");
else if(c->hChrFilterSize==8) else if(c->hChrFilterSize==8)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n"); MSG_V("SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n");
else else
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n"); MSG_V("SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n");
} }
} }
else else
{ {
#ifdef ARCH_X86 #ifdef ARCH_X86
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using X86-Asm scaler for horizontal scaling\n"); MSG_V("SwScaler: using X86-Asm scaler for horizontal scaling\n");
#else #else
if(flags & SWS_FAST_BILINEAR) if(flags & SWS_FAST_BILINEAR)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n"); MSG_V("SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n");
else else
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using C scaler for horizontal scaling\n"); MSG_V("SwScaler: using C scaler for horizontal scaling\n");
#endif #endif
} }
if(isPlanarYUV(dstFormat)) if(isPlanarYUV(dstFormat))
{ {
if(c->vLumFilterSize==1) if(c->vLumFilterSize==1)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C"); MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C");
else else
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C"); MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", cpuCaps.hasMMX ? "MMX" : "C");
} }
else else
{ {
if(c->vLumFilterSize==1 && c->vChrFilterSize==2) if(c->vLumFilterSize==1 && c->vChrFilterSize==2)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n" MSG_V("SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
"SwScaler: 2-tap scaler for vertical chrominance scaling (BGR)\n",cpuCaps.hasMMX ? "MMX" : "C"); "SwScaler: 2-tap scaler for vertical chrominance scaling (BGR)\n",cpuCaps.hasMMX ? "MMX" : "C");
else if(c->vLumFilterSize==2 && c->vChrFilterSize==2) else if(c->vLumFilterSize==2 && c->vChrFilterSize==2)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C"); MSG_V("SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C");
else else
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C"); MSG_V("SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", cpuCaps.hasMMX ? "MMX" : "C");
} }
if(dstFormat==IMGFMT_BGR24) if(dstFormat==IMGFMT_BGR24)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using %s YV12->BGR24 Converter\n", MSG_V("SwScaler: using %s YV12->BGR24 Converter\n",
cpuCaps.hasMMX2 ? "MMX2" : (cpuCaps.hasMMX ? "MMX" : "C")); cpuCaps.hasMMX2 ? "MMX2" : (cpuCaps.hasMMX ? "MMX" : "C"));
else if(dstFormat==IMGFMT_BGR32) else if(dstFormat==IMGFMT_BGR32)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using %s YV12->BGR32 Converter\n", cpuCaps.hasMMX ? "MMX" : "C"); MSG_V("SwScaler: using %s YV12->BGR32 Converter\n", cpuCaps.hasMMX ? "MMX" : "C");
else if(dstFormat==IMGFMT_BGR16) else if(dstFormat==IMGFMT_BGR16)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using %s YV12->BGR16 Converter\n", cpuCaps.hasMMX ? "MMX" : "C"); MSG_V("SwScaler: using %s YV12->BGR16 Converter\n", cpuCaps.hasMMX ? "MMX" : "C");
else if(dstFormat==IMGFMT_BGR15) else if(dstFormat==IMGFMT_BGR15)
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: using %s YV12->BGR15 Converter\n", cpuCaps.hasMMX ? "MMX" : "C"); MSG_V("SwScaler: using %s YV12->BGR15 Converter\n", cpuCaps.hasMMX ? "MMX" : "C");
mp_msg(MSGT_SWS,MSGL_V,"SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); MSG_V("SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
} }
if((flags & SWS_PRINT_INFO) && verbose>1) if((flags & SWS_PRINT_INFO) && verbose>1)
{ {
mp_msg(MSGT_SWS,MSGL_DBG2,"SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", MSG_DBG2("SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc); c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
mp_msg(MSGT_SWS,MSGL_DBG2,"SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", MSG_DBG2("SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc); c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
} }
@ -2039,9 +2408,9 @@ void printVec(SwsVector *a){
for(i=0; i<a->length; i++) for(i=0; i<a->length; i++)
{ {
int x= (int)((a->coeff[i]-min)*60.0/range +0.5); int x= (int)((a->coeff[i]-min)*60.0/range +0.5);
printf("%1.3f ", a->coeff[i]); MSG_DBG2("%1.3f ", a->coeff[i]);
for(;x>0; x--) printf(" "); for(;x>0; x--) MSG_DBG2(" ");
printf("|\n"); MSG_DBG2("|\n");
} }
} }

@ -2626,7 +2626,7 @@ static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStridePar
srcStride[1]= srcStrideParam[1]; srcStride[1]= srcStrideParam[1];
srcStride[2]= srcStrideParam[2]; srcStride[2]= srcStrideParam[2];
} }
else if(isPacked(c->srcFormat)){ else if(isPacked(c->srcFormat) || isBGR(c->srcFormat) || isRGB(c->srcFormat)){
src[0]= src[0]=
src[1]= src[1]=
src[2]= srcParam[0]; src[2]= srcParam[0];

@ -156,7 +156,7 @@ const int32_t Inverse_Table_6_9[8][4] = {
{117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */ {117579, 136230, 16907, 35559} /* SMPTE 240M (1987) */
}; };
static void yuv2rgb_c_init (int bpp, int mode); static void yuv2rgb_c_init (unsigned bpp, int mode);
yuv2rgb_fun yuv2rgb; yuv2rgb_fun yuv2rgb;
@ -166,8 +166,8 @@ static void (* yuv2rgb_c_internal) (uint8_t *, uint8_t *,
static void yuv2rgb_c (void * dst, uint8_t * py, static void yuv2rgb_c (void * dst, uint8_t * py,
uint8_t * pu, uint8_t * pv, uint8_t * pu, uint8_t * pv,
int h_size, int v_size, unsigned h_size, unsigned v_size,
int rgb_stride, int y_stride, int uv_stride) unsigned rgb_stride, unsigned y_stride, unsigned uv_stride)
{ {
v_size >>= 1; v_size >>= 1;
@ -182,7 +182,7 @@ static void yuv2rgb_c (void * dst, uint8_t * py,
} }
} }
void yuv2rgb_init (int bpp, int mode) void yuv2rgb_init (unsigned bpp, int mode)
{ {
yuv2rgb = NULL; yuv2rgb = NULL;
#ifdef CAN_COMPILE_X86_ASM #ifdef CAN_COMPILE_X86_ASM
@ -676,7 +676,7 @@ static int div_round (int dividend, int divisor)
return -((-dividend + (divisor>>1)) / divisor); return -((-dividend + (divisor>>1)) / divisor);
} }
static void yuv2rgb_c_init (int bpp, int mode) static void yuv2rgb_c_init (unsigned bpp, int mode)
{ {
int i; int i;
uint8_t table_Y[1024]; uint8_t table_Y[1024];

@ -29,8 +29,8 @@
static void mlib_YUV2ARGB420_32(uint8_t* image, uint8_t* py, static void mlib_YUV2ARGB420_32(uint8_t* image, uint8_t* py,
uint8_t* pu, uint8_t* pv, uint8_t* pu, uint8_t* pv,
int h_size, int v_size, unsigned h_size, unsigned v_size,
int rgb_stride, int y_stride, int uv_stride) unsigned rgb_stride, unsigned y_stride, unsigned uv_stride)
{ {
mlib_VideoColorYUV2ARGB420(image, py, pu, pv, h_size, mlib_VideoColorYUV2ARGB420(image, py, pu, pv, h_size,
v_size, rgb_stride, y_stride, uv_stride); v_size, rgb_stride, y_stride, uv_stride);
@ -38,8 +38,8 @@ static void mlib_YUV2ARGB420_32(uint8_t* image, uint8_t* py,
static void mlib_YUV2ABGR420_32(uint8_t* image, uint8_t* py, static void mlib_YUV2ABGR420_32(uint8_t* image, uint8_t* py,
uint8_t* pu, uint8_t* pv, uint8_t* pu, uint8_t* pv,
int h_size, int v_size, unsigned h_size, unsigned v_size,
int rgb_stride, int y_stride, int uv_stride) unsigned rgb_stride, unsigned y_stride, unsigned uv_stride)
{ {
mlib_VideoColorYUV2ABGR420(image, py, pu, pv, h_size, mlib_VideoColorYUV2ABGR420(image, py, pu, pv, h_size,
v_size, rgb_stride, y_stride, uv_stride); v_size, rgb_stride, y_stride, uv_stride);
@ -47,15 +47,15 @@ static void mlib_YUV2ABGR420_32(uint8_t* image, uint8_t* py,
static void mlib_YUV2RGB420_24(uint8_t* image, uint8_t* py, static void mlib_YUV2RGB420_24(uint8_t* image, uint8_t* py,
uint8_t* pu, uint8_t* pv, uint8_t* pu, uint8_t* pv,
int h_size, int v_size, unsigned h_size, unsigned v_size,
int rgb_stride, int y_stride, int uv_stride) unsigned rgb_stride, unsigned y_stride, unsigned uv_stride)
{ {
mlib_VideoColorYUV2RGB420(image, py, pu, pv, h_size, mlib_VideoColorYUV2RGB420(image, py, pu, pv, h_size,
v_size, rgb_stride, y_stride, uv_stride); v_size, rgb_stride, y_stride, uv_stride);
} }
yuv2rgb_fun yuv2rgb_init_mlib(int bpp, int mode) yuv2rgb_fun yuv2rgb_init_mlib(unsigned bpp, int mode)
{ {
if( bpp == 24 ) if( bpp == 24 )

@ -123,8 +123,8 @@
static inline void RENAME(yuv420_rgb16) (uint8_t * image, uint8_t * py, static inline void RENAME(yuv420_rgb16) (uint8_t * image, uint8_t * py,
uint8_t * pu, uint8_t * pv, uint8_t * pu, uint8_t * pv,
int h_size, int v_size, unsigned h_size, unsigned v_size,
int rgb_stride, int y_stride, int uv_stride) unsigned rgb_stride, unsigned y_stride, unsigned uv_stride)
{ {
int even = 1; int even = 1;
int x, y; int x, y;
@ -228,8 +228,8 @@ YUV2RGB
static inline void RENAME(yuv420_rgb15) (uint8_t * image, uint8_t * py, static inline void RENAME(yuv420_rgb15) (uint8_t * image, uint8_t * py,
uint8_t * pu, uint8_t * pv, uint8_t * pu, uint8_t * pv,
int h_size, int v_size, unsigned h_size, unsigned v_size,
int rgb_stride, int y_stride, int uv_stride) unsigned rgb_stride, unsigned y_stride, unsigned uv_stride)
{ {
int even = 1; int even = 1;
int x, y; int x, y;
@ -329,8 +329,8 @@ YUV2RGB
static inline void RENAME(yuv420_rgb24) (uint8_t * image, uint8_t * py, static inline void RENAME(yuv420_rgb24) (uint8_t * image, uint8_t * py,
uint8_t * pu, uint8_t * pv, uint8_t * pu, uint8_t * pv,
int h_size, int v_size, unsigned h_size, unsigned v_size,
int rgb_stride, int y_stride, int uv_stride) unsigned rgb_stride, unsigned y_stride, unsigned uv_stride)
{ {
int even = 1; int even = 1;
int x, y; int x, y;
@ -488,8 +488,8 @@ YUV2RGB
static inline void RENAME(yuv420_argb32) (uint8_t * image, uint8_t * py, static inline void RENAME(yuv420_argb32) (uint8_t * image, uint8_t * py,
uint8_t * pu, uint8_t * pv, uint8_t * pu, uint8_t * pv,
int h_size, int v_size, unsigned h_size, unsigned v_size,
int rgb_stride, int y_stride, int uv_stride) unsigned rgb_stride, unsigned y_stride, unsigned uv_stride)
{ {
int even = 1; int even = 1;
int x, y; int x, y;
@ -584,7 +584,7 @@ YUV2RGB
__asm__ __volatile__ (EMMS); __asm__ __volatile__ (EMMS);
} }
yuv2rgb_fun RENAME(yuv2rgb_init) (int bpp, int mode) yuv2rgb_fun RENAME(yuv2rgb_init) (unsigned bpp, int mode)
{ {
if (bpp == 15 && mode == MODE_RGB) return RENAME(yuv420_rgb15); if (bpp == 15 && mode == MODE_RGB) return RENAME(yuv420_rgb15);
if (bpp == 16 && mode == MODE_RGB) return RENAME(yuv420_rgb16); if (bpp == 16 && mode == MODE_RGB) return RENAME(yuv420_rgb16);

Loading…
Cancel
Save