- Place curly brackets on the same line as while/for/if/switch/else/do;
- Place curly brackets at column 0 on the next line when starting a function.

Originally committed as revision 29523 to svn://svn.mplayerhq.hu/mplayer/trunk/libswscale
release/0.6
Ramiro Polla 16 years ago
parent 9dc6bb7b9f
commit dd68318cee
  1. 20
      libswscale/colorspace-test.c
  2. 17
      libswscale/mlib/yuv2rgb_mlib.c
  3. 3
      libswscale/options.c
  4. 9
      libswscale/ppc/swscale_altivec_template.c
  5. 6
      libswscale/ppc/yuv2rgb_altivec.c
  6. 39
      libswscale/rgb2rgb.c
  7. 235
      libswscale/rgb2rgb_template.c
  8. 9
      libswscale/sparc/yuv2rgb_vis.c
  9. 24
      libswscale/swscale-example.c
  10. 627
      libswscale/swscale.c
  11. 2
      libswscale/swscale_internal.h
  12. 310
      libswscale/swscale_template.c
  13. 16
      libswscale/x86/yuv2rgb_mmx.c
  14. 23
      libswscale/x86/yuv2rgb_template.c
  15. 5
      libswscale/yuv2rgb.c

@ -71,7 +71,7 @@ int main(int argc, char **argv)
av_log(NULL, AV_LOG_INFO, "CPU capabilities forced to %x\n", cpu_caps); av_log(NULL, AV_LOG_INFO, "CPU capabilities forced to %x\n", cpu_caps);
sws_rgb2rgb_init(cpu_caps); sws_rgb2rgb_init(cpu_caps);
for(funcNum=0; ; funcNum++){ for(funcNum=0; ; funcNum++) {
struct func_info_s { struct func_info_s {
int src_bpp; int src_bpp;
int dst_bpp; int dst_bpp;
@ -118,13 +118,13 @@ int main(int argc, char **argv)
av_log(NULL, AV_LOG_INFO,"."); av_log(NULL, AV_LOG_INFO,".");
memset(srcBuffer, srcByte, SIZE); memset(srcBuffer, srcByte, SIZE);
for(width=63; width>0; width--){ for(width=63; width>0; width--) {
int dstOffset; int dstOffset;
for(dstOffset=128; dstOffset<196; dstOffset+=4){ for(dstOffset=128; dstOffset<196; dstOffset+=4) {
int srcOffset; int srcOffset;
memset(dstBuffer, dstByte, SIZE); memset(dstBuffer, dstByte, SIZE);
for(srcOffset=128; srcOffset<196; srcOffset+=4){ for(srcOffset=128; srcOffset<196; srcOffset+=4) {
uint8_t *src= srcBuffer+srcOffset; uint8_t *src= srcBuffer+srcOffset;
uint8_t *dst= dstBuffer+dstOffset; uint8_t *dst= dstBuffer+dstOffset;
const char *name=NULL; const char *name=NULL;
@ -139,24 +139,24 @@ int main(int argc, char **argv)
if(!srcBpp) break; if(!srcBpp) break;
for(i=0; i<SIZE; i++){ for(i=0; i<SIZE; i++) {
if(srcBuffer[i]!=srcByte){ if(srcBuffer[i]!=srcByte) {
av_log(NULL, AV_LOG_INFO, "src damaged at %d w:%d src:%d dst:%d %s\n", av_log(NULL, AV_LOG_INFO, "src damaged at %d w:%d src:%d dst:%d %s\n",
i, width, srcOffset, dstOffset, name); i, width, srcOffset, dstOffset, name);
failed=1; failed=1;
break; break;
} }
} }
for(i=0; i<dstOffset; i++){ for(i=0; i<dstOffset; i++) {
if(dstBuffer[i]!=dstByte){ if(dstBuffer[i]!=dstByte) {
av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n", av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n",
i, width, srcOffset, dstOffset, name); i, width, srcOffset, dstOffset, name);
failed=1; failed=1;
break; break;
} }
} }
for(i=dstOffset + width*dstBpp; i<SIZE; i++){ for(i=dstOffset + width*dstBpp; i<SIZE; i++) {
if(dstBuffer[i]!=dstByte){ if(dstBuffer[i]!=dstByte) {
av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n", av_log(NULL, AV_LOG_INFO, "dst damaged at %d w:%d src:%d dst:%d %s\n",
i, width, srcOffset, dstOffset, name); i, width, srcOffset, dstOffset, name);
failed=1; failed=1;

@ -31,8 +31,9 @@
#include "libswscale/swscale.h" #include "libswscale/swscale.h"
static int mlib_YUV2ARGB420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int mlib_YUV2ARGB420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
if(c->srcFormat == PIX_FMT_YUV422P){ {
if(c->srcFormat == PIX_FMT_YUV422P) {
srcStride[1] *= 2; srcStride[1] *= 2;
srcStride[2] *= 2; srcStride[2] *= 2;
} }
@ -45,8 +46,9 @@ static int mlib_YUV2ARGB420_32(SwsContext *c, uint8_t* src[], int srcStride[], i
} }
static int mlib_YUV2ABGR420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int mlib_YUV2ABGR420_32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
if(c->srcFormat == PIX_FMT_YUV422P){ {
if(c->srcFormat == PIX_FMT_YUV422P) {
srcStride[1] *= 2; srcStride[1] *= 2;
srcStride[2] *= 2; srcStride[2] *= 2;
} }
@ -59,8 +61,9 @@ static int mlib_YUV2ABGR420_32(SwsContext *c, uint8_t* src[], int srcStride[], i
} }
static int mlib_YUV2RGB420_24(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int mlib_YUV2RGB420_24(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
if(c->srcFormat == PIX_FMT_YUV422P){ {
if(c->srcFormat == PIX_FMT_YUV422P) {
srcStride[1] *= 2; srcStride[1] *= 2;
srcStride[2] *= 2; srcStride[2] *= 2;
} }
@ -75,7 +78,7 @@ static int mlib_YUV2RGB420_24(SwsContext *c, uint8_t* src[], int srcStride[], in
SwsFunc ff_yuv2rgb_init_mlib(SwsContext *c) SwsFunc ff_yuv2rgb_init_mlib(SwsContext *c)
{ {
switch(c->dstFormat){ switch(c->dstFormat) {
case PIX_FMT_RGB24: return mlib_YUV2RGB420_24; case PIX_FMT_RGB24: return mlib_YUV2RGB420_24;
case PIX_FMT_BGR32: return mlib_YUV2ARGB420_32; case PIX_FMT_BGR32: return mlib_YUV2ARGB420_32;
case PIX_FMT_RGB32: return mlib_YUV2ABGR420_32; case PIX_FMT_RGB32: return mlib_YUV2ABGR420_32;

@ -23,7 +23,8 @@
#include "swscale.h" #include "swscale.h"
#include "swscale_internal.h" #include "swscale_internal.h"
static const char * sws_context_to_name(void * ptr) { static const char * sws_context_to_name(void * ptr)
{
return "swscaler"; return "swscaler";
} }

@ -24,7 +24,8 @@
#define vzero vec_splat_s32(0) #define vzero vec_splat_s32(0)
static inline void static inline void
altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW) { altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW)
{
register int i; register int i;
vector unsigned int altivec_vectorShiftInt19 = vector unsigned int altivec_vectorShiftInt19 =
vec_add(vec_splat_u32(10), vec_splat_u32(9)); vec_add(vec_splat_u32(10), vec_splat_u32(9));
@ -389,7 +390,8 @@ static inline void hScale_altivec_real(int16_t *dst, int dstW,
} }
static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dstParam[], int dstStride_a[]) { int srcSliceH, uint8_t* dstParam[], int dstStride_a[])
{
uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY; uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
// yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]); // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
uint8_t *ysrc = src[0]; uint8_t *ysrc = src[0];
@ -466,7 +468,8 @@ static inline int yv12toyuy2_unscaled_altivec(SwsContext *c, uint8_t* src[], int
} }
static inline int yv12touyvy_unscaled_altivec(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static inline int yv12touyvy_unscaled_altivec(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dstParam[], int dstStride_a[]) { int srcSliceH, uint8_t* dstParam[], int dstStride_a[])
{
uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY; uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
// yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]); // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
uint8_t *ysrc = src[0]; uint8_t *ysrc = src[0];

@ -714,7 +714,7 @@ SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
if ((c->srcH & 0x1) != 0) if ((c->srcH & 0x1) != 0)
return NULL; return NULL;
switch(c->dstFormat){ switch(c->dstFormat) {
case PIX_FMT_RGB24: case PIX_FMT_RGB24:
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n"); av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n");
return altivec_yuv2_rgb24; return altivec_yuv2_rgb24;
@ -738,7 +738,7 @@ SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c)
break; break;
case PIX_FMT_UYVY422: case PIX_FMT_UYVY422:
switch(c->dstFormat){ switch(c->dstFormat) {
case PIX_FMT_BGR32: case PIX_FMT_BGR32:
av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n"); av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n");
return altivec_uyvy_rgb32; return altivec_uyvy_rgb32;
@ -800,7 +800,7 @@ ff_yuv2packedX_altivec(SwsContext *c,
out = (vector unsigned char *)dest; out = (vector unsigned char *)dest;
for (i=0; i<dstW; i+=16){ for (i=0; i<dstW; i+=16) {
Y0 = RND; Y0 = RND;
Y1 = RND; Y1 = RND;
/* extract 16 coeffs from lumSrc */ /* extract 16 coeffs from lumSrc */

@ -196,7 +196,8 @@ DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
32-bit C version, and and&add trick by Michael Niedermayer 32-bit C version, and and&add trick by Michael Niedermayer
*/ */
void sws_rgb2rgb_init(int flags){ void sws_rgb2rgb_init(int flags)
{
#if (HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX) && CONFIG_GPL #if (HAVE_MMX2 || HAVE_AMD3DNOW || HAVE_MMX) && CONFIG_GPL
if (flags & SWS_CPU_CAPS_MMX2) if (flags & SWS_CPU_CAPS_MMX2)
rgb2rgb_init_MMX2(); rgb2rgb_init_MMX2();
@ -227,8 +228,7 @@ void palette8topacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const
{ {
long i; long i;
for (i=0; i<num_pixels; i++) for (i=0; i<num_pixels; i++) {
{
//FIXME slow? //FIXME slow?
dst[0]= palette[src[i]*4+0]; dst[0]= palette[src[i]*4+0];
dst[1]= palette[src[i]*4+1]; dst[1]= palette[src[i]*4+1];
@ -273,8 +273,7 @@ void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size)
{ {
long i; long i;
long num_pixels = src_size >> 2; long num_pixels = src_size >> 2;
for (i=0; i<num_pixels; i++) for (i=0; i<num_pixels; i++) {
{
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
/* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */ /* RGB32 (= A,B,G,R) -> BGR24 (= B,G,R) */
dst[3*i + 0] = src[4*i + 1]; dst[3*i + 0] = src[4*i + 1];
@ -291,8 +290,7 @@ void rgb32to24(const uint8_t *src, uint8_t *dst, long src_size)
void rgb24to32(const uint8_t *src, uint8_t *dst, long src_size) void rgb24to32(const uint8_t *src, uint8_t *dst, long src_size)
{ {
long i; long i;
for (i=0; 3*i<src_size; i++) for (i=0; 3*i<src_size; i++) {
{
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
/* RGB24 (= R,G,B) -> BGR32 (= A,R,G,B) */ /* RGB24 (= R,G,B) -> BGR32 (= A,R,G,B) */
dst[4*i + 0] = 255; dst[4*i + 0] = 255;
@ -314,8 +312,7 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
uint8_t *d = dst; uint8_t *d = dst;
const uint16_t *s = (const uint16_t *)src; const uint16_t *s = (const uint16_t *)src;
end = s + src_size/2; end = s + src_size/2;
while (s < end) while (s < end) {
{
register uint16_t bgr; register uint16_t bgr;
bgr = *s++; bgr = *s++;
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
@ -338,8 +335,7 @@ void rgb16to24(const uint8_t *src, uint8_t *dst, long src_size)
uint8_t *d = dst; uint8_t *d = dst;
const uint16_t *s = (const uint16_t *)src; const uint16_t *s = (const uint16_t *)src;
end = s + src_size/2; end = s + src_size/2;
while (s < end) while (s < end) {
{
register uint16_t bgr; register uint16_t bgr;
bgr = *s++; bgr = *s++;
*d++ = (bgr&0xF800)>>8; *d++ = (bgr&0xF800)>>8;
@ -353,8 +349,7 @@ void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
long i; long i;
long num_pixels = src_size >> 1; long num_pixels = src_size >> 1;
for (i=0; i<num_pixels; i++) for (i=0; i<num_pixels; i++) {
{
unsigned rgb = ((const uint16_t*)src)[i]; unsigned rgb = ((const uint16_t*)src)[i];
((uint16_t*)dst)[i] = (rgb>>11) | (rgb&0x7E0) | (rgb<<11); ((uint16_t*)dst)[i] = (rgb>>11) | (rgb&0x7E0) | (rgb<<11);
} }
@ -365,8 +360,7 @@ void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
long i; long i;
long num_pixels = src_size >> 1; long num_pixels = src_size >> 1;
for (i=0; i<num_pixels; i++) for (i=0; i<num_pixels; i++) {
{
unsigned rgb = ((const uint16_t*)src)[i]; unsigned rgb = ((const uint16_t*)src)[i];
((uint16_t*)dst)[i] = (rgb>>11) | ((rgb&0x7C0)>>1) | ((rgb&0x1F)<<10); ((uint16_t*)dst)[i] = (rgb>>11) | ((rgb&0x7C0)>>1) | ((rgb&0x1F)<<10);
} }
@ -378,8 +372,7 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size)
uint8_t *d = dst; uint8_t *d = dst;
const uint16_t *s = (const uint16_t *)src; const uint16_t *s = (const uint16_t *)src;
end = s + src_size/2; end = s + src_size/2;
while (s < end) while (s < end) {
{
register uint16_t bgr; register uint16_t bgr;
bgr = *s++; bgr = *s++;
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
@ -402,8 +395,7 @@ void rgb15to24(const uint8_t *src, uint8_t *dst, long src_size)
uint8_t *d = dst; uint8_t *d = dst;
const uint16_t *s = (const uint16_t *)src; const uint16_t *s = (const uint16_t *)src;
end = s + src_size/2; end = s + src_size/2;
while (s < end) while (s < end) {
{
register uint16_t bgr; register uint16_t bgr;
bgr = *s++; bgr = *s++;
*d++ = (bgr&0x7C00)>>7; *d++ = (bgr&0x7C00)>>7;
@ -417,8 +409,7 @@ void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size)
long i; long i;
long num_pixels = src_size >> 1; long num_pixels = src_size >> 1;
for (i=0; i<num_pixels; i++) for (i=0; i<num_pixels; i++) {
{
unsigned rgb = ((const uint16_t*)src)[i]; unsigned rgb = ((const uint16_t*)src)[i];
((uint16_t*)dst)[i] = ((rgb&0x7C00)>>10) | ((rgb&0x3E0)<<1) | (rgb<<11); ((uint16_t*)dst)[i] = ((rgb&0x7C00)>>10) | ((rgb&0x3E0)<<1) | (rgb<<11);
} }
@ -429,8 +420,7 @@ void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size)
long i; long i;
long num_pixels = src_size >> 1; long num_pixels = src_size >> 1;
for (i=0; i<num_pixels; i++) for (i=0; i<num_pixels; i++) {
{
unsigned br; unsigned br;
unsigned rgb = ((const uint16_t*)src)[i]; unsigned rgb = ((const uint16_t*)src)[i];
br = rgb&0x7c1F; br = rgb&0x7c1F;
@ -442,8 +432,7 @@ void bgr8torgb8(const uint8_t *src, uint8_t *dst, long src_size)
{ {
long i; long i;
long num_pixels = src_size; long num_pixels = src_size;
for (i=0; i<num_pixels; i++) for (i=0; i<num_pixels; i++) {
{
unsigned b,g,r; unsigned b,g,r;
register uint8_t rgb; register uint8_t rgb;
rgb = src[i]; rgb = src[i];

@ -84,8 +84,7 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
mm_end = end - 23; mm_end = end - 23;
__asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory"); __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory");
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
@ -113,8 +112,7 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
/* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */ /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
*dest++ = 255; *dest++ = 255;
@ -143,8 +141,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long s
#if HAVE_MMX #if HAVE_MMX
__asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
mm_end = end - 31; mm_end = end - 31;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
@ -199,8 +196,7 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
/* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */ /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
s++; s++;
@ -234,8 +230,7 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(PREFETCH" %0"::"m"(*s)); __asm__ volatile(PREFETCH" %0"::"m"(*s));
__asm__ volatile("movq %0, %%mm4"::"m"(mask15s)); __asm__ volatile("movq %0, %%mm4"::"m"(mask15s));
mm_end = end - 15; mm_end = end - 15;
while (s<mm_end) while (s<mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
@ -258,15 +253,13 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
mm_end = end - 3; mm_end = end - 3;
while (s < mm_end) while (s < mm_end) {
{
register unsigned x= *((const uint32_t *)s); register unsigned x= *((const uint32_t *)s);
*((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
d+=4; d+=4;
s+=4; s+=4;
} }
if (s < end) if (s < end) {
{
register unsigned short x= *((const uint16_t *)s); register unsigned short x= *((const uint16_t *)s);
*((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0); *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
} }
@ -284,8 +277,7 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile("movq %0, %%mm7"::"m"(mask15rg)); __asm__ volatile("movq %0, %%mm7"::"m"(mask15rg));
__asm__ volatile("movq %0, %%mm6"::"m"(mask15b)); __asm__ volatile("movq %0, %%mm6"::"m"(mask15b));
mm_end = end - 15; mm_end = end - 15;
while (s<mm_end) while (s<mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
@ -312,15 +304,13 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
mm_end = end - 3; mm_end = end - 3;
while (s < mm_end) while (s < mm_end) {
{
register uint32_t x= *((const uint32_t*)s); register uint32_t x= *((const uint32_t*)s);
*((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F); *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
s+=4; s+=4;
d+=4; d+=4;
} }
if (s < end) if (s < end) {
{
register uint16_t x= *((const uint16_t*)s); register uint16_t x= *((const uint16_t*)s);
*((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F); *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
} }
@ -378,8 +368,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_
"movq %0, %%mm7 \n\t" "movq %0, %%mm7 \n\t"
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
::"m"(red_16mask),"m"(green_16mask)); ::"m"(red_16mask),"m"(green_16mask));
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
@ -417,8 +406,7 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
register int rgb = *(const uint32_t*)s; s += 4; register int rgb = *(const uint32_t*)s; s += 4;
*d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8); *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
} }
@ -440,8 +428,7 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long s
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
::"m"(red_16mask),"m"(green_16mask)); ::"m"(red_16mask),"m"(green_16mask));
mm_end = end - 15; mm_end = end - 15;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
@ -478,8 +465,7 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
register int rgb = *(const uint32_t*)s; s += 4; register int rgb = *(const uint32_t*)s; s += 4;
*d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19); *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
} }
@ -537,8 +523,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_
"movq %0, %%mm7 \n\t" "movq %0, %%mm7 \n\t"
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
::"m"(red_15mask),"m"(green_15mask)); ::"m"(red_15mask),"m"(green_15mask));
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
@ -576,8 +561,7 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
register int rgb = *(const uint32_t*)s; s += 4; register int rgb = *(const uint32_t*)s; s += 4;
*d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9); *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
} }
@ -599,8 +583,7 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long s
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
::"m"(red_15mask),"m"(green_15mask)); ::"m"(red_15mask),"m"(green_15mask));
mm_end = end - 15; mm_end = end - 15;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
@ -637,8 +620,7 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
register int rgb = *(const uint32_t*)s; s += 4; register int rgb = *(const uint32_t*)s; s += 4;
*d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19); *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
} }
@ -660,8 +642,7 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long s
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
::"m"(red_16mask),"m"(green_16mask)); ::"m"(red_16mask),"m"(green_16mask));
mm_end = end - 11; mm_end = end - 11;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
@ -698,8 +679,7 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
const int b = *s++; const int b = *s++;
const int g = *s++; const int g = *s++;
const int r = *s++; const int r = *s++;
@ -723,8 +703,7 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
::"m"(red_16mask),"m"(green_16mask)); ::"m"(red_16mask),"m"(green_16mask));
mm_end = end - 15; mm_end = end - 15;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
@ -761,8 +740,7 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
const int r = *s++; const int r = *s++;
const int g = *s++; const int g = *s++;
const int b = *s++; const int b = *s++;
@ -786,8 +764,7 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long s
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
::"m"(red_15mask),"m"(green_15mask)); ::"m"(red_15mask),"m"(green_15mask));
mm_end = end - 11; mm_end = end - 11;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
@ -824,8 +801,7 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
const int b = *s++; const int b = *s++;
const int g = *s++; const int g = *s++;
const int r = *s++; const int r = *s++;
@ -849,8 +825,7 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
::"m"(red_15mask),"m"(green_15mask)); ::"m"(red_15mask),"m"(green_15mask));
mm_end = end - 15; mm_end = end - 15;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movd %1, %%mm0 \n\t" "movd %1, %%mm0 \n\t"
@ -887,8 +862,7 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
const int r = *s++; const int r = *s++;
const int g = *s++; const int g = *s++;
const int b = *s++; const int b = *s++;
@ -929,8 +903,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long s
#if HAVE_MMX #if HAVE_MMX
__asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
mm_end = end - 7; mm_end = end - 7;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
@ -1049,8 +1022,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
register uint16_t bgr; register uint16_t bgr;
bgr = *s++; bgr = *s++;
*d++ = (bgr&0x1F)<<3; *d++ = (bgr&0x1F)<<3;
@ -1071,8 +1043,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long s
#if HAVE_MMX #if HAVE_MMX
__asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
mm_end = end - 7; mm_end = end - 7;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
@ -1190,8 +1161,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long s
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
register uint16_t bgr; register uint16_t bgr;
bgr = *s++; bgr = *s++;
*d++ = (bgr&0x1F)<<3; *d++ = (bgr&0x1F)<<3;
@ -1233,8 +1203,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
__asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");
mm_end = end - 3; mm_end = end - 3;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
@ -1256,8 +1225,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
register uint16_t bgr; register uint16_t bgr;
bgr = *s++; bgr = *s++;
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
@ -1288,8 +1256,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
__asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");
mm_end = end - 3; mm_end = end - 3;
while (s < mm_end) while (s < mm_end) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
@ -1311,8 +1278,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_
__asm__ volatile(SFENCE:::"memory"); __asm__ volatile(SFENCE:::"memory");
__asm__ volatile(EMMS:::"memory"); __asm__ volatile(EMMS:::"memory");
#endif #endif
while (s < end) while (s < end) {
{
register uint16_t bgr; register uint16_t bgr;
bgr = *s++; bgr = *s++;
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
@ -1453,8 +1419,7 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long s
src-= src_size; src-= src_size;
dst-= src_size; dst-= src_size;
#endif #endif
for (i=0; i<src_size; i+=3) for (i=0; i<src_size; i+=3) {
{
register uint8_t x; register uint8_t x;
x = src[i + 2]; x = src[i + 2];
dst[i + 1] = src[i + 1]; dst[i + 1] = src[i + 1];
@ -1469,8 +1434,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
{ {
long y; long y;
const x86_reg chromWidth= width>>1; const x86_reg chromWidth= width>>1;
for (y=0; y<height; y++) for (y=0; y<height; y++) {
{
#if HAVE_MMX #if HAVE_MMX
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
__asm__ volatile( __asm__ volatile(
@ -1530,7 +1494,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
const uint32_t *yc = (uint32_t *) ysrc; const uint32_t *yc = (uint32_t *) ysrc;
const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride); const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride);
const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc; const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc;
for (i = 0; i < chromWidth; i += 8){ for (i = 0; i < chromWidth; i += 8) {
uint64_t y1, y2, yuv1, yuv2; uint64_t y1, y2, yuv1, yuv2;
uint64_t u, v; uint64_t u, v;
/* Prefetch */ /* Prefetch */
@ -1559,7 +1523,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
int i; int i;
uint64_t *ldst = (uint64_t *) dst; uint64_t *ldst = (uint64_t *) dst;
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
for (i = 0; i < chromWidth; i += 2){ for (i = 0; i < chromWidth; i += 2) {
uint64_t k, l; uint64_t k, l;
k = yc[0] + (uc[0] << 8) + k = yc[0] + (uc[0] << 8) +
(yc[1] << 16) + (vc[0] << 24); (yc[1] << 16) + (vc[0] << 24);
@ -1574,7 +1538,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
#else #else
int i, *idst = (int32_t *) dst; int i, *idst = (int32_t *) dst;
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
for (i = 0; i < chromWidth; i++){ for (i = 0; i < chromWidth; i++) {
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
*idst++ = (yc[0] << 24)+ (uc[0] << 16) + *idst++ = (yc[0] << 24)+ (uc[0] << 16) +
(yc[1] << 8) + (vc[0] << 0); (yc[1] << 8) + (vc[0] << 0);
@ -1588,8 +1552,7 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
} }
#endif #endif
#endif #endif
if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
{
usrc += chromStride; usrc += chromStride;
vsrc += chromStride; vsrc += chromStride;
} }
@ -1621,8 +1584,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
{ {
long y; long y;
const x86_reg chromWidth= width>>1; const x86_reg chromWidth= width>>1;
for (y=0; y<height; y++) for (y=0; y<height; y++) {
{
#if HAVE_MMX #if HAVE_MMX
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
__asm__ volatile( __asm__ volatile(
@ -1665,7 +1627,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
int i; int i;
uint64_t *ldst = (uint64_t *) dst; uint64_t *ldst = (uint64_t *) dst;
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
for (i = 0; i < chromWidth; i += 2){ for (i = 0; i < chromWidth; i += 2) {
uint64_t k, l; uint64_t k, l;
k = uc[0] + (yc[0] << 8) + k = uc[0] + (yc[0] << 8) +
(vc[0] << 16) + (yc[1] << 24); (vc[0] << 16) + (yc[1] << 24);
@ -1680,7 +1642,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
#else #else
int i, *idst = (int32_t *) dst; int i, *idst = (int32_t *) dst;
const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
for (i = 0; i < chromWidth; i++){ for (i = 0; i < chromWidth; i++) {
#if HAVE_BIGENDIAN #if HAVE_BIGENDIAN
*idst++ = (uc[0] << 24)+ (yc[0] << 16) + *idst++ = (uc[0] << 24)+ (yc[0] << 16) +
(vc[0] << 8) + (yc[1] << 0); (vc[0] << 8) + (yc[1] << 0);
@ -1694,8 +1656,7 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
} }
#endif #endif
#endif #endif
if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
{
usrc += chromStride; usrc += chromStride;
vsrc += chromStride; vsrc += chromStride;
} }
@ -1751,8 +1712,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
{ {
long y; long y;
const x86_reg chromWidth= width>>1; const x86_reg chromWidth= width>>1;
for (y=0; y<height; y+=2) for (y=0; y<height; y+=2) {
{
#if HAVE_MMX #if HAVE_MMX
__asm__ volatile( __asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t" "xor %%"REG_a", %%"REG_a" \n\t"
@ -1837,8 +1797,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
); );
#else #else
long i; long i;
for (i=0; i<chromWidth; i++) for (i=0; i<chromWidth; i++) {
{
ydst[2*i+0] = src[4*i+0]; ydst[2*i+0] = src[4*i+0];
udst[i] = src[4*i+1]; udst[i] = src[4*i+1];
ydst[2*i+1] = src[4*i+2]; ydst[2*i+1] = src[4*i+2];
@ -1847,8 +1806,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
ydst += lumStride; ydst += lumStride;
src += srcStride; src += srcStride;
for (i=0; i<chromWidth; i++) for (i=0; i<chromWidth; i++) {
{
ydst[2*i+0] = src[4*i+0]; ydst[2*i+0] = src[4*i+0];
ydst[2*i+1] = src[4*i+2]; ydst[2*i+1] = src[4*i+2];
} }
@ -1882,7 +1840,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
dst[0]= src[0]; dst[0]= src[0];
// first line // first line
for (x=0; x<srcWidth-1; x++){ for (x=0; x<srcWidth-1; x++) {
dst[2*x+1]= (3*src[x] + src[x+1])>>2; dst[2*x+1]= (3*src[x] + src[x+1])>>2;
dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
} }
@ -1890,7 +1848,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
dst+= dstStride; dst+= dstStride;
for (y=1; y<srcHeight; y++){ for (y=1; y<srcHeight; y++) {
#if HAVE_MMX2 || HAVE_AMD3DNOW #if HAVE_MMX2 || HAVE_AMD3DNOW
const x86_reg mmxSize= srcWidth&~15; const x86_reg mmxSize= srcWidth&~15;
__asm__ volatile( __asm__ volatile(
@ -1941,7 +1899,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
dst[0 ]= (3*src[0] + src[srcStride])>>2; dst[0 ]= (3*src[0] + src[srcStride])>>2;
dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; dst[dstStride]= ( src[0] + 3*src[srcStride])>>2;
for (x=mmxSize-1; x<srcWidth-1; x++){ for (x=mmxSize-1; x<srcWidth-1; x++) {
dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2; dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2;
dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2; dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2;
@ -1958,13 +1916,13 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, long srcWi
#if 1 #if 1
dst[0]= src[0]; dst[0]= src[0];
for (x=0; x<srcWidth-1; x++){ for (x=0; x<srcWidth-1; x++) {
dst[2*x+1]= (3*src[x] + src[x+1])>>2; dst[2*x+1]= (3*src[x] + src[x+1])>>2;
dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
} }
dst[2*srcWidth-1]= src[srcWidth-1]; dst[2*srcWidth-1]= src[srcWidth-1];
#else #else
for (x=0; x<srcWidth; x++){ for (x=0; x<srcWidth; x++) {
dst[2*x+0]= dst[2*x+0]=
dst[2*x+1]= src[x]; dst[2*x+1]= src[x];
} }
@ -1989,8 +1947,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
{ {
long y; long y;
const x86_reg chromWidth= width>>1; const x86_reg chromWidth= width>>1;
for (y=0; y<height; y+=2) for (y=0; y<height; y+=2) {
{
#if HAVE_MMX #if HAVE_MMX
__asm__ volatile( __asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t" "xor %%"REG_a", %%"REG_a" \n\t"
@ -2075,8 +2032,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
); );
#else #else
long i; long i;
for (i=0; i<chromWidth; i++) for (i=0; i<chromWidth; i++) {
{
udst[i] = src[4*i+0]; udst[i] = src[4*i+0];
ydst[2*i+0] = src[4*i+1]; ydst[2*i+0] = src[4*i+1];
vdst[i] = src[4*i+2]; vdst[i] = src[4*i+2];
@ -2085,8 +2041,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
ydst += lumStride; ydst += lumStride;
src += srcStride; src += srcStride;
for (i=0; i<chromWidth; i++) for (i=0; i<chromWidth; i++) {
{
ydst[2*i+0] = src[4*i+1]; ydst[2*i+0] = src[4*i+1];
ydst[2*i+1] = src[4*i+3]; ydst[2*i+1] = src[4*i+3];
} }
@ -2117,11 +2072,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
long y; long y;
const x86_reg chromWidth= width>>1; const x86_reg chromWidth= width>>1;
#if HAVE_MMX #if HAVE_MMX
for (y=0; y<height-2; y+=2) for (y=0; y<height-2; y+=2) {
{
long i; long i;
for (i=0; i<2; i++) for (i=0; i<2; i++) {
{
__asm__ volatile( __asm__ volatile(
"mov %2, %%"REG_a" \n\t" "mov %2, %%"REG_a" \n\t"
"movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t" "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t"
@ -2355,11 +2308,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
#else #else
y=0; y=0;
#endif #endif
for (; y<height; y+=2) for (; y<height; y+=2) {
{
long i; long i;
for (i=0; i<chromWidth; i++) for (i=0; i<chromWidth; i++) {
{
unsigned int b = src[6*i+0]; unsigned int b = src[6*i+0];
unsigned int g = src[6*i+1]; unsigned int g = src[6*i+1];
unsigned int r = src[6*i+2]; unsigned int r = src[6*i+2];
@ -2382,8 +2333,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
ydst += lumStride; ydst += lumStride;
src += srcStride; src += srcStride;
for (i=0; i<chromWidth; i++) for (i=0; i<chromWidth; i++) {
{
unsigned int b = src[6*i+0]; unsigned int b = src[6*i+0];
unsigned int g = src[6*i+1]; unsigned int g = src[6*i+1];
unsigned int r = src[6*i+2]; unsigned int r = src[6*i+2];
@ -2408,11 +2358,11 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
static void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, static void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest,
long width, long height, long src1Stride, long width, long height, long src1Stride,
long src2Stride, long dstStride){ long src2Stride, long dstStride)
{
long h; long h;
for (h=0; h < height; h++) for (h=0; h < height; h++) {
{
long w; long w;
#if HAVE_MMX #if HAVE_MMX
@ -2462,14 +2412,12 @@ static void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest,
: "memory", "%"REG_a : "memory", "%"REG_a
); );
#endif #endif
for (w= (width&(~15)); w < width; w++) for (w= (width&(~15)); w < width; w++) {
{
dest[2*w+0] = src1[w]; dest[2*w+0] = src1[w];
dest[2*w+1] = src2[w]; dest[2*w+1] = src2[w];
} }
#else #else
for (w=0; w < width; w++) for (w=0; w < width; w++) {
{
dest[2*w+0] = src1[w]; dest[2*w+0] = src1[w];
dest[2*w+1] = src2[w]; dest[2*w+1] = src2[w];
} }
@ -2502,13 +2450,12 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
PREFETCH" %1 \n\t" PREFETCH" %1 \n\t"
::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory"); ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
#endif #endif
for (y=0;y<h;y++){ for (y=0;y<h;y++) {
const uint8_t* s1=src1+srcStride1*(y>>1); const uint8_t* s1=src1+srcStride1*(y>>1);
uint8_t* d=dst1+dstStride1*y; uint8_t* d=dst1+dstStride1*y;
x=0; x=0;
#if HAVE_MMX #if HAVE_MMX
for (;x<w-31;x+=32) for (;x<w-31;x+=32) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
@ -2542,13 +2489,12 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2,
#endif #endif
for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
} }
for (y=0;y<h;y++){ for (y=0;y<h;y++) {
const uint8_t* s2=src2+srcStride2*(y>>1); const uint8_t* s2=src2+srcStride2*(y>>1);
uint8_t* d=dst2+dstStride2*y; uint8_t* d=dst2+dstStride2*y;
x=0; x=0;
#if HAVE_MMX #if HAVE_MMX
for (;x<w-31;x+=32) for (;x<w-31;x+=32) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32%1 \n\t" PREFETCH" 32%1 \n\t"
"movq %1, %%mm0 \n\t" "movq %1, %%mm0 \n\t"
@ -2600,15 +2546,14 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
x86_reg x; x86_reg x;
long y,w,h; long y,w,h;
w=width/2; h=height; w=width/2; h=height;
for (y=0;y<h;y++){ for (y=0;y<h;y++) {
const uint8_t* yp=src1+srcStride1*y; const uint8_t* yp=src1+srcStride1*y;
const uint8_t* up=src2+srcStride2*(y>>2); const uint8_t* up=src2+srcStride2*(y>>2);
const uint8_t* vp=src3+srcStride3*(y>>2); const uint8_t* vp=src3+srcStride3*(y>>2);
uint8_t* d=dst+dstStride*y; uint8_t* d=dst+dstStride*y;
x=0; x=0;
#if HAVE_MMX #if HAVE_MMX
for (;x<w-7;x+=8) for (;x<w-7;x+=8) {
{
__asm__ volatile( __asm__ volatile(
PREFETCH" 32(%1, %0) \n\t" PREFETCH" 32(%1, %0) \n\t"
PREFETCH" 32(%2, %0) \n\t" PREFETCH" 32(%2, %0) \n\t"
@ -2661,8 +2606,7 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2
:"memory"); :"memory");
} }
#endif #endif
for (; x<w; x++) for (; x<w; x++) {
{
const long x2 = x<<2; const long x2 = x<<2;
d[8*x+0] = yp[x2]; d[8*x+0] = yp[x2];
d[8*x+1] = up[x]; d[8*x+1] = up[x];
@ -2690,7 +2634,7 @@ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count
count= - count; count= - count;
#if HAVE_MMX #if HAVE_MMX
if(count <= -16){ if(count <= -16) {
count += 15; count += 15;
__asm__ volatile( __asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
@ -2716,7 +2660,7 @@ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count
count -= 15; count -= 15;
} }
#endif #endif
while(count<0){ while(count<0) {
dst[count]= src[2*count]; dst[count]= src[2*count];
count++; count++;
} }
@ -2729,7 +2673,7 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds
src += 4*count; src += 4*count;
count= - count; count= - count;
#if HAVE_MMX #if HAVE_MMX
if(count <= -8){ if(count <= -8) {
count += 7; count += 7;
__asm__ volatile( __asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
@ -2763,7 +2707,7 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds
count -= 7; count -= 7;
} }
#endif #endif
while(count<0){ while(count<0) {
dst0[count]= src[4*count+0]; dst0[count]= src[4*count+0];
dst1[count]= src[4*count+2]; dst1[count]= src[4*count+2];
count++; count++;
@ -2778,7 +2722,7 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u
src1 += 4*count; src1 += 4*count;
count= - count; count= - count;
#ifdef PAVGB #ifdef PAVGB
if(count <= -8){ if(count <= -8) {
count += 7; count += 7;
__asm__ volatile( __asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
@ -2816,7 +2760,7 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u
count -= 7; count -= 7;
} }
#endif #endif
while(count<0){ while(count<0) {
dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
count++; count++;
@ -2830,7 +2774,7 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst
src += 4*count; src += 4*count;
count= - count; count= - count;
#if HAVE_MMX #if HAVE_MMX
if(count <= -8){ if(count <= -8) {
count += 7; count += 7;
__asm__ volatile( __asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
@ -2865,7 +2809,7 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst
} }
#endif #endif
src++; src++;
while(count<0){ while(count<0) {
dst0[count]= src[4*count+0]; dst0[count]= src[4*count+0];
dst1[count]= src[4*count+2]; dst1[count]= src[4*count+2];
count++; count++;
@ -2880,7 +2824,7 @@ static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, ui
src1 += 4*count; src1 += 4*count;
count= - count; count= - count;
#ifdef PAVGB #ifdef PAVGB
if(count <= -8){ if(count <= -8) {
count += 7; count += 7;
__asm__ volatile( __asm__ volatile(
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
@ -2920,7 +2864,7 @@ static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, ui
#endif #endif
src0++; src0++;
src1++; src1++;
while(count<0){ while(count<0) {
dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
count++; count++;
@ -2934,9 +2878,9 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
long y; long y;
const long chromWidth= -((-width)>>1); const long chromWidth= -((-width)>>1);
for (y=0; y<height; y++){ for (y=0; y<height; y++) {
RENAME(extract_even)(src, ydst, width); RENAME(extract_even)(src, ydst, width);
if(y&1){ if(y&1) {
RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth); RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth);
udst+= chromStride; udst+= chromStride;
vdst+= chromStride; vdst+= chromStride;
@ -2961,7 +2905,7 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
long y; long y;
const long chromWidth= -((-width)>>1); const long chromWidth= -((-width)>>1);
for (y=0; y<height; y++){ for (y=0; y<height; y++) {
RENAME(extract_even)(src, ydst, width); RENAME(extract_even)(src, ydst, width);
RENAME(extract_odd2)(src, udst, vdst, chromWidth); RENAME(extract_odd2)(src, udst, vdst, chromWidth);
@ -2986,9 +2930,9 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
long y; long y;
const long chromWidth= -((-width)>>1); const long chromWidth= -((-width)>>1);
for (y=0; y<height; y++){ for (y=0; y<height; y++) {
RENAME(extract_even)(src+1, ydst, width); RENAME(extract_even)(src+1, ydst, width);
if(y&1){ if(y&1) {
RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth); RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth);
udst+= chromStride; udst+= chromStride;
vdst+= chromStride; vdst+= chromStride;
@ -3013,7 +2957,7 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
long y; long y;
const long chromWidth= -((-width)>>1); const long chromWidth= -((-width)>>1);
for (y=0; y<height; y++){ for (y=0; y<height; y++) {
RENAME(extract_even)(src+1, ydst, width); RENAME(extract_even)(src+1, ydst, width);
RENAME(extract_even2)(src, udst, vdst, chromWidth); RENAME(extract_even2)(src, udst, vdst, chromWidth);
@ -3031,7 +2975,8 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co
#endif #endif
} }
static inline void RENAME(rgb2rgb_init)(void){ static inline void RENAME(rgb2rgb_init)(void)
{
rgb15to16 = RENAME(rgb15to16); rgb15to16 = RENAME(rgb15to16);
rgb15tobgr24 = RENAME(rgb15tobgr24); rgb15tobgr24 = RENAME(rgb15tobgr24);
rgb15to32 = RENAME(rgb15to32); rgb15to32 = RENAME(rgb15to32);

@ -82,7 +82,8 @@
// FIXME: must be changed to set alpha to 255 instead of 0 // FIXME: must be changed to set alpha to 255 instead of 0
static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
int y, out1, out2, out3, out4, out5, out6; int y, out1, out2, out3, out4, out5, out6;
for(y=0;y < srcSliceH;++y) { for(y=0;y < srcSliceH;++y) {
@ -134,7 +135,8 @@ static int vis_420P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int s
// FIXME: must be changed to set alpha to 255 instead of 0 // FIXME: must be changed to set alpha to 255 instead of 0
static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
int y, out1, out2, out3, out4, out5, out6; int y, out1, out2, out3, out4, out5, out6;
for(y=0;y < srcSliceH;++y) { for(y=0;y < srcSliceH;++y) {
@ -184,7 +186,8 @@ static int vis_422P_ARGB32(SwsContext *c, uint8_t* src[], int srcStride[], int s
return srcSliceH; return srcSliceH;
} }
SwsFunc ff_yuv2rgb_init_vis(SwsContext *c){ SwsFunc ff_yuv2rgb_init_vis(SwsContext *c)
{
c->sparc_coeffs[5]=c->yCoeff; c->sparc_coeffs[5]=c->yCoeff;
c->sparc_coeffs[6]=c->vgCoeff; c->sparc_coeffs[6]=c->vgCoeff;
c->sparc_coeffs[7]=c->vrCoeff; c->sparc_coeffs[7]=c->vrCoeff;

@ -50,14 +50,15 @@ const char *sws_format_name(enum PixelFormat format);
|| (x)==PIX_FMT_YUVA420P \ || (x)==PIX_FMT_YUVA420P \
) )
static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, int w, int h){ static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, int w, int h)
{
int x,y; int x,y;
uint64_t ssd=0; uint64_t ssd=0;
//printf("%d %d\n", w, h); //printf("%d %d\n", w, h);
for (y=0; y<h; y++){ for (y=0; y<h; y++) {
for (x=0; x<w; x++){ for (x=0; x<w; x++) {
int d= src1[x + y*stride1] - src2[x + y*stride2]; int d= src1[x + y*stride1] - src2[x + y*stride2];
ssd+= d*d; ssd+= d*d;
//printf("%d", abs(src1[x + y*stride1] - src2[x + y*stride2])/26 ); //printf("%d", abs(src1[x + y*stride1] - src2[x + y*stride2])/26 );
@ -70,7 +71,8 @@ static uint64_t getSSD(uint8_t *src1, uint8_t *src2, int stride1, int stride2, i
// test by ref -> src -> dst -> out & compare out against ref // test by ref -> src -> dst -> out & compare out against ref
// ref & out are YV12 // ref & out are YV12
static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, int srcFormat, int dstFormat, static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, int srcFormat, int dstFormat,
int srcW, int srcH, int dstW, int dstH, int flags){ int srcW, int srcH, int dstW, int dstH, int flags)
{
uint8_t *src[4] = {0}; uint8_t *src[4] = {0};
uint8_t *dst[4] = {0}; uint8_t *dst[4] = {0};
uint8_t *out[4] = {0}; uint8_t *out[4] = {0};
@ -82,7 +84,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, int srcFormat
int res; int res;
res = 0; res = 0;
for (i=0; i<4; i++){ for (i=0; i<4; i++) {
// avoid stride % bpp != 0 // avoid stride % bpp != 0
if (srcFormat==PIX_FMT_RGB24 || srcFormat==PIX_FMT_BGR24) if (srcFormat==PIX_FMT_RGB24 || srcFormat==PIX_FMT_BGR24)
srcStride[i]= srcW*3; srcStride[i]= srcW*3;
@ -169,7 +171,7 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, int srcFormat
sws_freeContext(dstContext); sws_freeContext(dstContext);
sws_freeContext(outContext); sws_freeContext(outContext);
for (i=0; i<4; i++){ for (i=0; i<4; i++) {
free(src[i]); free(src[i]);
free(dst[i]); free(dst[i]);
free(out[i]); free(out[i]);
@ -178,7 +180,8 @@ static int doTest(uint8_t *ref[4], int refStride[4], int w, int h, int srcFormat
return res; return res;
} }
static void selfTest(uint8_t *src[4], int stride[4], int w, int h){ static void selfTest(uint8_t *src[4], int stride[4], int w, int h)
{
enum PixelFormat srcFormat, dstFormat; enum PixelFormat srcFormat, dstFormat;
int srcW, srcH, dstW, dstH; int srcW, srcH, dstW, dstH;
int flags; int flags;
@ -206,7 +209,8 @@ static void selfTest(uint8_t *src[4], int stride[4], int w, int h){
#define W 96 #define W 96
#define H 96 #define H 96
int main(int argc, char **argv){ int main(int argc, char **argv)
{
uint8_t *rgb_data = malloc (W*H*4); uint8_t *rgb_data = malloc (W*H*4);
uint8_t *rgb_src[3]= {rgb_data, NULL, NULL}; uint8_t *rgb_src[3]= {rgb_data, NULL, NULL};
int rgb_stride[3]={4*W, 0, 0}; int rgb_stride[3]={4*W, 0, 0};
@ -221,8 +225,8 @@ int main(int argc, char **argv){
av_lfg_init(&rand, 1); av_lfg_init(&rand, 1);
for (y=0; y<H; y++){ for (y=0; y<H; y++) {
for (x=0; x<W*4; x++){ for (x=0; x<W*4; x++) {
rgb_data[ x + y*4*W]= av_lfg_get(&rand); rgb_data[ x + y*4*W]= av_lfg_get(&rand);
} }
} }

File diff suppressed because it is too large Load Diff

@ -64,7 +64,7 @@ typedef int (*SwsFunc)(struct SwsContext *context, uint8_t* src[],
uint8_t* dst[], int dstStride[]); uint8_t* dst[], int dstStride[]);
/* This struct should be aligned on at least a 32-byte boundary. */ /* This struct should be aligned on at least a 32-byte boundary. */
typedef struct SwsContext{ typedef struct SwsContext {
/** /**
* info on struct for av_log * info on struct for av_log
*/ */

@ -906,23 +906,23 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, con
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW) uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW)
{ {
#if COMPILE_TEMPLATE_MMX #if COMPILE_TEMPLATE_MMX
if(!(c->flags & SWS_BITEXACT)){ if(!(c->flags & SWS_BITEXACT)) {
if (c->flags & SWS_ACCURATE_RND){ if (c->flags & SWS_ACCURATE_RND) {
if (uDest){ if (uDest) {
YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW) YSCALEYUV2YV12X_ACCURATE( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW) YSCALEYUV2YV12X_ACCURATE(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
} }
if (CONFIG_SWSCALE_ALPHA && aDest){ if (CONFIG_SWSCALE_ALPHA && aDest) {
YSCALEYUV2YV12X_ACCURATE( "0", ALP_MMX_FILTER_OFFSET, aDest, dstW) YSCALEYUV2YV12X_ACCURATE( "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
} }
YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW) YSCALEYUV2YV12X_ACCURATE("0", LUM_MMX_FILTER_OFFSET, dest, dstW)
}else{ } else {
if (uDest){ if (uDest) {
YSCALEYUV2YV12X( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW) YSCALEYUV2YV12X( "0", CHR_MMX_FILTER_OFFSET, uDest, chrDstW)
YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW) YSCALEYUV2YV12X(AV_STRINGIFY(VOF), CHR_MMX_FILTER_OFFSET, vDest, chrDstW)
} }
if (CONFIG_SWSCALE_ALPHA && aDest){ if (CONFIG_SWSCALE_ALPHA && aDest) {
YSCALEYUV2YV12X( "0", ALP_MMX_FILTER_OFFSET, aDest, dstW) YSCALEYUV2YV12X( "0", ALP_MMX_FILTER_OFFSET, aDest, dstW)
} }
@ -956,15 +956,15 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
{ {
int i; int i;
#if COMPILE_TEMPLATE_MMX #if COMPILE_TEMPLATE_MMX
if(!(c->flags & SWS_BITEXACT)){ if(!(c->flags & SWS_BITEXACT)) {
long p= 4; long p= 4;
uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW}; uint8_t *src[4]= {alpSrc + dstW, lumSrc + dstW, chrSrc + chrDstW, chrSrc + VOFW + chrDstW};
uint8_t *dst[4]= {aDest, dest, uDest, vDest}; uint8_t *dst[4]= {aDest, dest, uDest, vDest};
x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW}; x86_reg counter[4]= {dstW, dstW, chrDstW, chrDstW};
if (c->flags & SWS_ACCURATE_RND){ if (c->flags & SWS_ACCURATE_RND) {
while(p--){ while(p--) {
if (dst[p]){ if (dst[p]) {
__asm__ volatile( __asm__ volatile(
YSCALEYUV2YV121_ACCURATE YSCALEYUV2YV121_ACCURATE
:: "r" (src[p]), "r" (dst[p] + counter[p]), :: "r" (src[p]), "r" (dst[p] + counter[p]),
@ -973,9 +973,9 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
); );
} }
} }
}else{ } else {
while(p--){ while(p--) {
if (dst[p]){ if (dst[p]) {
__asm__ volatile( __asm__ volatile(
YSCALEYUV2YV121 YSCALEYUV2YV121
:: "r" (src[p]), "r" (dst[p] + counter[p]), :: "r" (src[p]), "r" (dst[p] + counter[p]),
@ -988,11 +988,10 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
return; return;
} }
#endif #endif
for (i=0; i<dstW; i++) for (i=0; i<dstW; i++) {
{
int val= (lumSrc[i]+64)>>7; int val= (lumSrc[i]+64)>>7;
if (val&256){ if (val&256) {
if (val<0) val=0; if (val<0) val=0;
else val=255; else val=255;
} }
@ -1001,12 +1000,11 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
} }
if (uDest) if (uDest)
for (i=0; i<chrDstW; i++) for (i=0; i<chrDstW; i++) {
{
int u=(chrSrc[i ]+64)>>7; int u=(chrSrc[i ]+64)>>7;
int v=(chrSrc[i + VOFW]+64)>>7; int v=(chrSrc[i + VOFW]+64)>>7;
if ((u|v)&256){ if ((u|v)&256) {
if (u<0) u=0; if (u<0) u=0;
else if (u>255) u=255; else if (u>255) u=255;
if (v<0) v=0; if (v<0) v=0;
@ -1018,7 +1016,7 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const
} }
if (CONFIG_SWSCALE_ALPHA && aDest) if (CONFIG_SWSCALE_ALPHA && aDest)
for (i=0; i<dstW; i++){ for (i=0; i<dstW; i++) {
int val= (alpSrc[i]+64)>>7; int val= (alpSrc[i]+64)>>7;
aDest[i]= av_clip_uint8(val); aDest[i]= av_clip_uint8(val);
} }
@ -1034,11 +1032,11 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
{ {
#if COMPILE_TEMPLATE_MMX #if COMPILE_TEMPLATE_MMX
x86_reg dummy=0; x86_reg dummy=0;
if(!(c->flags & SWS_BITEXACT)){ if(!(c->flags & SWS_BITEXACT)) {
if (c->flags & SWS_ACCURATE_RND){ if (c->flags & SWS_ACCURATE_RND) {
switch(c->dstFormat){ switch(c->dstFormat) {
case PIX_FMT_RGB32: case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX YSCALEYUV2RGBX
"movq %%mm2, "U_TEMP"(%0) \n\t" "movq %%mm2, "U_TEMP"(%0) \n\t"
@ -1052,7 +1050,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6) WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
YSCALEYUV2PACKEDX_END YSCALEYUV2PACKEDX_END
}else{ } else {
YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX YSCALEYUV2RGBX
"pcmpeqd %%mm7, %%mm7 \n\t" "pcmpeqd %%mm7, %%mm7 \n\t"
@ -1116,11 +1114,10 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
YSCALEYUV2PACKEDX_END YSCALEYUV2PACKEDX_END
return; return;
} }
}else{ } else {
switch(c->dstFormat) switch(c->dstFormat) {
{
case PIX_FMT_RGB32: case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALEYUV2PACKEDX YSCALEYUV2PACKEDX
YSCALEYUV2RGBX YSCALEYUV2RGBX
YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7) YSCALEYUV2PACKEDX_YA(ALP_MMX_FILTER_OFFSET, %%mm0, %%mm3, %%mm6, %%mm1, %%mm7)
@ -1129,7 +1126,7 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter,
"packuswb %%mm7, %%mm1 \n\t" "packuswb %%mm7, %%mm1 \n\t"
WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
YSCALEYUV2PACKEDX_END YSCALEYUV2PACKEDX_END
}else{ } else {
YSCALEYUV2PACKEDX YSCALEYUV2PACKEDX
YSCALEYUV2RGBX YSCALEYUV2RGBX
"pcmpeqd %%mm7, %%mm7 \n\t" "pcmpeqd %%mm7, %%mm7 \n\t"
@ -1222,12 +1219,11 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
int i; int i;
#if COMPILE_TEMPLATE_MMX #if COMPILE_TEMPLATE_MMX
if(!(c->flags & SWS_BITEXACT)){ if(!(c->flags & SWS_BITEXACT)) {
switch(c->dstFormat) switch(c->dstFormat) {
{
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
case PIX_FMT_RGB32: case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
#if ARCH_X86_64 #if ARCH_X86_64
__asm__ volatile( __asm__ volatile(
YSCALEYUV2RGB(%%REGBP, %5) YSCALEYUV2RGB(%%REGBP, %5)
@ -1268,7 +1264,7 @@ static inline void RENAME(yuv2packed2)(SwsContext *c, const uint16_t *buf0, cons
"a" (&c->redDither) "a" (&c->redDither)
); );
#endif #endif
}else{ } else {
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"REG_b" \n\t"
@ -1373,20 +1369,17 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1 const uint16_t *buf1= buf0; //FIXME needed for RGB1/BGR1
const int yalpha= 4096; //FIXME ... const int yalpha= 4096; //FIXME ...
if (flags&SWS_FULL_CHR_H_INT) if (flags&SWS_FULL_CHR_H_INT) {
{
c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y); c->yuv2packed2(c, buf0, buf0, uvbuf0, uvbuf1, abuf0, abuf0, dest, dstW, 0, uvalpha, y);
return; return;
} }
#if COMPILE_TEMPLATE_MMX #if COMPILE_TEMPLATE_MMX
if(!(flags & SWS_BITEXACT)){ if(!(flags & SWS_BITEXACT)) {
if (uvalpha < 2048) // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
{ switch(dstFormat) {
switch(dstFormat)
{
case PIX_FMT_RGB32: case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"REG_b" \n\t"
@ -1400,7 +1393,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
:: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
}else{ } else {
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"REG_b" \n\t"
@ -1489,13 +1482,10 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
); );
return; return;
} }
} } else {
else switch(dstFormat) {
{
switch(dstFormat)
{
case PIX_FMT_RGB32: case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"REG_b" \n\t"
@ -1509,7 +1499,7 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
:: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest), :: "c" (buf0), "d" (abuf0), "S" (uvbuf0), "D" (uvbuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
}else{ } else {
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"REG_b" \n\t"
@ -1601,10 +1591,9 @@ static inline void RENAME(yuv2packed1)(SwsContext *c, const uint16_t *buf0, cons
} }
} }
#endif /* COMPILE_TEMPLATE_MMX */ #endif /* COMPILE_TEMPLATE_MMX */
if (uvalpha < 2048) if (uvalpha < 2048) {
{
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C) YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
}else{ } else {
YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C) YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C(void,0), YSCALE_YUV_2_GRAY16_1_C, YSCALE_YUV_2_MONO2_C)
} }
} }
@ -1662,8 +1651,7 @@ static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
); );
#else #else
int i; int i;
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
dstU[i]= src1[4*i + 1]; dstU[i]= src1[4*i + 1];
dstV[i]= src1[4*i + 3]; dstV[i]= src1[4*i + 3];
} }
@ -1696,8 +1684,7 @@ static inline void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s
); );
#else #else
int i; int i;
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
dstU[i]= src1[2*i + 1]; dstU[i]= src1[2*i + 1];
dstV[i]= src2[2*i + 1]; dstV[i]= src2[2*i + 1];
} }
@ -1756,8 +1743,7 @@ static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
); );
#else #else
int i; int i;
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
dstU[i]= src1[4*i + 0]; dstU[i]= src1[4*i + 0];
dstV[i]= src1[4*i + 2]; dstV[i]= src1[4*i + 2];
} }
@ -1791,8 +1777,7 @@ static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s
); );
#else #else
int i; int i;
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
dstU[i]= src1[2*i]; dstU[i]= src1[2*i];
dstV[i]= src2[2*i]; dstV[i]= src2[2*i];
} }
@ -1803,13 +1788,13 @@ static inline void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *s
static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, int srcFormat) static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, int srcFormat)
{ {
if(srcFormat == PIX_FMT_BGR24){ if(srcFormat == PIX_FMT_BGR24) {
__asm__ volatile( __asm__ volatile(
"movq "MANGLE(ff_bgr24toY1Coeff)", %%mm5 \n\t" "movq "MANGLE(ff_bgr24toY1Coeff)", %%mm5 \n\t"
"movq "MANGLE(ff_bgr24toY2Coeff)", %%mm6 \n\t" "movq "MANGLE(ff_bgr24toY2Coeff)", %%mm6 \n\t"
: :
); );
}else{ } else {
__asm__ volatile( __asm__ volatile(
"movq "MANGLE(ff_rgb24toY1Coeff)", %%mm5 \n\t" "movq "MANGLE(ff_rgb24toY1Coeff)", %%mm5 \n\t"
"movq "MANGLE(ff_rgb24toY2Coeff)", %%mm6 \n\t" "movq "MANGLE(ff_rgb24toY2Coeff)", %%mm6 \n\t"
@ -1918,8 +1903,7 @@ static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width
RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24); RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
#else #else
int i; int i;
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
int b= src[i*3+0]; int b= src[i*3+0];
int g= src[i*3+1]; int g= src[i*3+1];
int r= src[i*3+2]; int r= src[i*3+2];
@ -1935,8 +1919,7 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24); RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
#else #else
int i; int i;
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
int b= src1[3*i + 0]; int b= src1[3*i + 0];
int g= src1[3*i + 1]; int g= src1[3*i + 1];
int r= src1[3*i + 2]; int r= src1[3*i + 2];
@ -1951,8 +1934,7 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused) static inline void RENAME(bgr24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{ {
int i; int i;
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
int b= src1[6*i + 0] + src1[6*i + 3]; int b= src1[6*i + 0] + src1[6*i + 3];
int g= src1[6*i + 1] + src1[6*i + 4]; int g= src1[6*i + 1] + src1[6*i + 4];
int r= src1[6*i + 2] + src1[6*i + 5]; int r= src1[6*i + 2] + src1[6*i + 5];
@ -1969,8 +1951,7 @@ static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width
RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24); RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
#else #else
int i; int i;
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
int r= src[i*3+0]; int r= src[i*3+0];
int g= src[i*3+1]; int g= src[i*3+1];
int b= src[i*3+2]; int b= src[i*3+2];
@ -1988,8 +1969,7 @@ static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t
#else #else
int i; int i;
assert(src1==src2); assert(src1==src2);
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
int r= src1[3*i + 0]; int r= src1[3*i + 0];
int g= src1[3*i + 1]; int g= src1[3*i + 1];
int b= src1[3*i + 2]; int b= src1[3*i + 2];
@ -2004,8 +1984,7 @@ static inline void RENAME(rgb24ToUV_half)(uint8_t *dstU, uint8_t *dstV, const ui
{ {
int i; int i;
assert(src1==src2); assert(src1==src2);
for (i=0; i<width; i++) for (i=0; i<width; i++) {
{
int r= src1[6*i + 0] + src1[6*i + 3]; int r= src1[6*i + 0] + src1[6*i + 3];
int g= src1[6*i + 1] + src1[6*i + 4]; int g= src1[6*i + 1] + src1[6*i + 4];
int b= src1[6*i + 2] + src1[6*i + 5]; int b= src1[6*i + 2] + src1[6*i + 5];
@ -2022,8 +2001,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
{ {
#if COMPILE_TEMPLATE_MMX #if COMPILE_TEMPLATE_MMX
assert(filterSize % 4 == 0 && filterSize>0); assert(filterSize % 4 == 0 && filterSize>0);
if (filterSize==4) // Always true for upscaling, sometimes for down, too. if (filterSize==4) { // Always true for upscaling, sometimes for down, too.
{
x86_reg counter= -2*dstW; x86_reg counter= -2*dstW;
filter-= counter*2; filter-= counter*2;
filterPos-= counter/2; filterPos-= counter/2;
@ -2067,9 +2045,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
: "%"REG_b : "%"REG_b
#endif #endif
); );
} } else if (filterSize==8) {
else if (filterSize==8)
{
x86_reg counter= -2*dstW; x86_reg counter= -2*dstW;
filter-= counter*4; filter-= counter*4;
filterPos-= counter/2; filterPos-= counter/2;
@ -2124,9 +2100,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
: "%"REG_b : "%"REG_b
#endif #endif
); );
} } else {
else
{
uint8_t *offset = src+filterSize; uint8_t *offset = src+filterSize;
x86_reg counter= -2*dstW; x86_reg counter= -2*dstW;
//filter-= counter*filterSize/2; //filter-= counter*filterSize/2;
@ -2180,14 +2154,12 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, const uint8_t *src, in
hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize); hScale_altivec_real(dst, dstW, src, srcW, xInc, filter, filterPos, filterSize);
#else #else
int i; int i;
for (i=0; i<dstW; i++) for (i=0; i<dstW; i++) {
{
int j; int j;
int srcPos= filterPos[i]; int srcPos= filterPos[i];
int val=0; int val=0;
//printf("filterPos: %d\n", filterPos[i]); //printf("filterPos: %d\n", filterPos[i]);
for (j=0; j<filterSize; j++) for (j=0; j<filterSize; j++) {
{
//printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]); //printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
val += ((int)src[srcPos + j])*filter[filterSize*i + j]; val += ((int)src[srcPos + j])*filter[filterSize*i + j];
} }
@ -2213,8 +2185,7 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
{ {
int i; int i;
unsigned int xpos=0; unsigned int xpos=0;
for (i=0;i<dstWidth;i++) for (i=0;i<dstWidth;i++) {
{
register unsigned int xx=xpos>>16; register unsigned int xx=xpos>>16;
register unsigned int xalpha=(xpos&0xFFFF)>>9; register unsigned int xalpha=(xpos&0xFFFF)>>9;
dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
@ -2259,17 +2230,14 @@ static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth,
#endif #endif
{ {
c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
} } else { // fast bilinear upscale / crap downscale
else // fast bilinear upscale / crap downscale
{
#if ARCH_X86 && CONFIG_GPL #if ARCH_X86 && CONFIG_GPL
#if COMPILE_TEMPLATE_MMX2 #if COMPILE_TEMPLATE_MMX2
int i; int i;
#if defined(PIC) #if defined(PIC)
DECLARE_ALIGNED(8, uint64_t, ebxsave); DECLARE_ALIGNED(8, uint64_t, ebxsave);
#endif #endif
if (canMMX2BeUsed) if (canMMX2BeUsed) {
{
__asm__ volatile( __asm__ volatile(
#if defined(PIC) #if defined(PIC)
"mov %%"REG_b", %5 \n\t" "mov %%"REG_b", %5 \n\t"
@ -2328,9 +2296,7 @@ static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth,
#endif #endif
); );
for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128; for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
} } else {
else
{
#endif /* COMPILE_TEMPLATE_MMX2 */ #endif /* COMPILE_TEMPLATE_MMX2 */
x86_reg xInc_shr16 = xInc >> 16; x86_reg xInc_shr16 = xInc >> 16;
uint16_t xInc_mask = xInc & 0xffff; uint16_t xInc_mask = xInc & 0xffff;
@ -2372,14 +2338,14 @@ static inline void RENAME(hyscale)(SwsContext *c, uint16_t *dst, long dstWidth,
#endif /* ARCH_X86 */ #endif /* ARCH_X86 */
} }
if(!isAlpha && c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){ if(!isAlpha && c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))) {
int i; int i;
//FIXME all pal and rgb srcFormats could do this conversion as well //FIXME all pal and rgb srcFormats could do this conversion as well
//FIXME all scalers more complex than bilinear could do half of this transform //FIXME all scalers more complex than bilinear could do half of this transform
if(c->srcRange){ if(c->srcRange) {
for (i=0; i<dstWidth; i++) for (i=0; i<dstWidth; i++)
dst[i]= (dst[i]*14071 + 33561947)>>14; dst[i]= (dst[i]*14071 + 33561947)>>14;
}else{ } else {
for (i=0; i<dstWidth; i++) for (i=0; i<dstWidth; i++)
dst[i]= (FFMIN(dst[i],30189)*19077 - 39057361)>>14; dst[i]= (FFMIN(dst[i],30189)*19077 - 39057361)>>14;
} }
@ -2392,8 +2358,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
{ {
int i; int i;
unsigned int xpos=0; unsigned int xpos=0;
for (i=0;i<dstWidth;i++) for (i=0;i<dstWidth;i++) {
{
register unsigned int xx=xpos>>16; register unsigned int xx=xpos>>16;
register unsigned int xalpha=(xpos&0xFFFF)>>9; register unsigned int xalpha=(xpos&0xFFFF)>>9;
dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
@ -2445,17 +2410,14 @@ inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth,
{ {
c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
} } else { // fast bilinear upscale / crap downscale
else // fast bilinear upscale / crap downscale
{
#if ARCH_X86 && CONFIG_GPL #if ARCH_X86 && CONFIG_GPL
#if COMPILE_TEMPLATE_MMX2 #if COMPILE_TEMPLATE_MMX2
int i; int i;
#if defined(PIC) #if defined(PIC)
DECLARE_ALIGNED(8, uint64_t, ebxsave); DECLARE_ALIGNED(8, uint64_t, ebxsave);
#endif #endif
if (canMMX2BeUsed) if (canMMX2BeUsed) {
{
__asm__ volatile( __asm__ volatile(
#if defined(PIC) #if defined(PIC)
"mov %%"REG_b", %6 \n\t" "mov %%"REG_b", %6 \n\t"
@ -2500,15 +2462,12 @@ inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth,
,"%"REG_b ,"%"REG_b
#endif #endif
); );
for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) for (i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) {
{
//printf("%d %d %d\n", dstWidth, i, srcW); //printf("%d %d %d\n", dstWidth, i, srcW);
dst[i] = src1[srcW-1]*128; dst[i] = src1[srcW-1]*128;
dst[i+VOFW] = src2[srcW-1]*128; dst[i+VOFW] = src2[srcW-1]*128;
} }
} } else {
else
{
#endif /* COMPILE_TEMPLATE_MMX2 */ #endif /* COMPILE_TEMPLATE_MMX2 */
x86_reg xInc_shr16 = (x86_reg) (xInc >> 16); x86_reg xInc_shr16 = (x86_reg) (xInc >> 16);
uint16_t xInc_mask = xInc & 0xffff; uint16_t xInc_mask = xInc & 0xffff;
@ -2552,17 +2511,17 @@ inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth,
c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc); c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc);
#endif /* ARCH_X86 */ #endif /* ARCH_X86 */
} }
if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))){ if(c->srcRange != c->dstRange && !(isRGB(c->dstFormat) || isBGR(c->dstFormat))) {
int i; int i;
//FIXME all pal and rgb srcFormats could do this conversion as well //FIXME all pal and rgb srcFormats could do this conversion as well
//FIXME all scalers more complex than bilinear could do half of this transform //FIXME all scalers more complex than bilinear could do half of this transform
if(c->srcRange){ if(c->srcRange) {
for (i=0; i<dstWidth; i++){ for (i=0; i<dstWidth; i++) {
dst[i ]= (dst[i ]*1799 + 4081085)>>11; //1469 dst[i ]= (dst[i ]*1799 + 4081085)>>11; //1469
dst[i+VOFW]= (dst[i+VOFW]*1799 + 4081085)>>11; //1469 dst[i+VOFW]= (dst[i+VOFW]*1799 + 4081085)>>11; //1469
} }
}else{ } else {
for (i=0; i<dstWidth; i++){ for (i=0; i<dstWidth; i++) {
dst[i ]= (FFMIN(dst[i ],30775)*4663 - 9289992)>>12; //-264 dst[i ]= (FFMIN(dst[i ],30775)*4663 - 9289992)>>12; //-264
dst[i+VOFW]= (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264 dst[i+VOFW]= (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264
} }
@ -2571,8 +2530,8 @@ inline static void RENAME(hcscale)(SwsContext *c, uint16_t *dst, long dstWidth,
} }
static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
/* load a few things into local vars to make the code more readable? and faster */ /* load a few things into local vars to make the code more readable? and faster */
const int srcW= c->srcW; const int srcW= c->srcW;
const int dstW= c->dstW; const int dstW= c->dstW;
@ -2617,7 +2576,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
int lastInLumBuf= c->lastInLumBuf; int lastInLumBuf= c->lastInLumBuf;
int lastInChrBuf= c->lastInChrBuf; int lastInChrBuf= c->lastInChrBuf;
if (isPacked(c->srcFormat)){ if (isPacked(c->srcFormat)) {
src[0]= src[0]=
src[1]= src[1]=
src[2]= src[2]=
@ -2636,11 +2595,9 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
//printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2], //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2],
//dstStride[0],dstStride[1],dstStride[2]); //dstStride[0],dstStride[1],dstStride[2]);
if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) if (dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0 || dstStride[3]%8 != 0) {
{
static int warnedAlready=0; //FIXME move this into the context perhaps static int warnedAlready=0; //FIXME move this into the context perhaps
if (flags & SWS_PRINT_INFO && !warnedAlready) if (flags & SWS_PRINT_INFO && !warnedAlready) {
{
av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n" av_log(c, AV_LOG_WARNING, "Warning: dstStride is not aligned!\n"
" ->cannot do aligned memory accesses anymore\n"); " ->cannot do aligned memory accesses anymore\n");
warnedAlready=1; warnedAlready=1;
@ -2650,7 +2607,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
/* Note the user might start scaling the picture in the middle so this /* Note the user might start scaling the picture in the middle so this
will not get executed. This is not really intended but works will not get executed. This is not really intended but works
currently, so people might do it. */ currently, so people might do it. */
if (srcSliceY ==0){ if (srcSliceY ==0) {
lumBufIndex=0; lumBufIndex=0;
chrBufIndex=0; chrBufIndex=0;
dstY=0; dstY=0;
@ -2660,7 +2617,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
lastDstY= dstY; lastDstY= dstY;
for (;dstY < dstH; dstY++){ for (;dstY < dstH; dstY++) {
unsigned char *dest =dst[0]+dstStride[0]*dstY; unsigned char *dest =dst[0]+dstStride[0]*dstY;
const int chrDstY= dstY>>c->chrDstVSubSample; const int chrDstY= dstY>>c->chrDstVSubSample;
unsigned char *uDest=dst[1]+dstStride[1]*chrDstY; unsigned char *uDest=dst[1]+dstStride[1]*chrDstY;
@ -2695,8 +2652,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
vChrBufSize, vLumBufSize);*/ vChrBufSize, vLumBufSize);*/
//Do horizontal scaling //Do horizontal scaling
while(lastInLumBuf < lastLumSrcY) while(lastInLumBuf < lastLumSrcY) {
{
uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; uint8_t *src1= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0];
uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3]; uint8_t *src2= src[3]+(lastInLumBuf + 1 - srcSliceY)*srcStride[3];
lumBufIndex++; lumBufIndex++;
@ -2716,8 +2672,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
pal, 1); pal, 1);
lastInLumBuf++; lastInLumBuf++;
} }
while(lastInChrBuf < lastChrSrcY) while(lastInChrBuf < lastChrSrcY) {
{
uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1];
uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2];
chrBufIndex++; chrBufIndex++;
@ -2747,52 +2702,49 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
c->greenDither= ff_dither4[dstY&1]; c->greenDither= ff_dither4[dstY&1];
c->redDither= ff_dither8[(dstY+1)&1]; c->redDither= ff_dither8[(dstY+1)&1];
#endif #endif
if (dstY < dstH-2) if (dstY < dstH-2) {
{
const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
#if COMPILE_TEMPLATE_MMX #if COMPILE_TEMPLATE_MMX
int i; int i;
if (flags & SWS_ACCURATE_RND){ if (flags & SWS_ACCURATE_RND) {
int s= APCK_SIZE / 8; int s= APCK_SIZE / 8;
for (i=0; i<vLumFilterSize; i+=2){ for (i=0; i<vLumFilterSize; i+=2) {
*(void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ]; *(void**)&lumMmxFilter[s*i ]= lumSrcPtr[i ];
*(void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)]; *(void**)&lumMmxFilter[s*i+APCK_PTR2/4 ]= lumSrcPtr[i+(vLumFilterSize>1)];
lumMmxFilter[s*i+APCK_COEF/4 ]= lumMmxFilter[s*i+APCK_COEF/4 ]=
lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ] lumMmxFilter[s*i+APCK_COEF/4+1]= vLumFilter[dstY*vLumFilterSize + i ]
+ (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0); + (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
if (CONFIG_SWSCALE_ALPHA && alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
*(void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ]; *(void**)&alpMmxFilter[s*i ]= alpSrcPtr[i ];
*(void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)]; *(void**)&alpMmxFilter[s*i+APCK_PTR2/4 ]= alpSrcPtr[i+(vLumFilterSize>1)];
alpMmxFilter[s*i+APCK_COEF/4 ]= alpMmxFilter[s*i+APCK_COEF/4 ]=
alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ]; alpMmxFilter[s*i+APCK_COEF/4+1]= lumMmxFilter[s*i+APCK_COEF/4 ];
} }
} }
for (i=0; i<vChrFilterSize; i+=2){ for (i=0; i<vChrFilterSize; i+=2) {
*(void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ]; *(void**)&chrMmxFilter[s*i ]= chrSrcPtr[i ];
*(void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)]; *(void**)&chrMmxFilter[s*i+APCK_PTR2/4 ]= chrSrcPtr[i+(vChrFilterSize>1)];
chrMmxFilter[s*i+APCK_COEF/4 ]= chrMmxFilter[s*i+APCK_COEF/4 ]=
chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ] chrMmxFilter[s*i+APCK_COEF/4+1]= vChrFilter[chrDstY*vChrFilterSize + i ]
+ (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0); + (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
} }
}else{ } else {
for (i=0; i<vLumFilterSize; i++) for (i=0; i<vLumFilterSize; i++) {
{
lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i]; lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32; lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
lumMmxFilter[4*i+2]= lumMmxFilter[4*i+2]=
lumMmxFilter[4*i+3]= lumMmxFilter[4*i+3]=
((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001; ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
if (CONFIG_SWSCALE_ALPHA && alpPixBuf){ if (CONFIG_SWSCALE_ALPHA && alpPixBuf) {
alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i]; alpMmxFilter[4*i+0]= (int32_t)alpSrcPtr[i];
alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32; alpMmxFilter[4*i+1]= (uint64_t)alpSrcPtr[i] >> 32;
alpMmxFilter[4*i+2]= alpMmxFilter[4*i+2]=
alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2]; alpMmxFilter[4*i+3]= lumMmxFilter[4*i+2];
} }
} }
for (i=0; i<vChrFilterSize; i++) for (i=0; i<vChrFilterSize; i++) {
{
chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i]; chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32; chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
chrMmxFilter[4*i+2]= chrMmxFilter[4*i+2]=
@ -2801,87 +2753,72 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
} }
} }
#endif #endif
if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){ if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
const int chrSkipMask= (1<<c->chrDstVSubSample)-1; const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
c->yuv2nv12X(c, c->yuv2nv12X(c,
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, uDest, dstW, chrDstW, dstFormat); dest, uDest, dstW, chrDstW, dstFormat);
} } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like
else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) //YV12 like
{
const int chrSkipMask= (1<<c->chrDstVSubSample)-1; const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
if (is16BPS(dstFormat)) if (is16BPS(dstFormat)) {
{
yuv2yuvX16inC( yuv2yuvX16inC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW, alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
dstFormat); dstFormat);
} } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12
else
if (vLumFilterSize == 1 && vChrFilterSize == 1) // unscaled YV12
{
int16_t *lumBuf = lumPixBuf[0]; int16_t *lumBuf = lumPixBuf[0];
int16_t *chrBuf= chrPixBuf[0]; int16_t *chrBuf= chrPixBuf[0];
int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpPixBuf[0] : NULL; int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpPixBuf[0] : NULL;
c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW); c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW);
} } else { //General YV12
else //General YV12
{
c->yuv2yuvX(c, c->yuv2yuvX(c,
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW); alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
} }
} } else {
else
{
assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
if (vLumFilterSize == 1 && vChrFilterSize == 2) //unscaled RGB if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB
{
int chrAlpha= vChrFilter[2*dstY+1]; int chrAlpha= vChrFilter[2*dstY+1];
if(flags & SWS_FULL_CHR_H_INT){ if(flags & SWS_FULL_CHR_H_INT) {
yuv2rgbXinC_full(c, //FIXME write a packed1_full function yuv2rgbXinC_full(c, //FIXME write a packed1_full function
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY); alpSrcPtr, dest, dstW, dstY);
}else{ } else {
c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1),
alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *alpSrcPtr : NULL,
dest, dstW, chrAlpha, dstFormat, flags, dstY); dest, dstW, chrAlpha, dstFormat, flags, dstY);
} }
} } else if (vLumFilterSize == 2 && vChrFilterSize == 2) { //bilinear upscale RGB
else if (vLumFilterSize == 2 && vChrFilterSize == 2) //bilinear upscale RGB
{
int lumAlpha= vLumFilter[2*dstY+1]; int lumAlpha= vLumFilter[2*dstY+1];
int chrAlpha= vChrFilter[2*dstY+1]; int chrAlpha= vChrFilter[2*dstY+1];
lumMmxFilter[2]= lumMmxFilter[2]=
lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001; lumMmxFilter[3]= vLumFilter[2*dstY ]*0x10001;
chrMmxFilter[2]= chrMmxFilter[2]=
chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001; chrMmxFilter[3]= vChrFilter[2*chrDstY]*0x10001;
if(flags & SWS_FULL_CHR_H_INT){ if(flags & SWS_FULL_CHR_H_INT) {
yuv2rgbXinC_full(c, //FIXME write a packed2_full function yuv2rgbXinC_full(c, //FIXME write a packed2_full function
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY); alpSrcPtr, dest, dstW, dstY);
}else{ } else {
c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL, alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL,
dest, dstW, lumAlpha, chrAlpha, dstY); dest, dstW, lumAlpha, chrAlpha, dstY);
} }
} } else { //general RGB
else //general RGB if(flags & SWS_FULL_CHR_H_INT) {
{
if(flags & SWS_FULL_CHR_H_INT){
yuv2rgbXinC_full(c, yuv2rgbXinC_full(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY); alpSrcPtr, dest, dstW, dstY);
}else{ } else {
c->yuv2packedX(c, c->yuv2packedX(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
@ -2889,50 +2826,41 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
} }
} }
} }
} } else { // hmm looks like we can't use MMX here without overwriting this array's tail
else // hmm looks like we can't use MMX here without overwriting this array's tail
{
const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL;
if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21){ if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) {
const int chrSkipMask= (1<<c->chrDstVSubSample)-1; const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi
yuv2nv12XinC( yuv2nv12XinC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
dest, uDest, dstW, chrDstW, dstFormat); dest, uDest, dstW, chrDstW, dstFormat);
} } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12
else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) //YV12
{
const int chrSkipMask= (1<<c->chrDstVSubSample)-1; const int chrSkipMask= (1<<c->chrDstVSubSample)-1;
if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi
if (is16BPS(dstFormat)) if (is16BPS(dstFormat)) {
{
yuv2yuvX16inC( yuv2yuvX16inC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW, alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW,
dstFormat); dstFormat);
} } else {
else
{
yuv2yuvXinC( yuv2yuvXinC(
vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW); alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW);
} }
} } else {
else
{
assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2);
assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2);
if(flags & SWS_FULL_CHR_H_INT){ if(flags & SWS_FULL_CHR_H_INT) {
yuv2rgbXinC_full(c, yuv2rgbXinC_full(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
alpSrcPtr, dest, dstW, dstY); alpSrcPtr, dest, dstW, dstY);
}else{ } else {
yuv2packedXinC(c, yuv2packedXinC(c,
vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,

@ -63,15 +63,15 @@ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
if (c->flags & SWS_CPU_CAPS_MMX2) { if (c->flags & SWS_CPU_CAPS_MMX2) {
switch (c->dstFormat) { switch (c->dstFormat) {
case PIX_FMT_RGB32: case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P){ if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
if (HAVE_7REGS) return yuva420_rgb32_MMX2; if (HAVE_7REGS) return yuva420_rgb32_MMX2;
break; break;
}else return yuv420_rgb32_MMX2; } else return yuv420_rgb32_MMX2;
case PIX_FMT_BGR32: case PIX_FMT_BGR32:
if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P){ if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
if (HAVE_7REGS) return yuva420_bgr32_MMX2; if (HAVE_7REGS) return yuva420_bgr32_MMX2;
break; break;
}else return yuv420_bgr32_MMX2; } else return yuv420_bgr32_MMX2;
case PIX_FMT_BGR24: return yuv420_rgb24_MMX2; case PIX_FMT_BGR24: return yuv420_rgb24_MMX2;
case PIX_FMT_RGB565: return yuv420_rgb16_MMX2; case PIX_FMT_RGB565: return yuv420_rgb16_MMX2;
case PIX_FMT_RGB555: return yuv420_rgb15_MMX2; case PIX_FMT_RGB555: return yuv420_rgb15_MMX2;
@ -80,15 +80,15 @@ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c)
if (c->flags & SWS_CPU_CAPS_MMX) { if (c->flags & SWS_CPU_CAPS_MMX) {
switch (c->dstFormat) { switch (c->dstFormat) {
case PIX_FMT_RGB32: case PIX_FMT_RGB32:
if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P){ if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
if (HAVE_7REGS) return yuva420_rgb32_MMX; if (HAVE_7REGS) return yuva420_rgb32_MMX;
break; break;
}else return yuv420_rgb32_MMX; } else return yuv420_rgb32_MMX;
case PIX_FMT_BGR32: case PIX_FMT_BGR32:
if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P){ if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) {
if (HAVE_7REGS) return yuva420_bgr32_MMX; if (HAVE_7REGS) return yuva420_bgr32_MMX;
break; break;
}else return yuv420_bgr32_MMX; } else return yuv420_bgr32_MMX;
case PIX_FMT_BGR24: return yuv420_rgb24_MMX; case PIX_FMT_BGR24: return yuv420_rgb24_MMX;
case PIX_FMT_RGB565: return yuv420_rgb16_MMX; case PIX_FMT_RGB565: return yuv420_rgb16_MMX;
case PIX_FMT_RGB555: return yuv420_rgb15_MMX; case PIX_FMT_RGB555: return yuv420_rgb15_MMX;

@ -122,7 +122,7 @@
#define YUV422_UNSHIFT \ #define YUV422_UNSHIFT \
if(c->srcFormat == PIX_FMT_YUV422P){ \ if(c->srcFormat == PIX_FMT_YUV422P) {\
srcStride[1] *= 2; \ srcStride[1] *= 2; \
srcStride[2] *= 2; \ srcStride[2] *= 2; \
} \ } \
@ -180,7 +180,8 @@
return srcSliceH; \ return srcSliceH; \
static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
int y, h_size; int y, h_size;
YUV422_UNSHIFT YUV422_UNSHIFT
@ -236,7 +237,8 @@ static inline int RENAME(yuv420_rgb16)(SwsContext *c, uint8_t* src[], int srcStr
} }
static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
int y, h_size; int y, h_size;
YUV422_UNSHIFT YUV422_UNSHIFT
@ -294,7 +296,8 @@ static inline int RENAME(yuv420_rgb15)(SwsContext *c, uint8_t* src[], int srcStr
} }
static inline int RENAME(yuv420_rgb24)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static inline int RENAME(yuv420_rgb24)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
int y, h_size; int y, h_size;
YUV422_UNSHIFT YUV422_UNSHIFT
@ -470,7 +473,8 @@ etc.
"movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ \ "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */ \
static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
int y, h_size; int y, h_size;
YUV422_UNSHIFT YUV422_UNSHIFT
@ -486,7 +490,8 @@ static inline int RENAME(yuv420_rgb32)(SwsContext *c, uint8_t* src[], int srcStr
} }
static inline int RENAME(yuva420_rgb32)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, static inline int RENAME(yuva420_rgb32)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
#if HAVE_7REGS #if HAVE_7REGS
int y, h_size; int y, h_size;
@ -504,7 +509,8 @@ static inline int RENAME(yuva420_rgb32)(SwsContext *c, uint8_t* src[], int srcSt
} }
static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
int y, h_size; int y, h_size;
YUV422_UNSHIFT YUV422_UNSHIFT
@ -520,7 +526,8 @@ static inline int RENAME(yuv420_bgr32)(SwsContext *c, const uint8_t* src[], int
} }
static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY, static inline int RENAME(yuva420_bgr32)(SwsContext *c, const uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[]){ int srcSliceH, uint8_t* dst[], int dstStride[])
{
#if HAVE_7REGS #if HAVE_7REGS
int y, h_size; int y, h_size;

@ -92,7 +92,8 @@ const int32_t ff_yuv2rgb_coeffs[8][4] = {
#define YUV2RGBFUNC(func_name, dst_type, alpha) \ #define YUV2RGBFUNC(func_name, dst_type, alpha) \
static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, \ static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, \
int srcSliceH, uint8_t* dst[], int dstStride[]){\ int srcSliceH, uint8_t* dst[], int dstStride[]) \
{\
int y;\ int y;\
\ \
if (!alpha && c->srcFormat == PIX_FMT_YUV422P) {\ if (!alpha && c->srcFormat == PIX_FMT_YUV422P) {\
@ -110,7 +111,7 @@ static int func_name(SwsContext *c, uint8_t* src[], int srcStride[], int srcSlic
uint8_t *pv = src[2] + (y>>1)*srcStride[2];\ uint8_t *pv = src[2] + (y>>1)*srcStride[2];\
uint8_t av_unused *pa_1, *pa_2;\ uint8_t av_unused *pa_1, *pa_2;\
unsigned int h_size = c->dstW>>3;\ unsigned int h_size = c->dstW>>3;\
if (alpha){\ if (alpha) {\
pa_1 = src[3] + y*srcStride[3];\ pa_1 = src[3] + y*srcStride[3];\
pa_2 = pa_1 + srcStride[3];\ pa_2 = pa_1 + srcStride[3];\
}\ }\

Loading…
Cancel
Save