swscaler cleanup

Bugfix: green line at the bottom
Bugfix: green lines in the yuv2yuv scaler

Originally committed as revision 3210 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
pull/126/head
Michael Niedermayer 23 years ago
parent 6e9b48406c
commit d1fac6cf52
  1. postproc/swscale.c — 30 changes
  2. postproc/swscale.h — 9 changes
  3. postproc/swscale_template.c — 215 changes

@ -7,6 +7,7 @@
#include <inttypes.h> #include <inttypes.h>
#include <string.h> #include <string.h>
//#include <stdio.h> //FOR DEBUG ONLY
#include "../config.h" #include "../config.h"
#include "swscale.h" #include "swscale.h"
#include "../cpudetect.h" #include "../cpudetect.h"
@ -222,39 +223,34 @@ void in_asm_used_var_warning_killer()
// *** bilinear scaling and yuv->rgb or yuv->yuv conversion of yv12 slices: // *** bilinear scaling and yuv->rgb or yuv->yuv conversion of yv12 slices:
// *** Note: it's called multiple times while decoding a frame, first time y==0 // *** Note: it's called multiple times while decoding a frame, first time y==0
// *** Designed to upscale, but may work for downscale too. // *** Designed to upscale, but may work for downscale too.
// s_xinc = (src_width << 16) / dst_width
// s_yinc = (src_height << 16) / dst_height
// switching the cpu type during a sliced drawing can have bad effects, like sig11 // switching the cpu type during a sliced drawing can have bad effects, like sig11
void SwScale_YV12slice(unsigned char* srcptr[],int stride[], int y, int h, void SwScale_YV12slice(unsigned char* srcptr[],int stride[], int srcSliceY ,
uint8_t* dstptr[], int dststride, int dstw, int dstbpp, int srcSliceH, uint8_t* dstptr[], int dststride, int dstbpp,
unsigned int s_xinc,unsigned int s_yinc){ int srcW, int srcH, int dstW, int dstH){
// scaling factors:
//static int s_yinc=(vo_dga_src_height<<16)/vo_dga_vp_height;
//static int s_xinc=(vo_dga_src_width<<8)/vo_dga_vp_width;
#ifdef RUNTIME_CPUDETECT #ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM #ifdef CAN_COMPILE_X86_ASM
// ordered per speed fasterst first // ordered per speed fasterst first
if(gCpuCaps.hasMMX2) if(gCpuCaps.hasMMX2)
SwScale_YV12slice_MMX2(srcptr, stride, y, h, dstptr, dststride, dstw, dstbpp, s_xinc, s_yinc); SwScale_YV12slice_MMX2(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
else if(gCpuCaps.has3DNow) else if(gCpuCaps.has3DNow)
SwScale_YV12slice_3DNow(srcptr, stride, y, h, dstptr, dststride, dstw, dstbpp, s_xinc, s_yinc); SwScale_YV12slice_3DNow(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
else if(gCpuCaps.hasMMX) else if(gCpuCaps.hasMMX)
SwScale_YV12slice_MMX(srcptr, stride, y, h, dstptr, dststride, dstw, dstbpp, s_xinc, s_yinc); SwScale_YV12slice_MMX(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
else else
SwScale_YV12slice_C(srcptr, stride, y, h, dstptr, dststride, dstw, dstbpp, s_xinc, s_yinc); SwScale_YV12slice_C(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
#else #else
SwScale_YV12slice_C(srcptr, stride, y, h, dstptr, dststride, dstw, dstbpp, s_xinc, s_yinc); SwScale_YV12slice_C(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
#endif #endif
#else //RUNTIME_CPUDETECT #else //RUNTIME_CPUDETECT
#ifdef HAVE_MMX2 #ifdef HAVE_MMX2
SwScale_YV12slice_MMX2(srcptr, stride, y, h, dstptr, dststride, dstw, dstbpp, s_xinc, s_yinc); SwScale_YV12slice_MMX2(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
#elif defined (HAVE_3DNOW) #elif defined (HAVE_3DNOW)
SwScale_YV12slice_3DNow(srcptr, stride, y, h, dstptr, dststride, dstw, dstbpp, s_xinc, s_yinc); SwScale_YV12slice_3DNow(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
#elif defined (HAVE_MMX) #elif defined (HAVE_MMX)
SwScale_YV12slice_MMX(srcptr, stride, y, h, dstptr, dststride, dstw, dstbpp, s_xinc, s_yinc); SwScale_YV12slice_MMX(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
#else #else
SwScale_YV12slice_C(srcptr, stride, y, h, dstptr, dststride, dstw, dstbpp, s_xinc, s_yinc); SwScale_YV12slice_C(srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH);
#endif #endif
#endif //!RUNTIME_CPUDETECT #endif //!RUNTIME_CPUDETECT

@ -2,12 +2,9 @@
// *** bilinear scaling and yuv->rgb & yuv->yuv conversion of yv12 slices: // *** bilinear scaling and yuv->rgb & yuv->yuv conversion of yv12 slices:
// *** Note: it's called multiple times while decoding a frame, first time y==0 // *** Note: it's called multiple times while decoding a frame, first time y==0
// *** Designed to upscale, but may work for downscale too. // *** Designed to upscale, but may work for downscale too.
// s_xinc = (src_width << 8) / dst_width
// s_yinc = (src_height << 16) / dst_height
// dstbpp == 12 -> yv12 output // dstbpp == 12 -> yv12 output
void SwScale_YV12slice(unsigned char* srcptr[],int stride[], int y, int h, void SwScale_YV12slice(unsigned char* srcptr[],int stride[], int srcSliceY,
uint8_t* dstptr[], int dststride, int dstw, int dstbpp, int srcSliceH, uint8_t* dstptr[], int dststride, int dstbpp,
unsigned int s_xinc,unsigned int s_yinc); int srcW, int srcH, int dstW, int dstH);
// generating tables // generating tables
void SwScale_Init(); void SwScale_Init();

@ -520,7 +520,7 @@
#endif #endif
static inline void RENAME(yuv2yuv)(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, static inline void RENAME(yuv2yuv)(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstw, int yalpha, int uvalpha) uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int yalpha, int uvalpha)
{ {
int yalpha1=yalpha^4095; int yalpha1=yalpha^4095;
int uvalpha1=uvalpha^4095; int uvalpha1=uvalpha^4095;
@ -530,14 +530,14 @@ static inline void RENAME(yuv2yuv)(uint16_t *buf0, uint16_t *buf1, uint16_t *uvb
asm volatile ("\n\t"::: "memory"); asm volatile ("\n\t"::: "memory");
#endif #endif
for(i=0;i<dstw;i++) for(i=0;i<dstW;i++)
{ {
((uint8_t*)dest)[i] = (buf0[i]*yalpha1+buf1[i]*yalpha)>>19; ((uint8_t*)dest)[i] = (buf0[i]*yalpha1+buf1[i]*yalpha)>>19;
} }
if(uvalpha != -1) if(uvalpha != -1)
{ {
for(i=0; i<(dstw>>1); i++) for(i=0; i<(dstW>>1); i++)
{ {
((uint8_t*)uDest)[i] = (uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19; ((uint8_t*)uDest)[i] = (uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19;
((uint8_t*)vDest)[i] = (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19; ((uint8_t*)vDest)[i] = (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;
@ -549,7 +549,7 @@ static inline void RENAME(yuv2yuv)(uint16_t *buf0, uint16_t *buf1, uint16_t *uvb
* vertical scale YV12 to RGB * vertical scale YV12 to RGB
*/ */
static inline void RENAME(yuv2rgbX)(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, static inline void RENAME(yuv2rgbX)(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
uint8_t *dest, int dstw, int yalpha, int uvalpha, int dstbpp) uint8_t *dest, int dstW, int yalpha, int uvalpha, int dstbpp)
{ {
int yalpha1=yalpha^4095; int yalpha1=yalpha^4095;
int uvalpha1=uvalpha^4095; int uvalpha1=uvalpha^4095;
@ -579,7 +579,7 @@ FULL_YSCALEYUV2RGB
" jb 1b \n\t" " jb 1b \n\t"
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
"m" (yalpha1), "m" (uvalpha1) "m" (yalpha1), "m" (uvalpha1)
: "%eax" : "%eax"
); );
@ -629,7 +629,7 @@ FULL_YSCALEYUV2RGB
"cmpl %5, %%eax \n\t" "cmpl %5, %%eax \n\t"
" jb 1b \n\t" " jb 1b \n\t"
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstw), :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW),
"m" (yalpha1), "m" (uvalpha1) "m" (yalpha1), "m" (uvalpha1)
: "%eax", "%ebx" : "%eax", "%ebx"
); );
@ -663,7 +663,7 @@ FULL_YSCALEYUV2RGB
"cmpl %5, %%eax \n\t" "cmpl %5, %%eax \n\t"
" jb 1b \n\t" " jb 1b \n\t"
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
"m" (yalpha1), "m" (uvalpha1) "m" (yalpha1), "m" (uvalpha1)
: "%eax" : "%eax"
); );
@ -697,7 +697,7 @@ FULL_YSCALEYUV2RGB
"cmpl %5, %%eax \n\t" "cmpl %5, %%eax \n\t"
" jb 1b \n\t" " jb 1b \n\t"
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
"m" (yalpha1), "m" (uvalpha1) "m" (yalpha1), "m" (uvalpha1)
: "%eax" : "%eax"
); );
@ -708,7 +708,7 @@ FULL_YSCALEYUV2RGB
if(dstbpp==32 || dstbpp==24) if(dstbpp==32 || dstbpp==24)
{ {
int i; int i;
for(i=0;i<dstw;i++){ for(i=0;i<dstW;i++){
// vertical linear interpolation && yuv2rgb in a single step: // vertical linear interpolation && yuv2rgb in a single step:
int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
@ -722,7 +722,7 @@ FULL_YSCALEYUV2RGB
else if(dstbpp==16) else if(dstbpp==16)
{ {
int i; int i;
for(i=0;i<dstw;i++){ for(i=0;i<dstW;i++){
// vertical linear interpolation && yuv2rgb in a single step: // vertical linear interpolation && yuv2rgb in a single step:
int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
@ -737,7 +737,7 @@ FULL_YSCALEYUV2RGB
else if(dstbpp==15) else if(dstbpp==15)
{ {
int i; int i;
for(i=0;i<dstw;i++){ for(i=0;i<dstW;i++){
// vertical linear interpolation && yuv2rgb in a single step: // vertical linear interpolation && yuv2rgb in a single step:
int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
@ -760,7 +760,7 @@ FULL_YSCALEYUV2RGB
YSCALEYUV2RGB YSCALEYUV2RGB
WRITEBGR32 WRITEBGR32
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
"m" (yalpha1), "m" (uvalpha1) "m" (yalpha1), "m" (uvalpha1)
: "%eax" : "%eax"
); );
@ -772,7 +772,7 @@ FULL_YSCALEYUV2RGB
YSCALEYUV2RGB YSCALEYUV2RGB
WRITEBGR24 WRITEBGR24
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstw), :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW),
"m" (yalpha1), "m" (uvalpha1) "m" (yalpha1), "m" (uvalpha1)
: "%eax", "%ebx" : "%eax", "%ebx"
); );
@ -790,7 +790,7 @@ FULL_YSCALEYUV2RGB
WRITEBGR15 WRITEBGR15
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
"m" (yalpha1), "m" (uvalpha1) "m" (yalpha1), "m" (uvalpha1)
: "%eax" : "%eax"
); );
@ -808,7 +808,7 @@ FULL_YSCALEYUV2RGB
WRITEBGR16 WRITEBGR16
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
"m" (yalpha1), "m" (uvalpha1) "m" (yalpha1), "m" (uvalpha1)
: "%eax" : "%eax"
); );
@ -819,7 +819,7 @@ FULL_YSCALEYUV2RGB
if(dstbpp==32) if(dstbpp==32)
{ {
int i; int i;
for(i=0; i<dstw-1; i+=2){ for(i=0; i<dstW-1; i+=2){
// vertical linear interpolation && yuv2rgb in a single step: // vertical linear interpolation && yuv2rgb in a single step:
int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)]; int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
@ -842,7 +842,7 @@ FULL_YSCALEYUV2RGB
if(dstbpp==24) if(dstbpp==24)
{ {
int i; int i;
for(i=0; i<dstw-1; i+=2){ for(i=0; i<dstW-1; i+=2){
// vertical linear interpolation && yuv2rgb in a single step: // vertical linear interpolation && yuv2rgb in a single step:
int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)]; int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
@ -866,7 +866,7 @@ FULL_YSCALEYUV2RGB
else if(dstbpp==16) else if(dstbpp==16)
{ {
int i; int i;
for(i=0; i<dstw-1; i+=2){ for(i=0; i<dstW-1; i+=2){
// vertical linear interpolation && yuv2rgb in a single step: // vertical linear interpolation && yuv2rgb in a single step:
int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)]; int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
@ -891,7 +891,7 @@ FULL_YSCALEYUV2RGB
else if(dstbpp==15) else if(dstbpp==15)
{ {
int i; int i;
for(i=0; i<dstw-1; i+=2){ for(i=0; i<dstW-1; i+=2){
// vertical linear interpolation && yuv2rgb in a single step: // vertical linear interpolation && yuv2rgb in a single step:
int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)]; int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)];
@ -921,7 +921,7 @@ FULL_YSCALEYUV2RGB
* YV12 to RGB without scaling or interpolating * YV12 to RGB without scaling or interpolating
*/ */
static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1,
uint8_t *dest, int dstw, int yalpha, int uvalpha, int dstbpp) uint8_t *dest, int dstW, int yalpha, int uvalpha, int dstbpp)
{ {
int uvalpha1=uvalpha^4095; int uvalpha1=uvalpha^4095;
#ifdef HAVE_MMX #ifdef HAVE_MMX
@ -930,7 +930,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *buf1, uint16_t *uv
if(fullUVIpol || allwaysIpol) if(fullUVIpol || allwaysIpol)
{ {
RENAME(yuv2rgbX)(buf0, buf1, uvbuf0, uvbuf1, dest, dstw, yalpha, uvalpha, dstbpp); RENAME(yuv2rgbX)(buf0, buf1, uvbuf0, uvbuf1, dest, dstW, yalpha, uvalpha, dstbpp);
return; return;
} }
if( yalpha > 2048 ) buf0 = buf1; if( yalpha > 2048 ) buf0 = buf1;
@ -943,7 +943,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *buf1, uint16_t *uv
asm volatile( asm volatile(
YSCALEYUV2RGB1 YSCALEYUV2RGB1
WRITEBGR32 WRITEBGR32
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
"m" (yalpha1), "m" (uvalpha1) "m" (yalpha1), "m" (uvalpha1)
: "%eax" : "%eax"
); );
@ -954,7 +954,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *buf1, uint16_t *uv
"movl %4, %%ebx \n\t" "movl %4, %%ebx \n\t"
YSCALEYUV2RGB1 YSCALEYUV2RGB1
WRITEBGR24 WRITEBGR24
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstw), :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW),
"m" (yalpha1), "m" (uvalpha1) "m" (yalpha1), "m" (uvalpha1)
: "%eax", "%ebx" : "%eax", "%ebx"
); );
@ -970,7 +970,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *buf1, uint16_t *uv
"paddusb r5Dither, %%mm5 \n\t" "paddusb r5Dither, %%mm5 \n\t"
#endif #endif
WRITEBGR15 WRITEBGR15
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
"m" (yalpha1), "m" (uvalpha1) "m" (yalpha1), "m" (uvalpha1)
: "%eax" : "%eax"
); );
@ -987,7 +987,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *buf1, uint16_t *uv
#endif #endif
WRITEBGR16 WRITEBGR16
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
"m" (yalpha1), "m" (uvalpha1) "m" (yalpha1), "m" (uvalpha1)
: "%eax" : "%eax"
); );
@ -1000,7 +1000,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *buf1, uint16_t *uv
asm volatile( asm volatile(
YSCALEYUV2RGB1b YSCALEYUV2RGB1b
WRITEBGR32 WRITEBGR32
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
"m" (yalpha1), "m" (uvalpha1) "m" (yalpha1), "m" (uvalpha1)
: "%eax" : "%eax"
); );
@ -1011,7 +1011,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *buf1, uint16_t *uv
"movl %4, %%ebx \n\t" "movl %4, %%ebx \n\t"
YSCALEYUV2RGB1b YSCALEYUV2RGB1b
WRITEBGR24 WRITEBGR24
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstw), :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW),
"m" (yalpha1), "m" (uvalpha1) "m" (yalpha1), "m" (uvalpha1)
: "%eax", "%ebx" : "%eax", "%ebx"
); );
@ -1027,7 +1027,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *buf1, uint16_t *uv
"paddusb r5Dither, %%mm5 \n\t" "paddusb r5Dither, %%mm5 \n\t"
#endif #endif
WRITEBGR15 WRITEBGR15
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
"m" (yalpha1), "m" (uvalpha1) "m" (yalpha1), "m" (uvalpha1)
: "%eax" : "%eax"
); );
@ -1044,7 +1044,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *buf1, uint16_t *uv
#endif #endif
WRITEBGR16 WRITEBGR16
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstw), :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
"m" (yalpha1), "m" (uvalpha1) "m" (yalpha1), "m" (uvalpha1)
: "%eax" : "%eax"
); );
@ -1057,7 +1057,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *buf1, uint16_t *uv
if(dstbpp==32) if(dstbpp==32)
{ {
int i; int i;
for(i=0; i<dstw-1; i+=2){ for(i=0; i<dstW-1; i+=2){
// vertical linear interpolation && yuv2rgb in a single step: // vertical linear interpolation && yuv2rgb in a single step:
int Y1=yuvtab_2568[buf0[i]>>7]; int Y1=yuvtab_2568[buf0[i]>>7];
int Y2=yuvtab_2568[buf0[i+1]>>7]; int Y2=yuvtab_2568[buf0[i+1]>>7];
@ -1080,7 +1080,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *buf1, uint16_t *uv
if(dstbpp==24) if(dstbpp==24)
{ {
int i; int i;
for(i=0; i<dstw-1; i+=2){ for(i=0; i<dstW-1; i+=2){
// vertical linear interpolation && yuv2rgb in a single step: // vertical linear interpolation && yuv2rgb in a single step:
int Y1=yuvtab_2568[buf0[i]>>7]; int Y1=yuvtab_2568[buf0[i]>>7];
int Y2=yuvtab_2568[buf0[i+1]>>7]; int Y2=yuvtab_2568[buf0[i+1]>>7];
@ -1104,7 +1104,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *buf1, uint16_t *uv
else if(dstbpp==16) else if(dstbpp==16)
{ {
int i; int i;
for(i=0; i<dstw-1; i+=2){ for(i=0; i<dstW-1; i+=2){
// vertical linear interpolation && yuv2rgb in a single step: // vertical linear interpolation && yuv2rgb in a single step:
int Y1=yuvtab_2568[buf0[i]>>7]; int Y1=yuvtab_2568[buf0[i]>>7];
int Y2=yuvtab_2568[buf0[i+1]>>7]; int Y2=yuvtab_2568[buf0[i+1]>>7];
@ -1129,7 +1129,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *buf1, uint16_t *uv
else if(dstbpp==15) else if(dstbpp==15)
{ {
int i; int i;
for(i=0; i<dstw-1; i+=2){ for(i=0; i<dstW-1; i+=2){
// vertical linear interpolation && yuv2rgb in a single step: // vertical linear interpolation && yuv2rgb in a single step:
int Y1=yuvtab_2568[buf0[i]>>7]; int Y1=yuvtab_2568[buf0[i]>>7];
int Y2=yuvtab_2568[buf0[i+1]>>7]; int Y2=yuvtab_2568[buf0[i+1]>>7];
@ -1404,26 +1404,26 @@ FUNNYUVCODE
#endif #endif
} }
static void RENAME(SwScale_YV12slice)(unsigned char* srcptr[],int stride[], int y, int h, static void RENAME(SwScale_YV12slice)(unsigned char* srcptr[],int stride[], int srcSliceY ,
uint8_t* dstptr[], int dststride, int dstw, int dstbpp, int srcSliceH, uint8_t* dstptr[], int dststride, int dstbpp,
unsigned int s_xinc,unsigned int s_yinc){ int srcW, int srcH, int dstW, int dstH){
// scaling factors:
//static int s_yinc=(vo_dga_src_height<<16)/vo_dga_vp_height;
//static int s_xinc=(vo_dga_src_width<<8)/vo_dga_vp_width;
unsigned int s_xinc2; unsigned int s_xinc2;
//FIXME do we need th +-2 stuff?
unsigned int s_xinc= (srcW << 16) / dstW - 2;
unsigned int s_yinc= (srcH << 16) / dstH + 2;
static int s_srcypos; // points to the dst Pixels center in the source (0 is the center of pixel 0,0 in src) static int lumDstYInSrc; // points to the dst Pixels center in the source (0 is the center of pixel 0,0 in src)
static int s_ypos; static int dstY;
// last horzontally interpolated lines, used to avoid unnecessary calculations // last horzontally interpolated lines, used to avoid unnecessary calculations
static int s_last_ypos; static int lastLumSrcY;
static int s_last_y1pos; static int lastChrSrcY;
#ifdef HAVE_MMX2 #ifdef HAVE_MMX2
// used to detect a horizontal size change // used to detect a horizontal size change
static int old_dstw= -1; static int old_dstW= -1;
static int old_s_xinc= -1; static int old_s_xinc= -1;
#endif #endif
@ -1431,13 +1431,13 @@ int srcWidth;
int dstUVw; int dstUVw;
int i; int i;
if(((dstw + 7)&(~7)) >= dststride) dstw&= ~7; if(((dstW + 7)&(~7)) >= dststride) dstW&= ~7;
srcWidth= (dstw*s_xinc + 0x8000)>>16; srcWidth= (dstW*s_xinc + 0x8000)>>16;
dstUVw= fullUVIpol ? dstw : dstw/2; dstUVw= fullUVIpol ? dstW : dstW/2;
#ifdef HAVE_MMX2 #ifdef HAVE_MMX2
canMMX2BeUsed= (s_xinc <= 0x10000 && (dstw&31)==0 && (srcWidth&15)==0) ? 1 : 0; canMMX2BeUsed= (s_xinc <= 0x10000 && (dstW&31)==0 && (srcWidth&15)==0) ? 1 : 0;
#endif #endif
// match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
@ -1446,21 +1446,21 @@ canMMX2BeUsed= (s_xinc <= 0x10000 && (dstw&31)==0 && (srcWidth&15)==0) ? 1 : 0;
// would be like the vertical one, but that would require some special code for the // would be like the vertical one, but that would require some special code for the
// first and last pixel // first and last pixel
if(canMMX2BeUsed) s_xinc+= 20; if(canMMX2BeUsed) s_xinc+= 20;
else s_xinc = ((srcWidth-2)<<16)/(dstw-2) - 20; else s_xinc = ((srcWidth-2)<<16)/(dstW-2) - 20;
if(fullUVIpol && !(dstbpp==12)) s_xinc2= s_xinc>>1; if(fullUVIpol && !(dstbpp==12)) s_xinc2= s_xinc>>1;
else s_xinc2= s_xinc; else s_xinc2= s_xinc;
// force calculation of the horizontal interpolation of the first line // force calculation of the horizontal interpolation of the first line
if(y==0){ if(srcSliceY ==0){
// printf("dstw %d, srcw %d, mmx2 %d\n", dstw, srcWidth, canMMX2BeUsed); // printf("dstW %d, srcw %d, mmx2 %d\n", dstW, srcWidth, canMMX2BeUsed);
s_last_ypos=-99; lastLumSrcY=-99;
s_last_y1pos=-99; lastChrSrcY=-99;
s_srcypos= s_yinc/2 - 0x8000; lumDstYInSrc= s_yinc/2 - 0x8000;
s_ypos=0; dstY=0;
// clean the buffers so that no green stuff is drawen if the width is not sane (%8=0) // clean the buffers so that no green stuff is drawen if the width is not sane (%8=0)
for(i=dstw-2; i<dstw+20; i++) for(i=dstW-2; i<dstW+20; i++)
{ {
pix_buf_uv[0][i] = pix_buf_uv[1][i] pix_buf_uv[0][i] = pix_buf_uv[1][i]
= pix_buf_uv[0][2048+i] = pix_buf_uv[1][2048+i] = 128*128; = pix_buf_uv[0][2048+i] = pix_buf_uv[1][2048+i] = 128*128;
@ -1471,7 +1471,7 @@ else s_xinc2= s_xinc;
#ifdef HAVE_MMX2 #ifdef HAVE_MMX2
// cant downscale !!! // cant downscale !!!
if((old_s_xinc != s_xinc || old_dstw!=dstw) && canMMX2BeUsed) if((old_s_xinc != s_xinc || old_dstW!=dstW) && canMMX2BeUsed)
{ {
uint8_t *fragment; uint8_t *fragment;
int imm8OfPShufW1; int imm8OfPShufW1;
@ -1481,7 +1481,7 @@ else s_xinc2= s_xinc;
int xpos, i; int xpos, i;
old_s_xinc= s_xinc; old_s_xinc= s_xinc;
old_dstw= dstw; old_dstW= dstW;
// create an optimized horizontal scaling routine // create an optimized horizontal scaling routine
@ -1533,10 +1533,10 @@ else s_xinc2= s_xinc;
/* choose xinc so that all 8 parts fit exactly /* choose xinc so that all 8 parts fit exactly
Note: we cannot use just 1 part because it would not fit in the code cache */ Note: we cannot use just 1 part because it would not fit in the code cache */
// s_xinc2_diff= -((((s_xinc2*(dstw/8))&0xFFFF))/(dstw/8))-10; // s_xinc2_diff= -((((s_xinc2*(dstW/8))&0xFFFF))/(dstW/8))-10;
// s_xinc_diff= -((((s_xinc*(dstw/8))&0xFFFF))/(dstw/8)); // s_xinc_diff= -((((s_xinc*(dstW/8))&0xFFFF))/(dstW/8));
#ifdef ALT_ERROR #ifdef ALT_ERROR
// s_xinc2_diff+= ((0x10000/(dstw/8))); // s_xinc2_diff+= ((0x10000/(dstW/8)));
#endif #endif
// s_xinc_diff= s_xinc2_diff*2; // s_xinc_diff= s_xinc2_diff*2;
@ -1545,7 +1545,7 @@ else s_xinc2= s_xinc;
// old_s_xinc= s_xinc; // old_s_xinc= s_xinc;
for(i=0; i<dstw/8; i++) for(i=0; i<dstW/8; i++)
{ {
int xx=xpos>>16; int xx=xpos>>16;
@ -1604,96 +1604,99 @@ else s_xinc2= s_xinc;
} // reset counters } // reset counters
while(1){ while(1){
unsigned char *dest =dstptr[0]+dststride*s_ypos; unsigned char *dest =dstptr[0]+dststride*dstY;
unsigned char *uDest=dstptr[1]+(dststride>>1)*(s_ypos>>1); unsigned char *uDest=dstptr[1]+(dststride>>1)*(dstY>>1);
unsigned char *vDest=dstptr[2]+(dststride>>1)*(s_ypos>>1); unsigned char *vDest=dstptr[2]+(dststride>>1)*(dstY>>1);
int y0=(s_srcypos + 0xFFFF)>>16; // first luminance source line number below the dst line int lumSrcY=(lumDstYInSrc + 0xFFFF)>>16; // first luminance source line number below the dst line
// points to the dst Pixels center in the source (0 is the center of pixel 0,0 in src) // points to the dst Pixels center in the source (0 is the center of pixel 0,0 in src)
int srcuvpos= dstbpp==12 ? s_srcypos + s_yinc/2 - 0x8000 : int chrDstYInSrc= dstbpp==12 ? lumDstYInSrc + s_yinc/2 - 0x8000 :
s_srcypos - 0x8000; lumDstYInSrc - 0x8000;
int y1=(srcuvpos + 0x1FFFF)>>17; // first chrominance source line number below the dst line int chrSrcY=(chrDstYInSrc + 0x1FFFF)>>17; // first chrominance source line number below the dst line
int yalpha=((s_srcypos-1)&0xFFFF)>>4; int yalpha= ((lumDstYInSrc-1)&0xFFFF )>>4;
int uvalpha=((srcuvpos-1)&0x1FFFF)>>5; int uvalpha=((chrDstYInSrc-1)&0x1FFFF)>>5;
uint16_t *buf0=pix_buf_y[y0&1]; // top line of the interpolated slice uint16_t *buf0=pix_buf_y[ lumSrcY &1]; // top line of the interpolated slice
uint16_t *buf1=pix_buf_y[((y0+1)&1)]; // bottom line of the interpolated slice uint16_t *buf1=pix_buf_y[(lumSrcY+1)&1]; // bottom line of the interpolated slice
uint16_t *uvbuf0=pix_buf_uv[y1&1]; // top line of the interpolated slice uint16_t *uvbuf0=pix_buf_uv[ chrSrcY &1]; // top line of the interpolated slice
uint16_t *uvbuf1=pix_buf_uv[(y1+1)&1]; // bottom line of the interpolated slice uint16_t *uvbuf1=pix_buf_uv[(chrSrcY+1)&1]; // bottom line of the interpolated slice
if(y0>=y+h) break; // FIXME wrong, skips last lines, but they are dupliactes anyway // if(lumSrcY>=srcSliceY + srcSliceH) break; // wrong, skips last lines, but they are dupliactes anyway
if(dstY >= dstH) break;
if((y0&1) && dstbpp==12) uvalpha=-1; // there is no alpha if there is no line // printf("lumSrcY:%d, dstY:%d, yalpha:%d\n", lumSrcY, dstY, yalpha*100/0x1000);
s_ypos++; s_srcypos+=s_yinc; if((dstY&1) && dstbpp==12) uvalpha=-1;
dstY++; lumDstYInSrc+=s_yinc;
//only interpolate the src line horizontally if we didnt do it allready //only interpolate the src line horizontally if we didnt do it allready
if(s_last_ypos!=y0) if(lastLumSrcY!=lumSrcY)
{ {
unsigned char *src; unsigned char *src;
// skip if first line has been horiz scaled alleady // skip if first line has been horiz scaled alleady
if(s_last_ypos != y0-1) if(lastLumSrcY != lumSrcY-1)
{ {
// check if first line is before any available src lines // check if first line is before any available src lines
if(y0-1 < y) src=srcptr[0]+(0 )*stride[0]; if(lumSrcY-1 < srcSliceY ) src=srcptr[0]+(0 )*stride[0];
else src=srcptr[0]+(y0-y-1)*stride[0]; else src=srcptr[0]+(lumSrcY-srcSliceY -1)*stride[0];
RENAME(hyscale)(buf0, dstw, src, srcWidth, s_xinc); RENAME(hyscale)(buf0, dstW, src, srcWidth, s_xinc);
} }
// check if second line is after any available src lines // check if second line is after any available src lines
if(y0-y >= h) src=srcptr[0]+(h-1)*stride[0]; if(lumSrcY-srcSliceY >= srcSliceH) src=srcptr[0]+(srcSliceH-1 )*stride[0];
else src=srcptr[0]+(y0-y)*stride[0]; else src=srcptr[0]+(lumSrcY-srcSliceY )*stride[0];
// the min() is required to avoid reuseing lines which where not available // the min() is required to avoid reuseing lines which where not available
s_last_ypos= MIN(y0, y+h-1); lastLumSrcY= MIN(lumSrcY, srcSliceY +srcSliceH-1);
RENAME(hyscale)(buf1, dstw, src, srcWidth, s_xinc); RENAME(hyscale)(buf1, dstW, src, srcWidth, s_xinc);
} }
// printf("%d %d %d %d\n", y, y1, s_last_y1pos, h); // printf("%d %d %d %d\n", y, chrSrcY, lastChrSrcY, h);
// *** horizontal scale U and V lines to temp buffer // *** horizontal scale U and V lines to temp buffer
if(s_last_y1pos!=y1) if(lastChrSrcY!=chrSrcY)
{ {
uint8_t *src1, *src2; uint8_t *src1, *src2;
// skip if first line has been horiz scaled alleady // skip if first line has been horiz scaled alleady
if(s_last_y1pos != y1-1) if(lastChrSrcY != chrSrcY-1)
{ {
// check if first line is before any available src lines // check if first line is before any available src lines
if(y1-y/2-1 < 0) if(chrSrcY-srcSliceY /2-1 < 0)
{ {
src1= srcptr[1]+(0)*stride[1]; src1= srcptr[1]+(0)*stride[1];
src2= srcptr[2]+(0)*stride[2]; src2= srcptr[2]+(0)*stride[2];
}else{ }else{
src1= srcptr[1]+(y1-y/2-1)*stride[1]; src1= srcptr[1]+(chrSrcY-srcSliceY /2-1)*stride[1];
src2= srcptr[2]+(y1-y/2-1)*stride[2]; src2= srcptr[2]+(chrSrcY-srcSliceY /2-1)*stride[2];
} }
RENAME(hcscale)(uvbuf0, dstUVw, src1, src2, srcWidth, s_xinc2); RENAME(hcscale)(uvbuf0, dstUVw, src1, src2, srcWidth, s_xinc2);
} }
// check if second line is after any available src lines // check if second line is after any available src lines
if(y1 - y/2 >= h/2) if(chrSrcY - srcSliceY /2 >= srcSliceH/2)
{ {
src1= srcptr[1]+(h/2-1)*stride[1]; src1= srcptr[1]+(srcSliceH/2-1)*stride[1];
src2= srcptr[2]+(h/2-1)*stride[2]; src2= srcptr[2]+(srcSliceH/2-1)*stride[2];
}else{ }else{
src1= srcptr[1]+(y1-y/2)*stride[1]; src1= srcptr[1]+(chrSrcY-srcSliceY /2)*stride[1];
src2= srcptr[2]+(y1-y/2)*stride[2]; src2= srcptr[2]+(chrSrcY-srcSliceY /2)*stride[2];
} }
RENAME(hcscale)(uvbuf1, dstUVw, src1, src2, srcWidth, s_xinc2); RENAME(hcscale)(uvbuf1, dstUVw, src1, src2, srcWidth, s_xinc2);
// the min() is required to avoid reuseing lines which where not available // the min() is required to avoid reuseing lines which where not available
s_last_y1pos= MIN(y1, y/2+h/2-1); lastChrSrcY= MIN(chrSrcY, srcSliceY /2+srcSliceH/2-1);
} }
#ifdef HAVE_MMX #ifdef HAVE_MMX
b5Dither= dither8[s_ypos&1]; b5Dither= dither8[dstY&1];
g6Dither= dither4[s_ypos&1]; g6Dither= dither4[dstY&1];
g5Dither= dither8[s_ypos&1]; g5Dither= dither8[dstY&1];
r5Dither= dither8[(s_ypos+1)&1]; r5Dither= dither8[(dstY+1)&1];
#endif #endif
if(dstbpp==12) //YV12 if(dstbpp==12) //YV12
RENAME(yuv2yuv)(buf0, buf1, uvbuf0, uvbuf1, dest, uDest, vDest, dstw, yalpha, uvalpha); RENAME(yuv2yuv)(buf0, buf1, uvbuf0, uvbuf1, dest, uDest, vDest, dstW, yalpha, uvalpha);
else if(ABS(s_yinc - 0x10000) < 10) else if(ABS(s_yinc - 0x10000) < 10)
RENAME(yuv2rgb1)(buf0, buf1, uvbuf0, uvbuf1, dest, dstw, yalpha, uvalpha, dstbpp); RENAME(yuv2rgb1)(buf0, buf1, uvbuf0, uvbuf1, dest, dstW, yalpha, uvalpha, dstbpp);
else else
RENAME(yuv2rgbX)(buf0, buf1, uvbuf0, uvbuf1, dest, dstw, yalpha, uvalpha, dstbpp); RENAME(yuv2rgbX)(buf0, buf1, uvbuf0, uvbuf1, dest, dstW, yalpha, uvalpha, dstbpp);
} }
#ifdef HAVE_MMX #ifdef HAVE_MMX

Loading…
Cancel
Save