fixed a bug in the tmp buffer

fixed the color range for yuv
fixed the width %8!=0 bug (another 1% speed loss)

Originally committed as revision 2286 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
pull/126/head
Michael Niedermayer 24 years ago
parent 0f25d72b3f
commit 658a85f26f
  1. 54
      postproc/postprocess.c
  2. 3
      postproc/postprocess.h
  3. 54
      postproc/postprocess_template.c

@ -122,7 +122,7 @@ static uint64_t temp3=0;
static uint64_t temp4=0;
static uint64_t temp5=0;
static uint64_t pQPb=0;
static uint8_t tempBlock[16*16];
static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data
int hFlatnessThreshold= 56 - 16;
int vFlatnessThreshold= 56 - 16;
@ -132,7 +132,7 @@ double maxClippedThreshold= 0.01;
int maxAllowedY=255;
//FIXME can never make a movie´s black brighter (anyone needs that?)
int minAllowedY=0;
int minAllowedY=16;
#ifdef TIMING
static inline long long rdtsc()
@ -2398,6 +2398,13 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
static uint8_t *tempDst= NULL;
static uint8_t *tempSrc= NULL;
/* Temporary buffers for handling the last block */
static uint8_t *tempDstBlock= NULL;
static uint8_t *tempSrcBlock= NULL;
uint8_t *dstBlockPtrBackup;
uint8_t *srcBlockPtrBackup;
#ifdef TIMING
long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0;
sumTime= rdtsc();
@ -2407,6 +2414,8 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
{
tempDst= (uint8_t*)memalign(8, 1024*24);
tempSrc= (uint8_t*)memalign(8, 1024*24);
tempDstBlock= (uint8_t*)memalign(8, 1024*24);
tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
}
if(!yHistogram)
@ -2414,6 +2423,12 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
int i;
yHistogram= (uint64_t*)malloc(8*256);
for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256;
if(mode & FULL_Y_RANGE)
{
maxAllowedY=255;
minAllowedY=0;
}
}
if(!isColor)
@ -2505,6 +2520,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
srcBlock= tempSrc;
}
// From this point on it is guranteed that we can read and write 16 lines downward
// finish 1 block before the next otherwise we´ll might have a problem
// with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
for(x=0; x<width; x+=BLOCK_SIZE)
@ -2545,6 +2561,23 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++;
//can we mess with a 8x16 block, if not use a temp buffer, yes again
if(x+7 >= width)
{
int i;
dstBlockPtrBackup= dstBlock;
srcBlockPtrBackup= srcBlock;
for(i=0;i<BLOCK_SIZE*2; i++)
{
memcpy(tempSrcBlock+i*srcStride, srcBlock+i*srcStride, width-x);
memcpy(tempDstBlock+i*dstStride, dstBlock+i*dstStride, width-x);
}
dstBlock= tempDstBlock;
srcBlock= tempSrcBlock;
}
blockCopy(dstBlock + dstStride*5, dstStride,
srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX);
@ -2593,7 +2626,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
}
/* check if we have a previous block to deblock it with dstBlock */
if(x - 8 >= 0 && x<width)
if(x - 8 >= 0)
{
#ifdef MORE_TIMING
T0= rdtsc();
@ -2624,12 +2657,25 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
dering(dstBlock - stride*9 + width-9, stride, QP);
//FIXME dering filter will not be applied to last block (bottom right)
/* did we use a tmp-block buffer */
if(x+7 >= width)
{
int i;
dstBlock= dstBlockPtrBackup;
srcBlock= srcBlockPtrBackup;
for(i=0;i<BLOCK_SIZE*2; i++)
{
memcpy(dstBlock+i*dstStride, tempDstBlock+i*dstStride, width-x);
}
}
dstBlock+=8;
srcBlock+=8;
}
/* did we use a tmp buffer */
if(y+15 > height)
if(y+15 >= height)
{
uint8_t *dstBlock= &(dst[y*dstStride]);
memcpy(dstBlock, tempDst, dstStride*(height-y) );

@ -46,6 +46,9 @@
#define H_RK1_FILTER 0x1000 // 4096 (not implemented yet)
#define H_X1_FILTER 0x2000 // 8192
// select between full y range (255-0) or standart one (
#define FULL_Y_RANGE 0x8000 // 32768
//Deinterlacing Filters
#define LINEAR_IPOL_DEINT_FILTER 0x10000 // 65536
#define LINEAR_BLEND_DEINT_FILTER 0x20000 // 131072

@ -122,7 +122,7 @@ static uint64_t temp3=0;
static uint64_t temp4=0;
static uint64_t temp5=0;
static uint64_t pQPb=0;
static uint8_t tempBlock[16*16];
static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data
int hFlatnessThreshold= 56 - 16;
int vFlatnessThreshold= 56 - 16;
@ -132,7 +132,7 @@ double maxClippedThreshold= 0.01;
int maxAllowedY=255;
//FIXME can never make a movie´s black brighter (anyone needs that?)
int minAllowedY=0;
int minAllowedY=16;
#ifdef TIMING
static inline long long rdtsc()
@ -2398,6 +2398,13 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
static uint8_t *tempDst= NULL;
static uint8_t *tempSrc= NULL;
/* Temporary buffers for handling the last block */
static uint8_t *tempDstBlock= NULL;
static uint8_t *tempSrcBlock= NULL;
uint8_t *dstBlockPtrBackup;
uint8_t *srcBlockPtrBackup;
#ifdef TIMING
long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0;
sumTime= rdtsc();
@ -2407,6 +2414,8 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
{
tempDst= (uint8_t*)memalign(8, 1024*24);
tempSrc= (uint8_t*)memalign(8, 1024*24);
tempDstBlock= (uint8_t*)memalign(8, 1024*24);
tempSrcBlock= (uint8_t*)memalign(8, 1024*24);
}
if(!yHistogram)
@ -2414,6 +2423,12 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
int i;
yHistogram= (uint64_t*)malloc(8*256);
for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256;
if(mode & FULL_Y_RANGE)
{
maxAllowedY=255;
minAllowedY=0;
}
}
if(!isColor)
@ -2505,6 +2520,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
srcBlock= tempSrc;
}
// From this point on it is guranteed that we can read and write 16 lines downward
// finish 1 block before the next otherwise we´ll might have a problem
// with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
for(x=0; x<width; x+=BLOCK_SIZE)
@ -2545,6 +2561,23 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
if(!isColor) yHistogram[ srcBlock[srcStride*5] ]++;
//can we mess with a 8x16 block, if not use a temp buffer, yes again
if(x+7 >= width)
{
int i;
dstBlockPtrBackup= dstBlock;
srcBlockPtrBackup= srcBlock;
for(i=0;i<BLOCK_SIZE*2; i++)
{
memcpy(tempSrcBlock+i*srcStride, srcBlock+i*srcStride, width-x);
memcpy(tempDstBlock+i*dstStride, dstBlock+i*dstStride, width-x);
}
dstBlock= tempDstBlock;
srcBlock= tempSrcBlock;
}
blockCopy(dstBlock + dstStride*5, dstStride,
srcBlock + srcStride*5, srcStride, 8, mode & LEVEL_FIX);
@ -2593,7 +2626,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
}
/* check if we have a previous block to deblock it with dstBlock */
if(x - 8 >= 0 && x<width)
if(x - 8 >= 0)
{
#ifdef MORE_TIMING
T0= rdtsc();
@ -2624,12 +2657,25 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri
dering(dstBlock - stride*9 + width-9, stride, QP);
//FIXME dering filter will not be applied to last block (bottom right)
/* did we use a tmp-block buffer */
if(x+7 >= width)
{
int i;
dstBlock= dstBlockPtrBackup;
srcBlock= srcBlockPtrBackup;
for(i=0;i<BLOCK_SIZE*2; i++)
{
memcpy(dstBlock+i*dstStride, tempDstBlock+i*dstStride, width-x);
}
}
dstBlock+=8;
srcBlock+=8;
}
/* did we use a tmp buffer */
if(y+15 > height)
if(y+15 >= height)
{
uint8_t *dstBlock= &(dst[y*dstStride]);
memcpy(dstBlock, tempDst, dstStride*(height-y) );

Loading…
Cancel
Save