diff --git a/postproc/postprocess.c b/postproc/postprocess.c index 33ebf42c34..f558a1e9f6 100644 --- a/postproc/postprocess.c +++ b/postproc/postprocess.c @@ -122,7 +122,7 @@ static uint64_t temp3=0; static uint64_t temp4=0; static uint64_t temp5=0; static uint64_t pQPb=0; -static uint8_t tempBlock[16*16]; +static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data int hFlatnessThreshold= 56 - 16; int vFlatnessThreshold= 56 - 16; @@ -132,7 +132,7 @@ double maxClippedThreshold= 0.01; int maxAllowedY=255; //FIXME can never make a movieŽs black brighter (anyone needs that?) -int minAllowedY=0; +int minAllowedY=16; #ifdef TIMING static inline long long rdtsc() @@ -2398,6 +2398,13 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri static uint8_t *tempDst= NULL; static uint8_t *tempSrc= NULL; + /* Temporary buffers for handling the last block */ + static uint8_t *tempDstBlock= NULL; + static uint8_t *tempSrcBlock= NULL; + + uint8_t *dstBlockPtrBackup; + uint8_t *srcBlockPtrBackup; + #ifdef TIMING long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0; sumTime= rdtsc(); @@ -2407,6 +2414,8 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri { tempDst= (uint8_t*)memalign(8, 1024*24); tempSrc= (uint8_t*)memalign(8, 1024*24); + tempDstBlock= (uint8_t*)memalign(8, 1024*24); + tempSrcBlock= (uint8_t*)memalign(8, 1024*24); } if(!yHistogram) @@ -2414,6 +2423,12 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri int i; yHistogram= (uint64_t*)malloc(8*256); for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256; + + if(mode & FULL_Y_RANGE) + { + maxAllowedY=255; + minAllowedY=0; + } } if(!isColor) @@ -2505,6 +2520,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri srcBlock= tempSrc; } + // From this point on it is guranteed that we can read and write 16 lines downward // finish 1 block before the next otherwise weŽll might have a problem // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing for(x=0; x= width) + { + int i; + dstBlockPtrBackup= dstBlock; + srcBlockPtrBackup= srcBlock; + + for(i=0;i= 0 && x= 0) { #ifdef MORE_TIMING T0= rdtsc(); @@ -2624,12 +2657,25 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri dering(dstBlock - stride*9 + width-9, stride, QP); //FIXME dering filter will not be applied to last block (bottom right) + /* did we use a tmp-block buffer */ + if(x+7 >= width) + { + int i; + dstBlock= dstBlockPtrBackup; + srcBlock= srcBlockPtrBackup; + + for(i=0;i height) + if(y+15 >= height) { uint8_t *dstBlock= &(dst[y*dstStride]); memcpy(dstBlock, tempDst, dstStride*(height-y) ); diff --git a/postproc/postprocess.h b/postproc/postprocess.h index e7eb248512..20880a9874 100644 --- a/postproc/postprocess.h +++ b/postproc/postprocess.h @@ -46,6 +46,9 @@ #define H_RK1_FILTER 0x1000 // 4096 (not implemented yet) #define H_X1_FILTER 0x2000 // 8192 +// select between full y range (255-0) or standart one ( +#define FULL_Y_RANGE 0x8000 // 32768 + //Deinterlacing Filters #define LINEAR_IPOL_DEINT_FILTER 0x10000 // 65536 #define LINEAR_BLEND_DEINT_FILTER 0x20000 // 131072 diff --git a/postproc/postprocess_template.c b/postproc/postprocess_template.c index 33ebf42c34..f558a1e9f6 100644 --- a/postproc/postprocess_template.c +++ b/postproc/postprocess_template.c @@ -122,7 +122,7 @@ static uint64_t temp3=0; static uint64_t temp4=0; static uint64_t temp5=0; static uint64_t pQPb=0; -static uint8_t tempBlock[16*16]; +static uint8_t tempBlock[16*16]; //used so the horizontal code gets aligned data int hFlatnessThreshold= 56 - 16; int vFlatnessThreshold= 56 - 16; @@ -132,7 +132,7 @@ double maxClippedThreshold= 0.01; int maxAllowedY=255; //FIXME can never make a movieŽs black brighter (anyone needs that?) -int minAllowedY=0; +int minAllowedY=16; #ifdef TIMING static inline long long rdtsc() @@ -2398,6 +2398,13 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri static uint8_t *tempDst= NULL; static uint8_t *tempSrc= NULL; + /* Temporary buffers for handling the last block */ + static uint8_t *tempDstBlock= NULL; + static uint8_t *tempSrcBlock= NULL; + + uint8_t *dstBlockPtrBackup; + uint8_t *srcBlockPtrBackup; + #ifdef TIMING long long T0, T1, memcpyTime=0, vertTime=0, horizTime=0, sumTime, diffTime=0; sumTime= rdtsc(); @@ -2407,6 +2414,8 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri { tempDst= (uint8_t*)memalign(8, 1024*24); tempSrc= (uint8_t*)memalign(8, 1024*24); + tempDstBlock= (uint8_t*)memalign(8, 1024*24); + tempSrcBlock= (uint8_t*)memalign(8, 1024*24); } if(!yHistogram) @@ -2414,6 +2423,12 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri int i; yHistogram= (uint64_t*)malloc(8*256); for(i=0; i<256; i++) yHistogram[i]= width*height/64*15/256; + + if(mode & FULL_Y_RANGE) + { + maxAllowedY=255; + minAllowedY=0; + } } if(!isColor) @@ -2505,6 +2520,7 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri srcBlock= tempSrc; } + // From this point on it is guranteed that we can read and write 16 lines downward // finish 1 block before the next otherwise weŽll might have a problem // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing for(x=0; x= width) + { + int i; + dstBlockPtrBackup= dstBlock; + srcBlockPtrBackup= srcBlock; + + for(i=0;i= 0 && x= 0) { #ifdef MORE_TIMING T0= rdtsc(); @@ -2624,12 +2657,25 @@ static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStri dering(dstBlock - stride*9 + width-9, stride, QP); //FIXME dering filter will not be applied to last block (bottom right) + /* did we use a tmp-block buffer */ + if(x+7 >= width) + { + int i; + dstBlock= dstBlockPtrBackup; + srcBlock= srcBlockPtrBackup; + + for(i=0;i height) + if(y+15 >= height) { uint8_t *dstBlock= &(dst[y*dstStride]); memcpy(dstBlock, tempDst, dstStride*(height-y) );