|
|
|
@ -63,13 +63,13 @@ |
|
|
|
|
static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ |
|
|
|
|
int numEq= 0, dcOk; |
|
|
|
|
src+= stride*4; // src points to begin of the 8x8 Block
|
|
|
|
|
asm volatile( |
|
|
|
|
asm volatile( |
|
|
|
|
"movq %0, %%mm7 \n\t" |
|
|
|
|
"movq %1, %%mm6 \n\t" |
|
|
|
|
: : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP]) |
|
|
|
|
); |
|
|
|
|
|
|
|
|
|
asm volatile( |
|
|
|
|
asm volatile( |
|
|
|
|
"lea (%2, %3), %%"REG_a" \n\t" |
|
|
|
|
// 0 1 2 3 4 5 6 7 8 9
|
|
|
|
|
// %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2
|
|
|
|
@ -318,8 +318,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) |
|
|
|
|
const int l9= stride + l8; |
|
|
|
|
int x; |
|
|
|
|
src+= stride*3; |
|
|
|
|
for(x=0; x<BLOCK_SIZE; x++) |
|
|
|
|
{ |
|
|
|
|
for(x=0; x<BLOCK_SIZE; x++){ |
|
|
|
|
const int first= FFABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1]; |
|
|
|
|
const int last= FFABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8]; |
|
|
|
|
|
|
|
|
@ -440,16 +439,13 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP) |
|
|
|
|
int x; |
|
|
|
|
const int QP15= QP + (QP>>2); |
|
|
|
|
src+= stride*3; |
|
|
|
|
for(x=0; x<BLOCK_SIZE; x++) |
|
|
|
|
{ |
|
|
|
|
for(x=0; x<BLOCK_SIZE; x++){ |
|
|
|
|
const int v = (src[x+l5] - src[x+l4]); |
|
|
|
|
if(FFABS(v) < QP15) |
|
|
|
|
{ |
|
|
|
|
if(FFABS(v) < QP15){ |
|
|
|
|
src[x+l3] +=v>>3; |
|
|
|
|
src[x+l4] +=v>>1; |
|
|
|
|
src[x+l5] -=v>>1; |
|
|
|
|
src[x+l6] -=v>>3; |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -566,8 +562,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) |
|
|
|
|
int x; |
|
|
|
|
|
|
|
|
|
src+= stride*3; |
|
|
|
|
for(x=0; x<BLOCK_SIZE; x++) |
|
|
|
|
{ |
|
|
|
|
for(x=0; x<BLOCK_SIZE; x++){ |
|
|
|
|
int a= src[l3] - src[l4]; |
|
|
|
|
int b= src[l4] - src[l5]; |
|
|
|
|
int c= src[l5] - src[l6]; |
|
|
|
@ -575,8 +570,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) |
|
|
|
|
int d= FFABS(b) - ((FFABS(a) + FFABS(c))>>1); |
|
|
|
|
d= FFMAX(d, 0); |
|
|
|
|
|
|
|
|
|
if(d < co->QP*2) |
|
|
|
|
{ |
|
|
|
|
if(d < co->QP*2){ |
|
|
|
|
int v = d * FFSIGN(-b); |
|
|
|
|
|
|
|
|
|
src[l2] +=v>>3; |
|
|
|
@ -585,7 +579,6 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) |
|
|
|
|
src[l5] -=(3*v)>>3; |
|
|
|
|
src[l6] -=v>>2; |
|
|
|
|
src[l7] -=v>>3; |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
src++; |
|
|
|
|
} |
|
|
|
@ -825,11 +818,9 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext |
|
|
|
|
{ |
|
|
|
|
int x; |
|
|
|
|
src-= stride; |
|
|
|
|
for(x=0; x<BLOCK_SIZE; x++) |
|
|
|
|
{ |
|
|
|
|
for(x=0; x<BLOCK_SIZE; x++){ |
|
|
|
|
const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]); |
|
|
|
|
if(FFABS(middleEnergy)< 8*QP) |
|
|
|
|
{ |
|
|
|
|
if(FFABS(middleEnergy)< 8*QP){ |
|
|
|
|
const int q=(src[l4] - src[l5])/2; |
|
|
|
|
const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]); |
|
|
|
|
const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]); |
|
|
|
@ -840,13 +831,10 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext |
|
|
|
|
d= (5*d + 32) >> 6; |
|
|
|
|
d*= FFSIGN(-middleEnergy); |
|
|
|
|
|
|
|
|
|
if(q>0) |
|
|
|
|
{ |
|
|
|
|
if(q>0){ |
|
|
|
|
d= d<0 ? 0 : d; |
|
|
|
|
d= d>q ? q : d; |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
}else{ |
|
|
|
|
d= d>0 ? 0 : d; |
|
|
|
|
d= d<q ? q : d; |
|
|
|
|
} |
|
|
|
@ -856,12 +844,10 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext |
|
|
|
|
} |
|
|
|
|
src++; |
|
|
|
|
} |
|
|
|
|
src-=8; |
|
|
|
|
for(x=0; x<8; x++) |
|
|
|
|
{ |
|
|
|
|
src-=8; |
|
|
|
|
for(x=0; x<8; x++){ |
|
|
|
|
int y; |
|
|
|
|
for(y=4; y<6; y++) |
|
|
|
|
{ |
|
|
|
|
for(y=4; y<6; y++){ |
|
|
|
|
int d= src[x+y*stride] - tmp[x+(y-4)*8]; |
|
|
|
|
int ad= FFABS(d); |
|
|
|
|
static int max=0; |
|
|
|
@ -871,14 +857,12 @@ src-=8; |
|
|
|
|
|
|
|
|
|
if(max<ad) max=ad; |
|
|
|
|
sum+= ad>3 ? 1 : 0; |
|
|
|
|
if(ad>3) |
|
|
|
|
{ |
|
|
|
|
if(ad>3){ |
|
|
|
|
src[0] = src[7] = src[stride*7] = src[(stride+1)*7]=255; |
|
|
|
|
} |
|
|
|
|
if(y==4) bias+=d; |
|
|
|
|
num++; |
|
|
|
|
if(num%1000000 == 0) |
|
|
|
|
{ |
|
|
|
|
if(num%1000000 == 0){ |
|
|
|
|
av_log(c, AV_LOG_INFO, " %d %d %d %d\n", num, sum, max, bias); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
@ -1129,11 +1113,9 @@ src-=8; |
|
|
|
|
// const int l9= stride + l8;
|
|
|
|
|
int x; |
|
|
|
|
src+= stride*3; |
|
|
|
|
for(x=0; x<BLOCK_SIZE; x++) |
|
|
|
|
{ |
|
|
|
|
for(x=0; x<BLOCK_SIZE; x++){ |
|
|
|
|
const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]); |
|
|
|
|
if(FFABS(middleEnergy) < 8*c->QP) |
|
|
|
|
{ |
|
|
|
|
if(FFABS(middleEnergy) < 8*c->QP){ |
|
|
|
|
const int q=(src[l4] - src[l5])/2; |
|
|
|
|
const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]); |
|
|
|
|
const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]); |
|
|
|
@ -1144,13 +1126,10 @@ src-=8; |
|
|
|
|
d= (5*d + 32) >> 6; |
|
|
|
|
d*= FFSIGN(-middleEnergy); |
|
|
|
|
|
|
|
|
|
if(q>0) |
|
|
|
|
{ |
|
|
|
|
if(q>0){ |
|
|
|
|
d= d<0 ? 0 : d; |
|
|
|
|
d= d>q ? q : d; |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
}else{ |
|
|
|
|
d= d>0 ? 0 : d; |
|
|
|
|
d= d<q ? q : d; |
|
|
|
|
} |
|
|
|
@ -1400,12 +1379,10 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, |
|
|
|
|
int s[10]; |
|
|
|
|
const int QP2= c->QP/2 + 1; |
|
|
|
|
|
|
|
|
|
for(y=1; y<9; y++) |
|
|
|
|
{ |
|
|
|
|
for(y=1; y<9; y++){ |
|
|
|
|
int x; |
|
|
|
|
p= src + stride*y; |
|
|
|
|
for(x=1; x<9; x++) |
|
|
|
|
{ |
|
|
|
|
for(x=1; x<9; x++){ |
|
|
|
|
p++; |
|
|
|
|
if(*p > max) max= *p; |
|
|
|
|
if(*p < min) min= *p; |
|
|
|
@ -1415,8 +1392,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, |
|
|
|
|
|
|
|
|
|
if(max - min <deringThreshold) return; |
|
|
|
|
|
|
|
|
|
for(y=0; y<10; y++) |
|
|
|
|
{ |
|
|
|
|
for(y=0; y<10; y++){ |
|
|
|
|
int t = 0; |
|
|
|
|
|
|
|
|
|
if(src[stride*y + 0] > avg) t+= 1; |
|
|
|
@ -1435,24 +1411,20 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, |
|
|
|
|
s[y] = t; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for(y=1; y<9; y++) |
|
|
|
|
{ |
|
|
|
|
for(y=1; y<9; y++){ |
|
|
|
|
int t = s[y-1] & s[y] & s[y+1]; |
|
|
|
|
t|= t>>16; |
|
|
|
|
s[y-1]= t; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for(y=1; y<9; y++) |
|
|
|
|
{ |
|
|
|
|
for(y=1; y<9; y++){ |
|
|
|
|
int x; |
|
|
|
|
int t = s[y-1]; |
|
|
|
|
|
|
|
|
|
p= src + stride*y; |
|
|
|
|
for(x=1; x<9; x++) |
|
|
|
|
{ |
|
|
|
|
for(x=1; x<9; x++){ |
|
|
|
|
p++; |
|
|
|
|
if(t & (1<<x)) |
|
|
|
|
{ |
|
|
|
|
if(t & (1<<x)){ |
|
|
|
|
int f= (*(p-stride-1)) + 2*(*(p-stride)) + (*(p-stride+1)) |
|
|
|
|
+2*(*(p -1)) + 4*(*p ) + 2*(*(p +1)) |
|
|
|
|
+(*(p+stride-1)) + 2*(*(p+stride)) + (*(p+stride+1)); |
|
|
|
@ -1466,8 +1438,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, |
|
|
|
|
// if((max-min)<20 || (max-min)*QP<200)
|
|
|
|
|
// if((max-min)*QP < 500)
|
|
|
|
|
// if(max-min<QP/2)
|
|
|
|
|
if(max-min < 20) |
|
|
|
|
{ |
|
|
|
|
if(max-min < 20){ |
|
|
|
|
static int numSkiped=0; |
|
|
|
|
static int errorSum=0; |
|
|
|
|
static int worstQP=0; |
|
|
|
@ -1480,16 +1451,14 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, |
|
|
|
|
if(x==1 || x==8 || y==1 || y==8) continue; |
|
|
|
|
|
|
|
|
|
numSkiped++; |
|
|
|
|
if(absDiff > worstDiff) |
|
|
|
|
{ |
|
|
|
|
if(absDiff > worstDiff){ |
|
|
|
|
worstDiff= absDiff; |
|
|
|
|
worstQP= QP; |
|
|
|
|
worstRange= max-min; |
|
|
|
|
} |
|
|
|
|
errorSum+= error; |
|
|
|
|
|
|
|
|
|
if(1024LL*1024LL*1024LL % numSkiped == 0) |
|
|
|
|
{ |
|
|
|
|
if(1024LL*1024LL*1024LL % numSkiped == 0){ |
|
|
|
|
av_log(c, AV_LOG_INFO, "sum:%1.3f, skip:%d, wQP:%d, " |
|
|
|
|
"wRange:%d, wDiff:%d, relSkip:%1.3f\n", |
|
|
|
|
(float)errorSum/numSkiped, numSkiped, worstQP, worstRange, |
|
|
|
@ -1505,15 +1474,12 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
#ifdef DEBUG_DERING_THRESHOLD |
|
|
|
|
if(max-min < 20) |
|
|
|
|
{ |
|
|
|
|
for(y=1; y<9; y++) |
|
|
|
|
{ |
|
|
|
|
if(max-min < 20){ |
|
|
|
|
for(y=1; y<9; y++){ |
|
|
|
|
int x; |
|
|
|
|
int t = 0; |
|
|
|
|
p= src + stride*y; |
|
|
|
|
for(x=1; x<9; x++) |
|
|
|
|
{ |
|
|
|
|
for(x=1; x<9; x++){ |
|
|
|
|
p++; |
|
|
|
|
*p = FFMIN(*p + 20, 255); |
|
|
|
|
} |
|
|
|
@ -1631,8 +1597,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, |
|
|
|
|
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
|
|
|
|
int x; |
|
|
|
|
src+= stride*3; |
|
|
|
|
for(x=0; x<8; x++) |
|
|
|
|
{ |
|
|
|
|
for(x=0; x<8; x++){ |
|
|
|
|
src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4); |
|
|
|
|
src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4); |
|
|
|
|
src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4); |
|
|
|
@ -1703,8 +1668,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
|
|
|
|
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
|
|
|
|
int x; |
|
|
|
|
src+= stride*4; |
|
|
|
|
for(x=0; x<8; x++) |
|
|
|
|
{ |
|
|
|
|
for(x=0; x<8; x++){ |
|
|
|
|
int t1= tmp[x]; |
|
|
|
|
int t2= src[stride*1]; |
|
|
|
|
|
|
|
|
@ -1794,8 +1758,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) |
|
|
|
|
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
|
|
|
|
|
int x; |
|
|
|
|
src+= stride*4; |
|
|
|
|
for(x=0; x<8; x++) |
|
|
|
|
{ |
|
|
|
|
for(x=0; x<8; x++){ |
|
|
|
|
int t1= tmp[x]; |
|
|
|
|
int t2= tmp2[x]; |
|
|
|
|
int t3= src[0]; |
|
|
|
@ -2031,11 +1994,9 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) |
|
|
|
|
int x, y; |
|
|
|
|
src+= 4*stride; |
|
|
|
|
// FIXME - there should be a way to do a few columns in parallel like w/mmx
|
|
|
|
|
for(x=0; x<8; x++) |
|
|
|
|
{ |
|
|
|
|
for(x=0; x<8; x++){ |
|
|
|
|
uint8_t *colsrc = src; |
|
|
|
|
for (y=0; y<4; y++) |
|
|
|
|
{ |
|
|
|
|
for (y=0; y<4; y++){ |
|
|
|
|
int a, b, c, d, e, f; |
|
|
|
|
a = colsrc[0 ]; |
|
|
|
|
b = colsrc[stride ]; |
|
|
|
@ -2525,11 +2486,9 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc)) |
|
|
|
|
// int sysd=0;
|
|
|
|
|
int i; |
|
|
|
|
|
|
|
|
|
for(y=0; y<8; y++) |
|
|
|
|
{ |
|
|
|
|
for(y=0; y<8; y++){ |
|
|
|
|
int x; |
|
|
|
|
for(x=0; x<8; x++) |
|
|
|
|
{ |
|
|
|
|
for(x=0; x<8; x++){ |
|
|
|
|
int ref= tempBlured[ x + y*stride ]; |
|
|
|
|
int cur= src[ x + y*stride ]; |
|
|
|
|
int d1=ref - cur; |
|
|
|
@ -2557,15 +2516,11 @@ Switch between |
|
|
|
|
64 48 36 27 20 15 11 (33) (approx) |
|
|
|
|
64 56 49 43 37 33 29 (200) (approx) |
|
|
|
|
*/ |
|
|
|
|
if(d > maxNoise[1]) |
|
|
|
|
{ |
|
|
|
|
if(d < maxNoise[2]) |
|
|
|
|
{ |
|
|
|
|
for(y=0; y<8; y++) |
|
|
|
|
{ |
|
|
|
|
if(d > maxNoise[1]){ |
|
|
|
|
if(d < maxNoise[2]){ |
|
|
|
|
for(y=0; y<8; y++){ |
|
|
|
|
int x; |
|
|
|
|
for(x=0; x<8; x++) |
|
|
|
|
{ |
|
|
|
|
for(x=0; x<8; x++){ |
|
|
|
|
int ref= tempBlured[ x + y*stride ]; |
|
|
|
|
int cur= src[ x + y*stride ]; |
|
|
|
|
tempBlured[ x + y*stride ]= |
|
|
|
@ -2573,28 +2528,19 @@ Switch between |
|
|
|
|
(ref + cur + 1)>>1; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
for(y=0; y<8; y++) |
|
|
|
|
{ |
|
|
|
|
}else{ |
|
|
|
|
for(y=0; y<8; y++){ |
|
|
|
|
int x; |
|
|
|
|
for(x=0; x<8; x++) |
|
|
|
|
{ |
|
|
|
|
for(x=0; x<8; x++){ |
|
|
|
|
tempBlured[ x + y*stride ]= src[ x + y*stride ]; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
if(d < maxNoise[0]) |
|
|
|
|
{ |
|
|
|
|
for(y=0; y<8; y++) |
|
|
|
|
{ |
|
|
|
|
}else{ |
|
|
|
|
if(d < maxNoise[0]){ |
|
|
|
|
for(y=0; y<8; y++){ |
|
|
|
|
int x; |
|
|
|
|
for(x=0; x<8; x++) |
|
|
|
|
{ |
|
|
|
|
for(x=0; x<8; x++){ |
|
|
|
|
int ref= tempBlured[ x + y*stride ]; |
|
|
|
|
int cur= src[ x + y*stride ]; |
|
|
|
|
tempBlured[ x + y*stride ]= |
|
|
|
@ -2602,14 +2548,10 @@ Switch between |
|
|
|
|
(ref*7 + cur + 4)>>3; |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
for(y=0; y<8; y++) |
|
|
|
|
{ |
|
|
|
|
}else{ |
|
|
|
|
for(y=0; y<8; y++){ |
|
|
|
|
int x; |
|
|
|
|
for(x=0; x<8; x++) |
|
|
|
|
{ |
|
|
|
|
for(x=0; x<8; x++){ |
|
|
|
|
int ref= tempBlured[ x + y*stride ]; |
|
|
|
|
int cur= src[ x + y*stride ]; |
|
|
|
|
tempBlured[ x + y*stride ]= |
|
|
|
@ -2633,13 +2575,13 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st |
|
|
|
|
int64_t sums[10*8*2]; |
|
|
|
|
src+= step*3; // src points to begin of the 8x8 Block
|
|
|
|
|
//START_TIMER
|
|
|
|
|
asm volatile( |
|
|
|
|
asm volatile( |
|
|
|
|
"movq %0, %%mm7 \n\t" |
|
|
|
|
"movq %1, %%mm6 \n\t" |
|
|
|
|
: : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP]) |
|
|
|
|
); |
|
|
|
|
|
|
|
|
|
asm volatile( |
|
|
|
|
asm volatile( |
|
|
|
|
"lea (%2, %3), %%"REG_a" \n\t" |
|
|
|
|
// 0 1 2 3 4 5 6 7 8 9
|
|
|
|
|
// %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2
|
|
|
|
@ -3177,8 +3119,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t |
|
|
|
|
#ifndef HAVE_MMX |
|
|
|
|
int i; |
|
|
|
|
#endif |
|
|
|
|
if(levelFix) |
|
|
|
|
{ |
|
|
|
|
if(levelFix){ |
|
|
|
|
#ifdef HAVE_MMX |
|
|
|
|
asm volatile( |
|
|
|
|
"movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset
|
|
|
|
@ -3261,9 +3202,7 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2)) |
|
|
|
|
memcpy( &(dst[dstStride*i]), |
|
|
|
|
&(src[srcStride*i]), BLOCK_SIZE); |
|
|
|
|
#endif //HAVE_MMX
|
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
}else{ |
|
|
|
|
#ifdef HAVE_MMX |
|
|
|
|
asm volatile( |
|
|
|
|
"lea (%0,%2), %%"REG_a" \n\t" |
|
|
|
@ -3317,8 +3256,7 @@ static inline void RENAME(duplicate)(uint8_t src[], int stride) |
|
|
|
|
#else |
|
|
|
|
int i; |
|
|
|
|
uint8_t *p=src; |
|
|
|
|
for(i=0; i<3; i++) |
|
|
|
|
{ |
|
|
|
|
for(i=0; i<3; i++){ |
|
|
|
|
p-= stride; |
|
|
|
|
memcpy(p, src, 8); |
|
|
|
|
} |
|
|
|
@ -3381,8 +3319,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
|
|
|
|
|
copyAhead-= 8; |
|
|
|
|
|
|
|
|
|
if(!isColor) |
|
|
|
|
{ |
|
|
|
|
if(!isColor){ |
|
|
|
|
uint64_t sum= 0; |
|
|
|
|
int i; |
|
|
|
|
uint64_t maxClipped; |
|
|
|
@ -3393,8 +3330,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
// first frame is fscked so we ignore it
|
|
|
|
|
if(c.frameNum == 1) yHistogram[0]= width*height/64*15/256; |
|
|
|
|
|
|
|
|
|
for(i=0; i<256; i++) |
|
|
|
|
{ |
|
|
|
|
for(i=0; i<256; i++){ |
|
|
|
|
sum+= yHistogram[i]; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
@ -3402,15 +3338,13 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
maxClipped= (uint64_t)(sum * c.ppMode.maxClippedThreshold); |
|
|
|
|
|
|
|
|
|
clipped= sum; |
|
|
|
|
for(black=255; black>0; black--) |
|
|
|
|
{ |
|
|
|
|
for(black=255; black>0; black--){ |
|
|
|
|
if(clipped < maxClipped) break; |
|
|
|
|
clipped-= yHistogram[black]; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
clipped= sum; |
|
|
|
|
for(white=0; white<256; white++) |
|
|
|
|
{ |
|
|
|
|
for(white=0; white<256; white++){ |
|
|
|
|
if(clipped < maxClipped) break; |
|
|
|
|
clipped-= yHistogram[white]; |
|
|
|
|
} |
|
|
|
@ -3433,9 +3367,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
|
|
|
|
|
if(mode & LEVEL_FIX) QPCorrecture= (int)(scale*256*256 + 0.5); |
|
|
|
|
else QPCorrecture= 256*256; |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
}else{ |
|
|
|
|
c.packedYScale= 0x0100010001000100LL; |
|
|
|
|
c.packedYOffset= 0; |
|
|
|
|
QPCorrecture= 256*256; |
|
|
|
@ -3450,8 +3382,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
// From this point on it is guaranteed that we can read and write 16 lines downward
|
|
|
|
|
// finish 1 block before the next otherwise we might have a problem
|
|
|
|
|
// with the L1 Cache of the P4 ... or only a few blocks at a time or something
|
|
|
|
|
for(x=0; x<width; x+=BLOCK_SIZE) |
|
|
|
|
{ |
|
|
|
|
for(x=0; x<width; x+=BLOCK_SIZE){ |
|
|
|
|
|
|
|
|
|
#ifdef HAVE_MMX2 |
|
|
|
|
/*
|
|
|
|
@ -3514,18 +3445,15 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
} |
|
|
|
|
if(width==FFABS(dstStride)) |
|
|
|
|
linecpy(dst, tempDst + 9*dstStride, copyAhead, dstStride); |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
else{ |
|
|
|
|
int i; |
|
|
|
|
for(i=0; i<copyAhead; i++) |
|
|
|
|
{ |
|
|
|
|
for(i=0; i<copyAhead; i++){ |
|
|
|
|
memcpy(dst + i*dstStride, tempDst + (9+i)*dstStride, width); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for(y=0; y<height; y+=BLOCK_SIZE) |
|
|
|
|
{ |
|
|
|
|
for(y=0; y<height; y+=BLOCK_SIZE){ |
|
|
|
|
//1% speedup if these are here instead of the inner loop
|
|
|
|
|
const uint8_t *srcBlock= &(src[y*srcStride]); |
|
|
|
|
uint8_t *dstBlock= &(dst[y*dstStride]); |
|
|
|
@ -3538,8 +3466,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
int QP=0; |
|
|
|
|
/* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards
|
|
|
|
|
if not then use a temporary buffer */ |
|
|
|
|
if(y+15 >= height) |
|
|
|
|
{ |
|
|
|
|
if(y+15 >= height){ |
|
|
|
|
int i; |
|
|
|
|
/* copy from line (copyAhead) to (copyAhead+7) of src, these will be copied with
|
|
|
|
|
blockcopy to dst later */ |
|
|
|
@ -3564,19 +3491,15 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
// From this point on it is guaranteed that we can read and write 16 lines downward
|
|
|
|
|
// finish 1 block before the next otherwise we might have a problem
|
|
|
|
|
// with the L1 Cache of the P4 ... or only a few blocks at a time or something
|
|
|
|
|
for(x=0; x<width; x+=BLOCK_SIZE) |
|
|
|
|
{ |
|
|
|
|
for(x=0; x<width; x+=BLOCK_SIZE){ |
|
|
|
|
const int stride= dstStride; |
|
|
|
|
#ifdef HAVE_MMX |
|
|
|
|
uint8_t *tmpXchg; |
|
|
|
|
#endif |
|
|
|
|
if(isColor) |
|
|
|
|
{ |
|
|
|
|
if(isColor){ |
|
|
|
|
QP= QPptr[x>>qpHShift]; |
|
|
|
|
c.nonBQP= nonBQPptr[x>>qpHShift]; |
|
|
|
|
} |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
}else{ |
|
|
|
|
QP= QPptr[x>>4]; |
|
|
|
|
QP= (QP* QPCorrecture + 256*128)>>16; |
|
|
|
|
c.nonBQP= nonBQPptr[x>>4]; |
|
|
|
@ -3653,12 +3576,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
*/ |
|
|
|
|
|
|
|
|
|
/* only deblock if we have 2 blocks */ |
|
|
|
|
if(y + 8 < height) |
|
|
|
|
{ |
|
|
|
|
if(y + 8 < height){ |
|
|
|
|
if(mode & V_X1_FILTER) |
|
|
|
|
RENAME(vertX1Filter)(dstBlock, stride, &c); |
|
|
|
|
else if(mode & V_DEBLOCK) |
|
|
|
|
{ |
|
|
|
|
else if(mode & V_DEBLOCK){ |
|
|
|
|
const int t= RENAME(vertClassify)(dstBlock, stride, &c); |
|
|
|
|
|
|
|
|
|
if(t==1) |
|
|
|
@ -3674,13 +3595,11 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride); |
|
|
|
|
#endif |
|
|
|
|
/* check if we have a previous block to deblock it with dstBlock */ |
|
|
|
|
if(x - 8 >= 0) |
|
|
|
|
{ |
|
|
|
|
if(x - 8 >= 0){ |
|
|
|
|
#ifdef HAVE_MMX |
|
|
|
|
if(mode & H_X1_FILTER) |
|
|
|
|
RENAME(vertX1Filter)(tempBlock1, 16, &c); |
|
|
|
|
else if(mode & H_DEBLOCK) |
|
|
|
|
{ |
|
|
|
|
else if(mode & H_DEBLOCK){ |
|
|
|
|
//START_TIMER
|
|
|
|
|
const int t= RENAME(vertClassify)(tempBlock1, 16, &c); |
|
|
|
|
//STOP_TIMER("dc & minmax")
|
|
|
|
@ -3697,8 +3616,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
#else |
|
|
|
|
if(mode & H_X1_FILTER) |
|
|
|
|
horizX1Filter(dstBlock-4, stride, QP); |
|
|
|
|
else if(mode & H_DEBLOCK) |
|
|
|
|
{ |
|
|
|
|
else if(mode & H_DEBLOCK){ |
|
|
|
|
#ifdef HAVE_ALTIVEC |
|
|
|
|
DECLARE_ALIGNED(16, unsigned char, tempBlock[272]); |
|
|
|
|
transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride); |
|
|
|
@ -3724,8 +3642,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c); |
|
|
|
|
} |
|
|
|
|
#endif //HAVE_MMX
|
|
|
|
|
if(mode & DERING) |
|
|
|
|
{ |
|
|
|
|
if(mode & DERING){ |
|
|
|
|
//FIXME filter first line
|
|
|
|
|
if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c); |
|
|
|
|
} |
|
|
|
@ -3749,13 +3666,11 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
#endif |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if(mode & DERING) |
|
|
|
|
{ |
|
|
|
|
if(mode & DERING){ |
|
|
|
|
if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, &c); |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
if((mode & TEMP_NOISE_FILTER)) |
|
|
|
|
{ |
|
|
|
|
if((mode & TEMP_NOISE_FILTER)){ |
|
|
|
|
RENAME(tempNoiseReducer)(dstBlock-8, dstStride, |
|
|
|
|
c.tempBlured[isColor] + y*dstStride + x, |
|
|
|
|
c.tempBluredPast[isColor] + (y>>3)*256 + (x>>3), |
|
|
|
@ -3763,29 +3678,25 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
/* did we use a tmp buffer for the last lines*/ |
|
|
|
|
if(y+15 >= height) |
|
|
|
|
{ |
|
|
|
|
if(y+15 >= height){ |
|
|
|
|
uint8_t *dstBlock= &(dst[y*dstStride]); |
|
|
|
|
if(width==FFABS(dstStride)) |
|
|
|
|
linecpy(dstBlock, tempDst + dstStride, height-y, dstStride); |
|
|
|
|
else |
|
|
|
|
{ |
|
|
|
|
else{ |
|
|
|
|
int i; |
|
|
|
|
for(i=0; i<height-y; i++) |
|
|
|
|
{ |
|
|
|
|
for(i=0; i<height-y; i++){ |
|
|
|
|
memcpy(dstBlock + i*dstStride, tempDst + (i+1)*dstStride, width); |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
} |
|
|
|
|
/*
|
|
|
|
|
for(x=0; x<width; x+=32) |
|
|
|
|
{ |
|
|
|
|
for(x=0; x<width; x+=32){ |
|
|
|
|
volatile int i; |
|
|
|
|
i+= + dstBlock[x + 7*dstStride] + dstBlock[x + 8*dstStride] |
|
|
|
|
+ dstBlock[x + 9*dstStride] + dstBlock[x +10*dstStride] |
|
|
|
|
+ dstBlock[x +11*dstStride] + dstBlock[x +12*dstStride]; |
|
|
|
|
// + dstBlock[x +13*dstStride]
|
|
|
|
|
// + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride];
|
|
|
|
|
+ dstBlock[x +13*dstStride] |
|
|
|
|
+ dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride]; |
|
|
|
|
}*/ |
|
|
|
|
} |
|
|
|
|
#ifdef HAVE_3DNOW |
|
|
|
@ -3795,15 +3706,13 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
#ifdef DEBUG_BRIGHTNESS |
|
|
|
|
if(!isColor) |
|
|
|
|
{ |
|
|
|
|
if(!isColor){ |
|
|
|
|
int max=1; |
|
|
|
|
int i; |
|
|
|
|
for(i=0; i<256; i++) |
|
|
|
|
if(yHistogram[i] > max) max=yHistogram[i]; |
|
|
|
|
|
|
|
|
|
for(i=1; i<256; i++) |
|
|
|
|
{ |
|
|
|
|
for(i=1; i<256; i++){ |
|
|
|
|
int x; |
|
|
|
|
int start=yHistogram[i-1]/(max/256+1); |
|
|
|
|
int end=yHistogram[i]/(max/256+1); |
|
|
|
@ -3812,12 +3721,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ |
|
|
|
|
dst[ i*dstStride + x]+=128; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
for(i=0; i<100; i+=2) |
|
|
|
|
{ |
|
|
|
|
for(i=0; i<100; i+=2){ |
|
|
|
|
dst[ (white)*dstStride + i]+=128; |
|
|
|
|
dst[ (black)*dstStride + i]+=128; |
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|