cosmetics: Fix indentation to be 4 spaces and consistently place {}.

Originally committed as revision 12552 to svn://svn.ffmpeg.org/ffmpeg/trunk
pull/126/head
Diego Biurrun 17 years ago
parent b19221c8ce
commit 16e0bf7349
  1. 135
      libpostproc/postprocess.c
  2. 9
      libpostproc/postprocess_altivec_template.c
  3. 3
      libpostproc/postprocess_internal.h
  4. 271
      libpostproc/postprocess_template.c

@ -190,8 +190,7 @@ static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
const int dcThreshold= dcOffset*2 + 1; const int dcThreshold= dcOffset*2 + 1;
for(y=0; y<BLOCK_SIZE; y++) for(y=0; y<BLOCK_SIZE; y++){
{
if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++; if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++; if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
@ -207,15 +206,15 @@ static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
/** /**
* Check if the middle 8x8 Block in the given 8x16 block is flat * Check if the middle 8x8 Block in the given 8x16 block is flat
*/ */
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
{
int numEq= 0; int numEq= 0;
int y; int y;
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
const int dcThreshold= dcOffset*2 + 1; const int dcThreshold= dcOffset*2 + 1;
src+= stride*4; // src points to begin of the 8x8 Block src+= stride*4; // src points to begin of the 8x8 Block
for(y=0; y<BLOCK_SIZE-1; y++) for(y=0; y<BLOCK_SIZE-1; y++){
{
if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++; if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++; if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++; if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
@ -258,8 +257,7 @@ static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
#if 1 #if 1
int x; int x;
src+= stride*4; src+= stride*4;
for(x=0; x<BLOCK_SIZE; x+=4) for(x=0; x<BLOCK_SIZE; x+=4){
{
if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0; if((unsigned)(src[ x + 0*stride] - src[ x + 5*stride] + 2*QP) > 4*QP) return 0;
if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0; if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0; if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
@ -268,8 +266,7 @@ static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
#else #else
int x; int x;
src+= stride*3; src+= stride*3;
for(x=0; x<BLOCK_SIZE; x++) for(x=0; x<BLOCK_SIZE; x++){
{
if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0; if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
} }
#endif #endif
@ -277,8 +274,7 @@ static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
#else #else
int x; int x;
src+= stride*4; src+= stride*4;
for(x=0; x<BLOCK_SIZE; x++) for(x=0; x<BLOCK_SIZE; x++){
{
int min=255; int min=255;
int max=0; int max=0;
int y; int y;
@ -293,7 +289,8 @@ static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
#endif #endif
} }
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){ static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
{
if( isHorizDC_C(src, stride, c) ){ if( isHorizDC_C(src, stride, c) ){
if( isHorizMinMaxOk_C(src, stride, c->QP) ) if( isHorizMinMaxOk_C(src, stride, c->QP) )
return 1; return 1;
@ -304,7 +301,8 @@ static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
} }
} }
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){ static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
{
if( isVertDC_C(src, stride, c) ){ if( isVertDC_C(src, stride, c) ){
if( isVertMinMaxOk_C(src, stride, c->QP) ) if( isVertMinMaxOk_C(src, stride, c->QP) )
return 1; return 1;
@ -318,12 +316,10 @@ static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{ {
int y; int y;
for(y=0; y<BLOCK_SIZE; y++) for(y=0; y<BLOCK_SIZE; y++){
{
const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]); const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
if(FFABS(middleEnergy) < 8*c->QP) if(FFABS(middleEnergy) < 8*c->QP){
{
const int q=(dst[3] - dst[4])/2; const int q=(dst[3] - dst[4])/2;
const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]); const int leftEnergy= 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]); const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
@ -359,8 +355,7 @@ static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
{ {
int y; int y;
for(y=0; y<BLOCK_SIZE; y++) for(y=0; y<BLOCK_SIZE; y++){
{
const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0]; const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7]; const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
@ -431,16 +426,14 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP)
} }
} }
for(y=0; y<BLOCK_SIZE; y++) for(y=0; y<BLOCK_SIZE; y++){
{
int a= src[1] - src[2]; int a= src[1] - src[2];
int b= src[3] - src[4]; int b= src[3] - src[4];
int c= src[5] - src[6]; int c= src[5] - src[6];
int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0); int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
if(d < QP) if(d < QP){
{
int v = d * FFSIGN(-b); int v = d * FFSIGN(-b);
src[1] +=v/8; src[1] +=v/8;
@ -449,7 +442,6 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP)
src[4] -=3*v/8; src[4] -=3*v/8;
src[5] -=v/4; src[5] -=v/4;
src[6] -=v/8; src[6] -=v/8;
} }
src+=stride; src+=stride;
} }
@ -524,8 +516,7 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
}else{ }else{
const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]); const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
if(FFABS(middleEnergy) < 8*QP) if(FFABS(middleEnergy) < 8*QP){
{
const int q=(src[3*step] - src[4*step])/2; const int q=(src[3*step] - src[4*step])/2;
const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]); const int leftEnergy= 5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]); const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
@ -536,13 +527,10 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
d= (5*d + 32) >> 6; d= (5*d + 32) >> 6;
d*= FFSIGN(-middleEnergy); d*= FFSIGN(-middleEnergy);
if(q>0) if(q>0){
{
d= d<0 ? 0 : d; d= d<0 ? 0 : d;
d= d>q ? q : d; d= d>q ? q : d;
} }else{
else
{
d= d>0 ? 0 : d; d= d>0 ? 0 : d;
d= d<q ? q : d; d= d<q ? q : d;
} }
@ -781,8 +769,7 @@ pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
filterName= strtok(filterToken, optionDelimiters); filterName= strtok(filterToken, optionDelimiters);
av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName); av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
if(*filterName == '-') if(*filterName == '-'){
{
enable=0; enable=0;
filterName++; filterName++;
} }
@ -796,8 +783,7 @@ pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0; else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1; else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0; else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
else else{
{
options[numOfUnknownOptions] = option; options[numOfUnknownOptions] = option;
numOfUnknownOptions++; numOfUnknownOptions++;
} }
@ -806,10 +792,8 @@ pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
options[numOfUnknownOptions] = NULL; options[numOfUnknownOptions] = NULL;
/* replace stuff from the replace Table */ /* replace stuff from the replace Table */
for(i=0; replaceTable[2*i]!=NULL; i++) for(i=0; replaceTable[2*i]!=NULL; i++){
{ if(!strcmp(replaceTable[2*i], filterName)){
if(!strcmp(replaceTable[2*i], filterName))
{
int newlen= strlen(replaceTable[2*i + 1]); int newlen= strlen(replaceTable[2*i + 1]);
int plen; int plen;
int spaceLeft; int spaceLeft;
@ -819,8 +803,7 @@ pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
plen= strlen(p); plen= strlen(p);
spaceLeft= p - temp + plen; spaceLeft= p - temp + plen;
if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE) if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE){
{
ppMode->error++; ppMode->error++;
break; break;
} }
@ -830,11 +813,9 @@ pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
} }
} }
for(i=0; filters[i].shortName!=NULL; i++) for(i=0; filters[i].shortName!=NULL; i++){
{
if( !strcmp(filters[i].longName, filterName) if( !strcmp(filters[i].longName, filterName)
|| !strcmp(filters[i].shortName, filterName)) || !strcmp(filters[i].shortName, filterName)){
{
ppMode->lumMode &= ~filters[i].mask; ppMode->lumMode &= ~filters[i].mask;
ppMode->chromMode &= ~filters[i].mask; ppMode->chromMode &= ~filters[i].mask;
@ -847,16 +828,13 @@ pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
if(q >= filters[i].minChromQuality) if(q >= filters[i].minChromQuality)
ppMode->chromMode|= filters[i].mask; ppMode->chromMode|= filters[i].mask;
if(filters[i].mask == LEVEL_FIX) if(filters[i].mask == LEVEL_FIX){
{
int o; int o;
ppMode->minAllowedY= 16; ppMode->minAllowedY= 16;
ppMode->maxAllowedY= 234; ppMode->maxAllowedY= 234;
for(o=0; options[o]!=NULL; o++) for(o=0; options[o]!=NULL; o++){
{
if( !strcmp(options[o],"fullyrange") if( !strcmp(options[o],"fullyrange")
||!strcmp(options[o],"f")) ||!strcmp(options[o],"f")){
{
ppMode->minAllowedY= 0; ppMode->minAllowedY= 0;
ppMode->maxAllowedY= 255; ppMode->maxAllowedY= 255;
numOfUnknownOptions--; numOfUnknownOptions--;
@ -868,13 +846,11 @@ pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
int o; int o;
int numOfNoises=0; int numOfNoises=0;
for(o=0; options[o]!=NULL; o++) for(o=0; options[o]!=NULL; o++){
{
char *tail; char *tail;
ppMode->maxTmpNoise[numOfNoises]= ppMode->maxTmpNoise[numOfNoises]=
strtol(options[o], &tail, 0); strtol(options[o], &tail, 0);
if(tail!=options[o]) if(tail!=options[o]){
{
numOfNoises++; numOfNoises++;
numOfUnknownOptions--; numOfUnknownOptions--;
if(numOfNoises >= 3) break; if(numOfNoises >= 3) break;
@ -882,12 +858,10 @@ pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
} }
} }
else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK
|| filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK) || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
{
int o; int o;
for(o=0; options[o]!=NULL && o<2; o++) for(o=0; options[o]!=NULL && o<2; o++){
{
char *tail; char *tail;
int val= strtol(options[o], &tail, 0); int val= strtol(options[o], &tail, 0);
if(tail==options[o]) break; if(tail==options[o]) break;
@ -897,13 +871,11 @@ pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
else ppMode->flatnessThreshold= val; else ppMode->flatnessThreshold= val;
} }
} }
else if(filters[i].mask == FORCE_QUANT) else if(filters[i].mask == FORCE_QUANT){
{
int o; int o;
ppMode->forcedQuant= 15; ppMode->forcedQuant= 15;
for(o=0; options[o]!=NULL && o<1; o++) for(o=0; options[o]!=NULL && o<1; o++){
{
char *tail; char *tail;
int val= strtol(options[o], &tail, 0); int val= strtol(options[o], &tail, 0);
if(tail==options[o]) break; if(tail==options[o]) break;
@ -919,8 +891,7 @@ pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
} }
av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode); av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
if(ppMode->error) if(ppMode->error){
{
av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name); av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
av_free(ppMode); av_free(ppMode);
return NULL; return NULL;
@ -952,8 +923,7 @@ static void reallocBuffers(PPContext *c, int width, int height, int stride, int
for(i=0; i<256; i++) for(i=0; i<256; i++)
c->yHistogram[i]= width*height/64*15/256; c->yHistogram[i]= width*height/64*15/256;
for(i=0; i<3; i++) for(i=0; i<3; i++){
{
//Note: The +17*1024 is just there so i do not have to worry about r/w over the end. //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024); reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
@ -1034,8 +1004,7 @@ void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
FFMAX(minStride, c->stride), FFMAX(minStride, c->stride),
FFMAX(c->qpStride, absQPStride)); FFMAX(c->qpStride, absQPStride));
if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
{
int i; int i;
QP_store= c->forcedQPTable; QP_store= c->forcedQPTable;
absQPStride = QPStride = 0; absQPStride = QPStride = 0;
@ -1058,20 +1027,19 @@ void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
QPStride= absQPStride; QPStride= absQPStride;
} }
if(0){ if(0){
int x,y; int x,y;
for(y=0; y<mbHeight; y++){ for(y=0; y<mbHeight; y++){
for(x=0; x<mbWidth; x++){ for(x=0; x<mbWidth; x++){
av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]); av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
} }
av_log(c, AV_LOG_INFO, "\n"); av_log(c, AV_LOG_INFO, "\n");
} }
av_log(c, AV_LOG_INFO, "\n"); av_log(c, AV_LOG_INFO, "\n");
} }
if((pict_type&7)!=3) if((pict_type&7)!=3){
{ if (QPStride >= 0){
if (QPStride >= 0) {
int i; int i;
const int count= mbHeight * QPStride; const int count= mbHeight * QPStride;
for(i=0; i<(count>>2); i++){ for(i=0; i<(count>>2); i++){
@ -1099,23 +1067,18 @@ for(y=0; y<mbHeight; y++){
width = (width )>>c->hChromaSubSample; width = (width )>>c->hChromaSubSample;
height = (height)>>c->vChromaSubSample; height = (height)>>c->vChromaSubSample;
if(mode->chromMode) if(mode->chromMode){
{
postProcess(src[1], srcStride[1], dst[1], dstStride[1], postProcess(src[1], srcStride[1], dst[1], dstStride[1],
width, height, QP_store, QPStride, 1, mode, c); width, height, QP_store, QPStride, 1, mode, c);
postProcess(src[2], srcStride[2], dst[2], dstStride[2], postProcess(src[2], srcStride[2], dst[2], dstStride[2],
width, height, QP_store, QPStride, 2, mode, c); width, height, QP_store, QPStride, 2, mode, c);
} }
else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
{
linecpy(dst[1], src[1], height, srcStride[1]); linecpy(dst[1], src[1], height, srcStride[1]);
linecpy(dst[2], src[2], height, srcStride[2]); linecpy(dst[2], src[2], height, srcStride[2]);
} }else{
else
{
int y; int y;
for(y=0; y<height; y++) for(y=0; y<height; y++){
{
memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width); memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width); memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
} }

@ -160,8 +160,7 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
v_numEq = vec_splat(v_numEq, 3); v_numEq = vec_splat(v_numEq, 3);
vec_ste(v_numEq, 0, &numEq); vec_ste(v_numEq, 0, &numEq);
if (numEq > c->ppMode.flatnessThreshold) if (numEq > c->ppMode.flatnessThreshold){
{
const vector unsigned char mmoP1 = (const vector unsigned char) const vector unsigned char mmoP1 = (const vector unsigned char)
AVV(0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, AVV(0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
0x00, 0x01, 0x12, 0x13, 0x08, 0x09, 0x1A, 0x1B); 0x00, 0x01, 0x12, 0x13, 0x08, 0x09, 0x1A, 0x1B);
@ -775,7 +774,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
vec_lvsr(i * stride, srcCopy); \ vec_lvsr(i * stride, srcCopy); \
const vector unsigned char maskST##i = \ const vector unsigned char maskST##i = \
vec_perm((vector unsigned char)zero, \ vec_perm((vector unsigned char)zero, \
(vector unsigned char)neg1, permST##i); \ (vector unsigned char)neg1, permST##i);\
src##i = vec_perm(src##i ,src##i, permST##i); \ src##i = vec_perm(src##i ,src##i, permST##i); \
sA##i= vec_sel(sA##i, src##i, maskST##i); \ sA##i= vec_sel(sA##i, src##i, maskST##i); \
sB##i= vec_sel(src##i, sB##i, maskST##i); \ sB##i= vec_sel(src##i, sB##i, maskST##i); \
@ -1168,8 +1167,8 @@ static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* ds
vector unsigned char align##j = vec_lvsr(j * stride, dst); \ vector unsigned char align##j = vec_lvsr(j * stride, dst); \
vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \ vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \
vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \ vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \
vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i); \ vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i);\
vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j); \ vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j);\
vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i); \ vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i); \
vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i); \ vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i); \
vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j); \ vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j); \

@ -166,8 +166,7 @@ typedef struct PPContext{
} PPContext; } PPContext;
static inline void linecpy(void *dest, const void *src, int lines, int stride) static inline void linecpy(void *dest, const void *src, int lines, int stride) {
{
if (stride > 0) { if (stride > 0) {
memcpy(dest, src, lines*stride); memcpy(dest, src, lines*stride);
} else { } else {

@ -63,13 +63,13 @@
static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
int numEq= 0, dcOk; int numEq= 0, dcOk;
src+= stride*4; // src points to begin of the 8x8 Block src+= stride*4; // src points to begin of the 8x8 Block
asm volatile( asm volatile(
"movq %0, %%mm7 \n\t" "movq %0, %%mm7 \n\t"
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
: : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP]) : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
); );
asm volatile( asm volatile(
"lea (%2, %3), %%"REG_a" \n\t" "lea (%2, %3), %%"REG_a" \n\t"
// 0 1 2 3 4 5 6 7 8 9 // 0 1 2 3 4 5 6 7 8 9
// %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2
@ -318,8 +318,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
const int l9= stride + l8; const int l9= stride + l8;
int x; int x;
src+= stride*3; src+= stride*3;
for(x=0; x<BLOCK_SIZE; x++) for(x=0; x<BLOCK_SIZE; x++){
{
const int first= FFABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1]; const int first= FFABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1];
const int last= FFABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8]; const int last= FFABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8];
@ -440,16 +439,13 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
int x; int x;
const int QP15= QP + (QP>>2); const int QP15= QP + (QP>>2);
src+= stride*3; src+= stride*3;
for(x=0; x<BLOCK_SIZE; x++) for(x=0; x<BLOCK_SIZE; x++){
{
const int v = (src[x+l5] - src[x+l4]); const int v = (src[x+l5] - src[x+l4]);
if(FFABS(v) < QP15) if(FFABS(v) < QP15){
{
src[x+l3] +=v>>3; src[x+l3] +=v>>3;
src[x+l4] +=v>>1; src[x+l4] +=v>>1;
src[x+l5] -=v>>1; src[x+l5] -=v>>1;
src[x+l6] -=v>>3; src[x+l6] -=v>>3;
} }
} }
@ -566,8 +562,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
int x; int x;
src+= stride*3; src+= stride*3;
for(x=0; x<BLOCK_SIZE; x++) for(x=0; x<BLOCK_SIZE; x++){
{
int a= src[l3] - src[l4]; int a= src[l3] - src[l4];
int b= src[l4] - src[l5]; int b= src[l4] - src[l5];
int c= src[l5] - src[l6]; int c= src[l5] - src[l6];
@ -575,8 +570,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
int d= FFABS(b) - ((FFABS(a) + FFABS(c))>>1); int d= FFABS(b) - ((FFABS(a) + FFABS(c))>>1);
d= FFMAX(d, 0); d= FFMAX(d, 0);
if(d < co->QP*2) if(d < co->QP*2){
{
int v = d * FFSIGN(-b); int v = d * FFSIGN(-b);
src[l2] +=v>>3; src[l2] +=v>>3;
@ -585,7 +579,6 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
src[l5] -=(3*v)>>3; src[l5] -=(3*v)>>3;
src[l6] -=v>>2; src[l6] -=v>>2;
src[l7] -=v>>3; src[l7] -=v>>3;
} }
src++; src++;
} }
@ -825,11 +818,9 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
{ {
int x; int x;
src-= stride; src-= stride;
for(x=0; x<BLOCK_SIZE; x++) for(x=0; x<BLOCK_SIZE; x++){
{
const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]); const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
if(FFABS(middleEnergy)< 8*QP) if(FFABS(middleEnergy)< 8*QP){
{
const int q=(src[l4] - src[l5])/2; const int q=(src[l4] - src[l5])/2;
const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]); const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]); const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]);
@ -840,13 +831,10 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
d= (5*d + 32) >> 6; d= (5*d + 32) >> 6;
d*= FFSIGN(-middleEnergy); d*= FFSIGN(-middleEnergy);
if(q>0) if(q>0){
{
d= d<0 ? 0 : d; d= d<0 ? 0 : d;
d= d>q ? q : d; d= d>q ? q : d;
} }else{
else
{
d= d>0 ? 0 : d; d= d>0 ? 0 : d;
d= d<q ? q : d; d= d<q ? q : d;
} }
@ -856,12 +844,10 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
} }
src++; src++;
} }
src-=8; src-=8;
for(x=0; x<8; x++) for(x=0; x<8; x++){
{
int y; int y;
for(y=4; y<6; y++) for(y=4; y<6; y++){
{
int d= src[x+y*stride] - tmp[x+(y-4)*8]; int d= src[x+y*stride] - tmp[x+(y-4)*8];
int ad= FFABS(d); int ad= FFABS(d);
static int max=0; static int max=0;
@ -871,14 +857,12 @@ src-=8;
if(max<ad) max=ad; if(max<ad) max=ad;
sum+= ad>3 ? 1 : 0; sum+= ad>3 ? 1 : 0;
if(ad>3) if(ad>3){
{
src[0] = src[7] = src[stride*7] = src[(stride+1)*7]=255; src[0] = src[7] = src[stride*7] = src[(stride+1)*7]=255;
} }
if(y==4) bias+=d; if(y==4) bias+=d;
num++; num++;
if(num%1000000 == 0) if(num%1000000 == 0){
{
av_log(c, AV_LOG_INFO, " %d %d %d %d\n", num, sum, max, bias); av_log(c, AV_LOG_INFO, " %d %d %d %d\n", num, sum, max, bias);
} }
} }
@ -1129,11 +1113,9 @@ src-=8;
// const int l9= stride + l8; // const int l9= stride + l8;
int x; int x;
src+= stride*3; src+= stride*3;
for(x=0; x<BLOCK_SIZE; x++) for(x=0; x<BLOCK_SIZE; x++){
{
const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]); const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
if(FFABS(middleEnergy) < 8*c->QP) if(FFABS(middleEnergy) < 8*c->QP){
{
const int q=(src[l4] - src[l5])/2; const int q=(src[l4] - src[l5])/2;
const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]); const int leftEnergy= 5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]); const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]);
@ -1144,13 +1126,10 @@ src-=8;
d= (5*d + 32) >> 6; d= (5*d + 32) >> 6;
d*= FFSIGN(-middleEnergy); d*= FFSIGN(-middleEnergy);
if(q>0) if(q>0){
{
d= d<0 ? 0 : d; d= d<0 ? 0 : d;
d= d>q ? q : d; d= d>q ? q : d;
} }else{
else
{
d= d>0 ? 0 : d; d= d>0 ? 0 : d;
d= d<q ? q : d; d= d<q ? q : d;
} }
@ -1400,12 +1379,10 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
int s[10]; int s[10];
const int QP2= c->QP/2 + 1; const int QP2= c->QP/2 + 1;
for(y=1; y<9; y++) for(y=1; y<9; y++){
{
int x; int x;
p= src + stride*y; p= src + stride*y;
for(x=1; x<9; x++) for(x=1; x<9; x++){
{
p++; p++;
if(*p > max) max= *p; if(*p > max) max= *p;
if(*p < min) min= *p; if(*p < min) min= *p;
@ -1415,8 +1392,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
if(max - min <deringThreshold) return; if(max - min <deringThreshold) return;
for(y=0; y<10; y++) for(y=0; y<10; y++){
{
int t = 0; int t = 0;
if(src[stride*y + 0] > avg) t+= 1; if(src[stride*y + 0] > avg) t+= 1;
@ -1435,24 +1411,20 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
s[y] = t; s[y] = t;
} }
for(y=1; y<9; y++) for(y=1; y<9; y++){
{
int t = s[y-1] & s[y] & s[y+1]; int t = s[y-1] & s[y] & s[y+1];
t|= t>>16; t|= t>>16;
s[y-1]= t; s[y-1]= t;
} }
for(y=1; y<9; y++) for(y=1; y<9; y++){
{
int x; int x;
int t = s[y-1]; int t = s[y-1];
p= src + stride*y; p= src + stride*y;
for(x=1; x<9; x++) for(x=1; x<9; x++){
{
p++; p++;
if(t & (1<<x)) if(t & (1<<x)){
{
int f= (*(p-stride-1)) + 2*(*(p-stride)) + (*(p-stride+1)) int f= (*(p-stride-1)) + 2*(*(p-stride)) + (*(p-stride+1))
+2*(*(p -1)) + 4*(*p ) + 2*(*(p +1)) +2*(*(p -1)) + 4*(*p ) + 2*(*(p +1))
+(*(p+stride-1)) + 2*(*(p+stride)) + (*(p+stride+1)); +(*(p+stride-1)) + 2*(*(p+stride)) + (*(p+stride+1));
@ -1466,8 +1438,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
// if((max-min)<20 || (max-min)*QP<200) // if((max-min)<20 || (max-min)*QP<200)
// if((max-min)*QP < 500) // if((max-min)*QP < 500)
// if(max-min<QP/2) // if(max-min<QP/2)
if(max-min < 20) if(max-min < 20){
{
static int numSkiped=0; static int numSkiped=0;
static int errorSum=0; static int errorSum=0;
static int worstQP=0; static int worstQP=0;
@ -1480,16 +1451,14 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
if(x==1 || x==8 || y==1 || y==8) continue; if(x==1 || x==8 || y==1 || y==8) continue;
numSkiped++; numSkiped++;
if(absDiff > worstDiff) if(absDiff > worstDiff){
{
worstDiff= absDiff; worstDiff= absDiff;
worstQP= QP; worstQP= QP;
worstRange= max-min; worstRange= max-min;
} }
errorSum+= error; errorSum+= error;
if(1024LL*1024LL*1024LL % numSkiped == 0) if(1024LL*1024LL*1024LL % numSkiped == 0){
{
av_log(c, AV_LOG_INFO, "sum:%1.3f, skip:%d, wQP:%d, " av_log(c, AV_LOG_INFO, "sum:%1.3f, skip:%d, wQP:%d, "
"wRange:%d, wDiff:%d, relSkip:%1.3f\n", "wRange:%d, wDiff:%d, relSkip:%1.3f\n",
(float)errorSum/numSkiped, numSkiped, worstQP, worstRange, (float)errorSum/numSkiped, numSkiped, worstQP, worstRange,
@ -1505,15 +1474,12 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
} }
} }
#ifdef DEBUG_DERING_THRESHOLD #ifdef DEBUG_DERING_THRESHOLD
if(max-min < 20) if(max-min < 20){
{ for(y=1; y<9; y++){
for(y=1; y<9; y++)
{
int x; int x;
int t = 0; int t = 0;
p= src + stride*y; p= src + stride*y;
for(x=1; x<9; x++) for(x=1; x<9; x++){
{
p++; p++;
*p = FFMIN(*p + 20, 255); *p = FFMIN(*p + 20, 255);
} }
@ -1631,8 +1597,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc,
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
int x; int x;
src+= stride*3; src+= stride*3;
for(x=0; x<8; x++) for(x=0; x<8; x++){
{
src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4); src[stride*3] = CLIP((-src[0] + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4);
src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4); src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4);
src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4); src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4);
@ -1703,8 +1668,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
int x; int x;
src+= stride*4; src+= stride*4;
for(x=0; x<8; x++) for(x=0; x<8; x++){
{
int t1= tmp[x]; int t1= tmp[x];
int t2= src[stride*1]; int t2= src[stride*1];
@ -1794,8 +1758,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW) #else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
int x; int x;
src+= stride*4; src+= stride*4;
for(x=0; x<8; x++) for(x=0; x<8; x++){
{
int t1= tmp[x]; int t1= tmp[x];
int t2= tmp2[x]; int t2= tmp2[x];
int t3= src[0]; int t3= src[0];
@ -2031,11 +1994,9 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
int x, y; int x, y;
src+= 4*stride; src+= 4*stride;
// FIXME - there should be a way to do a few columns in parallel like w/mmx // FIXME - there should be a way to do a few columns in parallel like w/mmx
for(x=0; x<8; x++) for(x=0; x<8; x++){
{
uint8_t *colsrc = src; uint8_t *colsrc = src;
for (y=0; y<4; y++) for (y=0; y<4; y++){
{
int a, b, c, d, e, f; int a, b, c, d, e, f;
a = colsrc[0 ]; a = colsrc[0 ];
b = colsrc[stride ]; b = colsrc[stride ];
@ -2525,11 +2486,9 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
// int sysd=0; // int sysd=0;
int i; int i;
for(y=0; y<8; y++) for(y=0; y<8; y++){
{
int x; int x;
for(x=0; x<8; x++) for(x=0; x<8; x++){
{
int ref= tempBlured[ x + y*stride ]; int ref= tempBlured[ x + y*stride ];
int cur= src[ x + y*stride ]; int cur= src[ x + y*stride ];
int d1=ref - cur; int d1=ref - cur;
@ -2557,15 +2516,11 @@ Switch between
64 48 36 27 20 15 11 (33) (approx) 64 48 36 27 20 15 11 (33) (approx)
64 56 49 43 37 33 29 (200) (approx) 64 56 49 43 37 33 29 (200) (approx)
*/ */
if(d > maxNoise[1]) if(d > maxNoise[1]){
{ if(d < maxNoise[2]){
if(d < maxNoise[2]) for(y=0; y<8; y++){
{
for(y=0; y<8; y++)
{
int x; int x;
for(x=0; x<8; x++) for(x=0; x<8; x++){
{
int ref= tempBlured[ x + y*stride ]; int ref= tempBlured[ x + y*stride ];
int cur= src[ x + y*stride ]; int cur= src[ x + y*stride ];
tempBlured[ x + y*stride ]= tempBlured[ x + y*stride ]=
@ -2573,28 +2528,19 @@ Switch between
(ref + cur + 1)>>1; (ref + cur + 1)>>1;
} }
} }
} }else{
else for(y=0; y<8; y++){
{
for(y=0; y<8; y++)
{
int x; int x;
for(x=0; x<8; x++) for(x=0; x<8; x++){
{
tempBlured[ x + y*stride ]= src[ x + y*stride ]; tempBlured[ x + y*stride ]= src[ x + y*stride ];
} }
} }
} }
} }else{
else if(d < maxNoise[0]){
{ for(y=0; y<8; y++){
if(d < maxNoise[0])
{
for(y=0; y<8; y++)
{
int x; int x;
for(x=0; x<8; x++) for(x=0; x<8; x++){
{
int ref= tempBlured[ x + y*stride ]; int ref= tempBlured[ x + y*stride ];
int cur= src[ x + y*stride ]; int cur= src[ x + y*stride ];
tempBlured[ x + y*stride ]= tempBlured[ x + y*stride ]=
@ -2602,14 +2548,10 @@ Switch between
(ref*7 + cur + 4)>>3; (ref*7 + cur + 4)>>3;
} }
} }
} }else{
else for(y=0; y<8; y++){
{
for(y=0; y<8; y++)
{
int x; int x;
for(x=0; x<8; x++) for(x=0; x<8; x++){
{
int ref= tempBlured[ x + y*stride ]; int ref= tempBlured[ x + y*stride ];
int cur= src[ x + y*stride ]; int cur= src[ x + y*stride ];
tempBlured[ x + y*stride ]= tempBlured[ x + y*stride ]=
@ -2633,13 +2575,13 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
int64_t sums[10*8*2]; int64_t sums[10*8*2];
src+= step*3; // src points to begin of the 8x8 Block src+= step*3; // src points to begin of the 8x8 Block
//START_TIMER //START_TIMER
asm volatile( asm volatile(
"movq %0, %%mm7 \n\t" "movq %0, %%mm7 \n\t"
"movq %1, %%mm6 \n\t" "movq %1, %%mm6 \n\t"
: : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP]) : : "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP])
); );
asm volatile( asm volatile(
"lea (%2, %3), %%"REG_a" \n\t" "lea (%2, %3), %%"REG_a" \n\t"
// 0 1 2 3 4 5 6 7 8 9 // 0 1 2 3 4 5 6 7 8 9
// %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2
@ -3177,8 +3119,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t
#ifndef HAVE_MMX #ifndef HAVE_MMX
int i; int i;
#endif #endif
if(levelFix) if(levelFix){
{
#ifdef HAVE_MMX #ifdef HAVE_MMX
asm volatile( asm volatile(
"movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset "movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset
@ -3261,9 +3202,7 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
memcpy( &(dst[dstStride*i]), memcpy( &(dst[dstStride*i]),
&(src[srcStride*i]), BLOCK_SIZE); &(src[srcStride*i]), BLOCK_SIZE);
#endif //HAVE_MMX #endif //HAVE_MMX
} }else{
else
{
#ifdef HAVE_MMX #ifdef HAVE_MMX
asm volatile( asm volatile(
"lea (%0,%2), %%"REG_a" \n\t" "lea (%0,%2), %%"REG_a" \n\t"
@ -3317,8 +3256,7 @@ static inline void RENAME(duplicate)(uint8_t src[], int stride)
#else #else
int i; int i;
uint8_t *p=src; uint8_t *p=src;
for(i=0; i<3; i++) for(i=0; i<3; i++){
{
p-= stride; p-= stride;
memcpy(p, src, 8); memcpy(p, src, 8);
} }
@ -3381,8 +3319,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
copyAhead-= 8; copyAhead-= 8;
if(!isColor) if(!isColor){
{
uint64_t sum= 0; uint64_t sum= 0;
int i; int i;
uint64_t maxClipped; uint64_t maxClipped;
@ -3393,8 +3330,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
// first frame is fscked so we ignore it // first frame is fscked so we ignore it
if(c.frameNum == 1) yHistogram[0]= width*height/64*15/256; if(c.frameNum == 1) yHistogram[0]= width*height/64*15/256;
for(i=0; i<256; i++) for(i=0; i<256; i++){
{
sum+= yHistogram[i]; sum+= yHistogram[i];
} }
@ -3402,15 +3338,13 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
maxClipped= (uint64_t)(sum * c.ppMode.maxClippedThreshold); maxClipped= (uint64_t)(sum * c.ppMode.maxClippedThreshold);
clipped= sum; clipped= sum;
for(black=255; black>0; black--) for(black=255; black>0; black--){
{
if(clipped < maxClipped) break; if(clipped < maxClipped) break;
clipped-= yHistogram[black]; clipped-= yHistogram[black];
} }
clipped= sum; clipped= sum;
for(white=0; white<256; white++) for(white=0; white<256; white++){
{
if(clipped < maxClipped) break; if(clipped < maxClipped) break;
clipped-= yHistogram[white]; clipped-= yHistogram[white];
} }
@ -3433,9 +3367,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
if(mode & LEVEL_FIX) QPCorrecture= (int)(scale*256*256 + 0.5); if(mode & LEVEL_FIX) QPCorrecture= (int)(scale*256*256 + 0.5);
else QPCorrecture= 256*256; else QPCorrecture= 256*256;
} }else{
else
{
c.packedYScale= 0x0100010001000100LL; c.packedYScale= 0x0100010001000100LL;
c.packedYOffset= 0; c.packedYOffset= 0;
QPCorrecture= 256*256; QPCorrecture= 256*256;
@ -3450,8 +3382,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
// From this point on it is guaranteed that we can read and write 16 lines downward // From this point on it is guaranteed that we can read and write 16 lines downward
// finish 1 block before the next otherwise we might have a problem // finish 1 block before the next otherwise we might have a problem
// with the L1 Cache of the P4 ... or only a few blocks at a time or something // with the L1 Cache of the P4 ... or only a few blocks at a time or something
for(x=0; x<width; x+=BLOCK_SIZE) for(x=0; x<width; x+=BLOCK_SIZE){
{
#ifdef HAVE_MMX2 #ifdef HAVE_MMX2
/* /*
@ -3514,18 +3445,15 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
} }
if(width==FFABS(dstStride)) if(width==FFABS(dstStride))
linecpy(dst, tempDst + 9*dstStride, copyAhead, dstStride); linecpy(dst, tempDst + 9*dstStride, copyAhead, dstStride);
else else{
{
int i; int i;
for(i=0; i<copyAhead; i++) for(i=0; i<copyAhead; i++){
{
memcpy(dst + i*dstStride, tempDst + (9+i)*dstStride, width); memcpy(dst + i*dstStride, tempDst + (9+i)*dstStride, width);
} }
} }
} }
for(y=0; y<height; y+=BLOCK_SIZE) for(y=0; y<height; y+=BLOCK_SIZE){
{
//1% speedup if these are here instead of the inner loop //1% speedup if these are here instead of the inner loop
const uint8_t *srcBlock= &(src[y*srcStride]); const uint8_t *srcBlock= &(src[y*srcStride]);
uint8_t *dstBlock= &(dst[y*dstStride]); uint8_t *dstBlock= &(dst[y*dstStride]);
@ -3538,8 +3466,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
int QP=0; int QP=0;
/* can we mess with an 8x16 block from srcBlock/dstBlock downwards and 1 line upwards /* can we mess with an 8x16 block from srcBlock/dstBlock downwards and 1 line upwards
if not then use a temporary buffer */ if not then use a temporary buffer */
if(y+15 >= height) if(y+15 >= height){
{
int i; int i;
/* copy from line (copyAhead) to (copyAhead+7) of src, these will be copied with /* copy from line (copyAhead) to (copyAhead+7) of src, these will be copied with
blockcopy to dst later */ blockcopy to dst later */
@ -3564,19 +3491,15 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
// From this point on it is guaranteed that we can read and write 16 lines downward // From this point on it is guaranteed that we can read and write 16 lines downward
// finish 1 block before the next otherwise we might have a problem // finish 1 block before the next otherwise we might have a problem
// with the L1 Cache of the P4 ... or only a few blocks at a time or something // with the L1 Cache of the P4 ... or only a few blocks at a time or something
for(x=0; x<width; x+=BLOCK_SIZE) for(x=0; x<width; x+=BLOCK_SIZE){
{
const int stride= dstStride; const int stride= dstStride;
#ifdef HAVE_MMX #ifdef HAVE_MMX
uint8_t *tmpXchg; uint8_t *tmpXchg;
#endif #endif
if(isColor) if(isColor){
{
QP= QPptr[x>>qpHShift]; QP= QPptr[x>>qpHShift];
c.nonBQP= nonBQPptr[x>>qpHShift]; c.nonBQP= nonBQPptr[x>>qpHShift];
} }else{
else
{
QP= QPptr[x>>4]; QP= QPptr[x>>4];
QP= (QP* QPCorrecture + 256*128)>>16; QP= (QP* QPCorrecture + 256*128)>>16;
c.nonBQP= nonBQPptr[x>>4]; c.nonBQP= nonBQPptr[x>>4];
@ -3653,12 +3576,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
*/ */
/* only deblock if we have 2 blocks */ /* only deblock if we have 2 blocks */
if(y + 8 < height) if(y + 8 < height){
{
if(mode & V_X1_FILTER) if(mode & V_X1_FILTER)
RENAME(vertX1Filter)(dstBlock, stride, &c); RENAME(vertX1Filter)(dstBlock, stride, &c);
else if(mode & V_DEBLOCK) else if(mode & V_DEBLOCK){
{
const int t= RENAME(vertClassify)(dstBlock, stride, &c); const int t= RENAME(vertClassify)(dstBlock, stride, &c);
if(t==1) if(t==1)
@ -3674,13 +3595,11 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride); RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
#endif #endif
/* check if we have a previous block to deblock it with dstBlock */ /* check if we have a previous block to deblock it with dstBlock */
if(x - 8 >= 0) if(x - 8 >= 0){
{
#ifdef HAVE_MMX #ifdef HAVE_MMX
if(mode & H_X1_FILTER) if(mode & H_X1_FILTER)
RENAME(vertX1Filter)(tempBlock1, 16, &c); RENAME(vertX1Filter)(tempBlock1, 16, &c);
else if(mode & H_DEBLOCK) else if(mode & H_DEBLOCK){
{
//START_TIMER //START_TIMER
const int t= RENAME(vertClassify)(tempBlock1, 16, &c); const int t= RENAME(vertClassify)(tempBlock1, 16, &c);
//STOP_TIMER("dc & minmax") //STOP_TIMER("dc & minmax")
@ -3697,8 +3616,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
#else #else
if(mode & H_X1_FILTER) if(mode & H_X1_FILTER)
horizX1Filter(dstBlock-4, stride, QP); horizX1Filter(dstBlock-4, stride, QP);
else if(mode & H_DEBLOCK) else if(mode & H_DEBLOCK){
{
#ifdef HAVE_ALTIVEC #ifdef HAVE_ALTIVEC
DECLARE_ALIGNED(16, unsigned char, tempBlock[272]); DECLARE_ALIGNED(16, unsigned char, tempBlock[272]);
transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride); transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);
@ -3724,8 +3642,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c); RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c);
} }
#endif //HAVE_MMX #endif //HAVE_MMX
if(mode & DERING) if(mode & DERING){
{
//FIXME filter first line //FIXME filter first line
if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c); if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
} }
@ -3749,13 +3666,11 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
#endif #endif
} }
if(mode & DERING) if(mode & DERING){
{
if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, &c); if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, &c);
} }
if((mode & TEMP_NOISE_FILTER)) if((mode & TEMP_NOISE_FILTER)){
{
RENAME(tempNoiseReducer)(dstBlock-8, dstStride, RENAME(tempNoiseReducer)(dstBlock-8, dstStride,
c.tempBlured[isColor] + y*dstStride + x, c.tempBlured[isColor] + y*dstStride + x,
c.tempBluredPast[isColor] + (y>>3)*256 + (x>>3), c.tempBluredPast[isColor] + (y>>3)*256 + (x>>3),
@ -3763,29 +3678,25 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
} }
/* did we use a tmp buffer for the last lines*/ /* did we use a tmp buffer for the last lines*/
if(y+15 >= height) if(y+15 >= height){
{
uint8_t *dstBlock= &(dst[y*dstStride]); uint8_t *dstBlock= &(dst[y*dstStride]);
if(width==FFABS(dstStride)) if(width==FFABS(dstStride))
linecpy(dstBlock, tempDst + dstStride, height-y, dstStride); linecpy(dstBlock, tempDst + dstStride, height-y, dstStride);
else else{
{
int i; int i;
for(i=0; i<height-y; i++) for(i=0; i<height-y; i++){
{
memcpy(dstBlock + i*dstStride, tempDst + (i+1)*dstStride, width); memcpy(dstBlock + i*dstStride, tempDst + (i+1)*dstStride, width);
} }
} }
} }
/* /*
for(x=0; x<width; x+=32) for(x=0; x<width; x+=32){
{
volatile int i; volatile int i;
i+= + dstBlock[x + 7*dstStride] + dstBlock[x + 8*dstStride] i+= + dstBlock[x + 7*dstStride] + dstBlock[x + 8*dstStride]
+ dstBlock[x + 9*dstStride] + dstBlock[x +10*dstStride] + dstBlock[x + 9*dstStride] + dstBlock[x +10*dstStride]
+ dstBlock[x +11*dstStride] + dstBlock[x +12*dstStride]; + dstBlock[x +11*dstStride] + dstBlock[x +12*dstStride];
// + dstBlock[x +13*dstStride] + dstBlock[x +13*dstStride]
// + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride]; + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride];
}*/ }*/
} }
#ifdef HAVE_3DNOW #ifdef HAVE_3DNOW
@ -3795,15 +3706,13 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
#endif #endif
#ifdef DEBUG_BRIGHTNESS #ifdef DEBUG_BRIGHTNESS
if(!isColor) if(!isColor){
{
int max=1; int max=1;
int i; int i;
for(i=0; i<256; i++) for(i=0; i<256; i++)
if(yHistogram[i] > max) max=yHistogram[i]; if(yHistogram[i] > max) max=yHistogram[i];
for(i=1; i<256; i++) for(i=1; i<256; i++){
{
int x; int x;
int start=yHistogram[i-1]/(max/256+1); int start=yHistogram[i-1]/(max/256+1);
int end=yHistogram[i]/(max/256+1); int end=yHistogram[i]/(max/256+1);
@ -3812,12 +3721,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
dst[ i*dstStride + x]+=128; dst[ i*dstStride + x]+=128;
} }
for(i=0; i<100; i+=2) for(i=0; i<100; i+=2){
{
dst[ (white)*dstStride + i]+=128; dst[ (white)*dstStride + i]+=128;
dst[ (black)*dstStride + i]+=128; dst[ (black)*dstStride + i]+=128;
} }
} }
#endif #endif

Loading…
Cancel
Save