You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

249 lines
6.4 KiB

/*
* principal component analysis (PCA)
* Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* principal component analysis (PCA)
*/
#include "common.h"
#include "pca.h"
typedef struct PCA{
int count;
int n;
double *covariance;
double *mean;
double *z;
}PCA;
PCA *ff_pca_init(int n){
PCA *pca;
if(n<=0)
return NULL;
pca= av_mallocz(sizeof(*pca));
pca->n= n;
pca->z = av_malloc_array(n, sizeof(*pca->z));
pca->count=0;
pca->covariance= av_calloc(n*n, sizeof(double));
pca->mean= av_calloc(n, sizeof(double));
return pca;
}
void ff_pca_free(PCA *pca){
av_freep(&pca->covariance);
av_freep(&pca->mean);
av_freep(&pca->z);
av_free(pca);
}
void ff_pca_add(PCA *pca, const double *v){
int i, j;
const int n= pca->n;
for(i=0; i<n; i++){
pca->mean[i] += v[i];
for(j=i; j<n; j++)
pca->covariance[j + i*n] += v[i]*v[j];
}
pca->count++;
}
int ff_pca(PCA *pca, double *eigenvector, double *eigenvalue){
int i, j, pass;
int k=0;
const int n= pca->n;
double *z = pca->z;
memset(eigenvector, 0, sizeof(double)*n*n);
for(j=0; j<n; j++){
pca->mean[j] /= pca->count;
eigenvector[j + j*n] = 1.0;
for(i=0; i<=j; i++){
pca->covariance[j + i*n] /= pca->count;
pca->covariance[j + i*n] -= pca->mean[i] * pca->mean[j];
pca->covariance[i + j*n] = pca->covariance[j + i*n];
}
eigenvalue[j]= pca->covariance[j + j*n];
z[j]= 0;
}
for(pass=0; pass < 50; pass++){
double sum=0;
for(i=0; i<n; i++)
for(j=i+1; j<n; j++)
sum += fabs(pca->covariance[j + i*n]);
if(sum == 0){
for(i=0; i<n; i++){
double maxvalue= -1;
for(j=i; j<n; j++){
if(eigenvalue[j] > maxvalue){
maxvalue= eigenvalue[j];
k= j;
}
}
eigenvalue[k]= eigenvalue[i];
eigenvalue[i]= maxvalue;
for(j=0; j<n; j++){
double tmp= eigenvector[k + j*n];
eigenvector[k + j*n]= eigenvector[i + j*n];
eigenvector[i + j*n]= tmp;
}
}
return pass;
}
for(i=0; i<n; i++){
for(j=i+1; j<n; j++){
double covar= pca->covariance[j + i*n];
double t,c,s,tau,theta, h;
if(pass < 3 && fabs(covar) < sum / (5*n*n)) //FIXME why pass < 3
continue;
if(fabs(covar) == 0.0) //FIXME should not be needed
continue;
if(pass >=3 && fabs((eigenvalue[j]+z[j])/covar) > (1LL<<32) && fabs((eigenvalue[i]+z[i])/covar) > (1LL<<32)){
pca->covariance[j + i*n]=0.0;
continue;
}
h= (eigenvalue[j]+z[j]) - (eigenvalue[i]+z[i]);
theta=0.5*h/covar;
t=1.0/(fabs(theta)+sqrt(1.0+theta*theta));
if(theta < 0.0) t = -t;
c=1.0/sqrt(1+t*t);
s=t*c;
tau=s/(1.0+c);
z[i] -= t*covar;
z[j] += t*covar;
#define ROTATE(a,i,j,k,l) {\
double g=a[j + i*n];\
double h=a[l + k*n];\
a[j + i*n]=g-s*(h+g*tau);\
a[l + k*n]=h+s*(g-h*tau); }
for(k=0; k<n; k++) {
if(k!=i && k!=j){
ROTATE(pca->covariance,FFMIN(k,i),FFMAX(k,i),FFMIN(k,j),FFMAX(k,j))
}
ROTATE(eigenvector,k,i,k,j)
}
pca->covariance[j + i*n]=0.0;
}
}
for (i=0; i<n; i++) {
eigenvalue[i] += z[i];
z[i]=0.0;
}
}
return -1;
}
#ifdef TEST
#undef printf
#include <stdio.h>
#include <stdlib.h>
#include "lfg.h"
int main(void){
PCA *pca;
int i, j, k;
#define LEN 8
double eigenvector[LEN*LEN];
double eigenvalue[LEN];
AVLFG prng;
av_lfg_init(&prng, 1);
pca= ff_pca_init(LEN);
for(i=0; i<9000000; i++){
double v[2*LEN+100];
double sum=0;
int pos = av_lfg_get(&prng) % LEN;
int v2 = av_lfg_get(&prng) % 101 - 50;
v[0] = av_lfg_get(&prng) % 101 - 50;
for(j=1; j<8; j++){
if(j<=pos) v[j]= v[0];
else v[j]= v2;
sum += v[j];
}
/* for(j=0; j<LEN; j++){
v[j] -= v[pos];
}*/
// sum += av_lfg_get(&prng) % 10;
/* for(j=0; j<LEN; j++){
v[j] -= sum/LEN;
}*/
// lbt1(v+100,v+100,LEN);
ff_pca_add(pca, v);
}
ff_pca(pca, eigenvector, eigenvalue);
for(i=0; i<LEN; i++){
pca->count= 1;
pca->mean[i]= 0;
// (0.5^|x|)^2 = 0.5^2|x| = 0.25^|x|
// pca.covariance[i + i*LEN]= pow(0.5, fabs
for(j=i; j<LEN; j++){
printf("%f ", pca->covariance[i + j*LEN]);
}
printf("\n");
}
for(i=0; i<LEN; i++){
double v[LEN];
double error=0;
memset(v, 0, sizeof(v));
for(j=0; j<LEN; j++){
for(k=0; k<LEN; k++){
v[j] += pca->covariance[FFMIN(k,j) + FFMAX(k,j)*LEN] * eigenvector[i + k*LEN];
}
v[j] /= eigenvalue[i];
error += fabs(v[j] - eigenvector[i + j*LEN]);
}
printf("%f ", error);
}
printf("\n");
for(i=0; i<LEN; i++){
for(j=0; j<LEN; j++){
printf("%9.6f ", eigenvector[i + j*LEN]);
}
printf(" %9.1f %f\n", eigenvalue[i], eigenvalue[i]/eigenvalue[0]);
}
return 0;
}
#endif