/** \brief Subtract a per-column value from every column of a matrix, in place.

    A temporary vector with one entry per column is filled by matrix_mean()
    (presumably with the column means -- confirm against its definition) and
    entry i is then subtracted from every element of column i.

    \param input matrix to be modified in place; must not be NULL
*/
void matrix_demean(gsl_matrix *input){
  size_t NCOL = input->size2;
  gsl_vector *mean = gsl_vector_alloc(NCOL);
  matrix_mean(mean, input);

  size_t i;
  /* Each iteration touches a different column, so the view can live inside
     the loop body and needs no OpenMP private clause. */
#pragma omp parallel for
  for (i = 0; i < NCOL; i++) {
    gsl_vector_view column = gsl_matrix_column(input, i);
    gsl_vector_add_constant(&column.vector, -gsl_vector_get(mean, i));
  }

  /* The temporary vector was previously leaked on every call. */
  gsl_vector_free(mean);
}
/** \brief Principal Components analysis.

    \ingroup grplinalg

    This implementation uses SVD to calculate the PCA. A copy of the data is
    centered per variable (column) and scaled by 1/sqrt(N-1); the SVD of that
    matrix is computed with gsl_linalg_SV_decomp() and the centered, scaled
    data is then projected onto the right singular vectors.

    Example:
    \code
    Array *X = get_data_from_somewhere();
    Array *var, *X_pca;
    X_pca = matrix_pca( X, &var, TRUE ); // do not touch X
    array_free( var );
    matrix_pca( X, &var, FALSE );        // overwrite X
    \endcode

    \param X a 2D array observations x variables containing the data;
           at least 2 observations are required and there must not be fewer
           observations than variables (restriction of the SVD routine)
    \param var output: vector of squared singular values of the centered,
           scaled data, in the order delivered by the SVD; the caller owns
           the returned array. If you pass NULL, it is ignored. It is left
           untouched if the function fails.
    \param alloc if true, new memory is allocated and returned.
           Else X is overwritten.
    \return pointer to the PCA'd matrix (X itself if alloc is false),
            or NULL on error
*/
Array* matrix_pca( Array *X, Array **var, bool alloc ){
  Array *out=NULL;
  Array *tmp=NULL, *mean=NULL, *Vp=NULL;
  gsl_matrix *A=NULL, *V=NULL;
  gsl_vector *S=NULL, *workspace=NULL;
  int i,j;
  bool ismatrix;
  matrix_CHECK( ismatrix, X );
  if( !ismatrix ) return NULL;

  int N,K; /* N observations, K variables */
  N = X->size[0];
  K = X->size[1];

  /* 1/sqrt(N-1) is undefined for fewer than two observations */
  if( N<2 || K<1 ){
    errprintf("PCA needs at least 2 observations and 1 variable, got (%i,%i)\n", N, K );
    return NULL;
  }
  /* gsl_linalg_SV_decomp() only handles matrices with rows >= columns */
  if( N<K ){
    errprintf("PCA via SVD needs at least as many observations as variables, got (%i,%i)\n", N, K );
    return NULL;
  }

  tmp = array_copy( X, TRUE );

  /* subtract mean from observations */
  mean = matrix_mean( X, 0 );
  for( i=0; i<N; i++ ){
    for( j=0; j<K; j++ ){
      array_INDEX2( tmp, double, i, j ) -= array_INDEX1( mean, double, j );
    }
  }

  array_scale( tmp, 1.0/sqrt( (double)(N-1) ) );

  A=matrix_to_gsl( tmp, TRUE ); /* copy */
  V=gsl_matrix_alloc( K, K );
  S=gsl_vector_alloc( K );
  workspace=gsl_vector_alloc( K );

  /* A->U, V->V, S->S; a non-zero status signals failure */
  if( gsl_linalg_SV_decomp( A, V, S, workspace )!=0 ){
    errprintf("SVD failed\n");
    goto cleanup;
  }
  gsl_matrix_transpose( V );

  if( var ){
    (*var)=array_fromptr2( DOUBLE, 1, S->data, S->size );
    (*var)->free_data=1;
    /* Transfer ownership of the buffer to the array: detach it from the
       block so that gsl_vector_free() still releases the block header
       (clearing S->owner instead would leak that header). */
    S->block->data=NULL;
    S->data=NULL;
    for( i=0; i<array_NUMEL( *var ); i++ ){
      array_INDEX1( *var, double, i ) = SQR( array_INDEX1( *var, double, i ) );
    }
  }

  /* Vp only wraps V's memory; V itself is released via gsl_matrix_free() */
  Vp=array_fromptr2( DOUBLE, 2, V->data, V->size1, V->size2 );
  matrix_transpose( tmp, FALSE );
  out=matrix_mult( Vp, tmp ); /* PCA'd data */
  matrix_transpose( out, FALSE );

  if( out->size[0]!=X->size[0] || out->size[1]!=X->size[1] ){
    errprintf("Input/Output dimension mismatch: (%i,%i) vs. (%i,%i)\n",
              X->size[0], X->size[1], out->size[0], out->size[1] );
  }

  if( !alloc ){ /* write back out->X */
    memcpy( X->data, out->data, out->nbytes );
    array_free( out );
    out = X;
  }

 cleanup:
  gsl_matrix_free( A );
  gsl_matrix_free( V );
  gsl_vector_free( S );
  gsl_vector_free( workspace );
  array_free( mean );
  if( Vp ) array_free( Vp );
  array_free( tmp );

  return out;
}