// Average deviation of the data points from the cluster they are assigned to.
//
// For every cluster k the member count cls_[k]->N() is reset and then
// recounted while the distances (as reported by cls_[k]->dist()) of all
// points labelled k are summed up.
//
// Returns the sum of all per-cluster distance totals divided by the total
// number of data points N_. The per-cluster totals are NOT normalised by the
// individual cluster sizes.
T KMeans<T,DS>::avgIntraClusterDeviation()
{
  // One accumulator per cluster, starting at zero.
  Matrix<T,Dynamic,1> perClusterSum = Matrix<T,Dynamic,1>::Zero(this->K_);

  // Iteration k only writes perClusterSum(k) and cls_[k].
#pragma omp parallel for
  for (uint32_t k=0; k<this->K_; ++k)
  {
    // Restart the member count; it is rebuilt in the scan below.
    this->cls_[k]->N() = 0.0;

    for (uint32_t n=0; n<this->N_; ++n)
    {
      if (this->cld_->z(n) == k)
      {
        perClusterSum(k) += this->cls_[k]->dist(this->cld_->x()->col(n));
        this->cls_[k]->N() ++;
      }
    }
    // Per-cluster normalisation is intentionally left disabled:
    // if(this->Ns_(k) > 0.0) deviates(k) /= this->Ns_(k);
  }

  return perClusterSum.sum() / this->N_;
}
/**
 * Computes the principal components of the data held in @a df and stores them
 * in _components.
 *
 * The data is arranged as a (factors x data vectors) matrix, the per-factor
 * mean is subtracted, the covariance matrix of the result is built and its
 * eigen decomposition is obtained with Jacobi::jacobi(). After the eigen
 * pairs have been ordered by _sortEigens(), _components[v][d] holds element
 * (d, v) of the eigenvector matrix, i.e. one eigenvector per entry.
 *
 * A warning is printed to stdout when more than two factors are present.
 *
 * @param df data frame to analyse; all factors must be numeric and no value
 *           may be null.
 * @throws Tgs::Exception if a factor is nominal, if the data frame contains
 *         no data vectors, if a value is null, or if an unknown (non
 *         std::exception) error is raised during the decomposition.
 *         Exceptions derived from std::exception are passed through untouched.
 */
void PrincipalComponentsAnalysis::compute(DataFrame& df)
{
  // The dimensions do not change while we work, so query them only once.
  const unsigned int numFactors = df.getNumFactors();
  const unsigned int numVectors = df.getNumDataVectors();

  if (numFactors > 2)
  {
    // see PrincipalComponentsAnalysisTest
    cout << "You realize this hasn't been tested, right?" << endl;
  }

  Matrix dataMat(numFactors, numVectors);
  Matrix deviates(numFactors, numVectors);
  SymmetricMatrix covar(numFactors);
  DiagonalMatrix eigenValues(numFactors);
  Matrix eigenVectors;

  ColumnVector means(numFactors);
  means = 0.0;

  // Row of ones; (means * h) replicates the mean column once per data vector.
  RowVector h(numVectors);
  h = 1.0;

  for (unsigned int j = 0; j < numFactors; j++)
  {
    if (df.isNominal(j))
    {
      throw Tgs::Exception("Only numeric values are supported.");
    }
  }

  // Without any data vectors the mean and covariance would be divided by zero
  // and the eigen solver would be fed NaNs; fail with a clear message instead.
  if (numVectors == 0)
  {
    throw Tgs::Exception("Cannot compute PCA on an empty data frame.");
  }

  const double sampleCount = static_cast<double>(numVectors);

  // Transpose the data into (factor, vector) layout and accumulate the means.
  for (unsigned int i = 0; i < numVectors; i++)
  {
    for (unsigned int j = 0; j < numFactors; j++)
    {
      double v = df.getDataElement(i, j);
      if (df.isNull(v))
      {
        throw Tgs::Exception("Only non-null values are supported.");
      }
      dataMat.element(j, i) = v;
      means.element(j) += v / sampleCount;
    }
  }

  try
  {
    deviates = dataMat - (means * h);
    // Scale in double precision, matching the precision used for the means.
    covar << (1.0 / sampleCount) * (deviates * deviates.t());
    Jacobi::jacobi(covar, eigenValues, eigenVectors);
  }
  catch (const std::exception&)
  {
    // Standard exceptions already carry a usable message; let them through.
    throw;
  }
  catch (...)
  {
    throw Tgs::Exception("Unknown error while calculating PCA");
  }

  _sortEigens(eigenVectors, eigenValues);

  // Copy the eigenvectors (columns of eigenVectors) into _components.
  _components.resize(numFactors);
  for (unsigned int v = 0; v < numFactors; v++)
  {
    _components[v].resize(numFactors);
    for (unsigned int d = 0; d < numFactors; d++)
    {
      _components[v][d] = eigenVectors.element(d, v);
    }
  }
}