/** recompute cluster membership as well as eigen systems for each cluster */ void KMeansClustering::recomputeClusterStats( unsigned stats ) { unsigned long i, j; // recompute the eigen systems if required if( clusterStatsValid< stats && stats> 0 ) { // update cluster memberships unsigned long numPoints= data->data.size(); for( i= 0 ; i< numClusters ; i++ ) clusters[i].memberCount= 0; for( i= 0 ; i< numPoints ; i++ ) { j= bestCluster( data->data[i] ); clusterIDs[i]= j; clusters[j].memberCount++; } // compute covariance matrices Vector h( data->data[0].getSize() ); for( i= 0 ; i< numClusters ; i++ ) clusters[i].cov.zero(); for( i= 0 ; i< numPoints ; i++ ) { h.copy( data->data[i] ); h-= clusters[clusterIDs[i]].mean; clusters[clusterIDs[i]].cov.addOuterProduct( h ); } for( i= 0 ; i< numClusters ; i++ ) clusters[i].cov/= (double)clusters[i].memberCount; // compute eigen vectors, eigen values JacobiRotation solver; for( i= 0 ; i< numClusters ; i++ ) solver.solve( clusters[i].cov, clusters[i].eigenValues, clusters[i].eigenVectors ); clusterStatsValid= 1; } // if necessary, restore upper triangle of covariance matrices // (may have been destroyed by eigensolver) if( clusterStatsValid< 2 && stats>= 2 ) { unsigned dim= data->data[0].getSize(); for( unsigned k= 0 ; k< numClusters ; k++ ) { Matrix &m= clusters[k].cov; for( i= 0 ; i< dim ; i++ ) for( j= i+1 ; j< dim ; j++ ) m[j][i]= m[i][j]; } clusterStatsValid= 2; } }
/** split a specific cluster along its largest eigenvector */ void KMeansClustering::splitCluster( unsigned cluster ) { unsigned long i; // recompute eigen systems if necessary if( clusterStatsValid< 1 ) recomputeClusterStats( 1 ); // split the current mean into two along the largest eigenvector unsigned dimension= data->data[0].getSize(); clusters.push_back( clusters[cluster] ); // copy mean to new cluster ClusterStats *c1= &(clusters[cluster]); ClusterStats *c2= &(clusters[numClusters]); c1->mean.addScalarTimesVector( c1->eigenValues[0], c1->eigenVectors[0] ); c2->mean.addScalarTimesVector( -c1->eigenValues[0], c1->eigenVectors[0] ); // update the cluster membership only for members of the origonal cluster unsigned long numPoints= data->data.size(); for( i= 0 ; i< numPoints ; i++ ) if( clusterIDs[i]== cluster ) if( dist( data->data[i], c2->mean )< dist( data->data[i], c1->mean ) ) clusterIDs[i]= numClusters; // update the two means to actually be the means of the respective // point cluster c1->mean.zero(); c2->mean.zero(); c1->memberCount= c2->memberCount= 0; for( i= 0 ; i< numPoints ; i++ ) { if( clusterIDs[i]== cluster ) { c1->mean+= data->data[i]; c1->memberCount++; } if( clusterIDs[i]== numClusters ) { c2->mean+= data->data[i]; c2->memberCount++; } } c1->mean/= (double)c1->memberCount; c2->mean/= (double)c2->memberCount; // update the two convariance matrices; Vector h( data->data[0].getSize() ); c1->cov.zero(); c2->cov.zero(); for( i= 0 ; i< numPoints ; i++ ) { if( clusterIDs[i]== cluster ) { h.copy( data->data[i] ); h-= c1->mean; c1->cov.addOuterProduct( h ); } if( clusterIDs[i]== numClusters ) { h.copy( data->data[i] ); h-= c2->mean; c2->cov.addOuterProduct( h ); } } c1->cov/= (double)c1->memberCount; c2->cov/= (double)c2->memberCount; // recompute eigen values, eigen vectors JacobiRotation solver; solver.solve( c1->cov, c1->eigenValues, c1->eigenVectors ); solver.solve( c2->cov, c2->eigenValues, c2->eigenVectors ); // we have one more cluster now 
numClusters++; }
/** merge two specified clusters */ void KMeansClustering::mergeClusters( unsigned c1, unsigned c2 ) { unsigned long i, j; // make sure c1 is the cluster with the smaller ID if( c1> c2 ) { i= c1; c1= c2; c2=i; } /* cerr << "Before:\n"; for( i= 0 ; i< numClusters ; i++ ) { for( j= 0 ; j< clusters[i].mean.getSize() ; j++ ) cerr << ' ' << clusters[i].eigenValues[j]; cerr << "\t\t" << clusters[i].memberCount << endl; } */ // new mean is old means weighted by memeber counts double w= clusters[c1].memberCount; clusters[c1].memberCount+= clusters[c2].memberCount; w/= clusters[c1].memberCount; clusters[c1].mean*= w; clusters[c1].mean.addScalarTimesVector( 1.0-w, clusters[c2].mean ); // update cluster membership, covariance matrix, eigenvalues of c1 // (only if these quantities are valid for the other clusters) if( clusterStatsValid> 0 ) { // covariance matrix unsigned long numPoints= data->data.size(); Vector h( data->data[0].getSize() ); clusters[c1].cov.zero(); for( i= 0 ; i< numPoints ; i++ ) { // merge merge all points from c2 into c1 if( clusterIDs[i]== c2 ) clusterIDs[i]= c1; if( clusterIDs[i]== c1 ) { h.copy( data->data[i] ); h-= clusters[c1].mean; clusters[c1].cov.addOuterProduct( h ); } // the cluster indices > c2 are now reduced by one if( clusterIDs[i]> c2 ) clusterIDs[i]--; } clusters[c1].cov/= clusters[c1].memberCount; // eigenvalues and eigenvectors JacobiRotation solver; solver.solve( clusters[c1].cov, clusters[c1].eigenValues, clusters[c1].eigenVectors ); } // consolidate array numClusters--; for( i= c2 ; i< numClusters ; i++ ) clusters[i].copy( clusters[i+1] ); clusters.pop_back(); /* cerr << "After:\n"; for( i= 0 ; i< numClusters ; i++ ) { for( j= 0 ; j< clusters[i].mean.getSize() ; j++ ) cerr << ' ' << clusters[i].eigenValues[j]; cerr << "\t\t" << clusters[i].memberCount << endl; } */ }
/**
 * Fit a line to a SampleVector of data points.
 *
 * The least-squares line passes through the arithmetic mean of all points;
 * its direction is the eigenvector, belonging to the largest eigenvalue, of
 * the matrix accumulated from the outer products of the mean-centred points
 * (the covariance matrix without the 1/N normalisation).
 *
 * The result is stored in two-point form: l.p1 is the mean and l.p2 is the
 * mean plus the direction vector.
 *
 * The function returns without touching l when dataPoints is empty, or when
 * a point's size differs from l.p1.getSize() (an error message is written to
 * cerr in that case).
 *
 * @param l          line to receive the fit; the size of l.p1 defines the
 *                   expected dimension of the points
 * @param dataPoints samples to fit the line to
 */
void fitToDataPoints( Line &l, const SampleVector &dataPoints )
{
    // nothing to fit without samples
    const unsigned long numSamples = dataPoints.data.size();
    if( numSamples == 0 )
        return;

    const unsigned long dimension = l.p1.getSize();

    // arithmetic mean, rejecting samples of the wrong size on the way
    Vector mean( dimension, 0.0 );
    for( unsigned long s = 0; s < numSamples; s++ )
    {
        if( dataPoints.data.at(s).getSize() != dimension )
        {
            cerr << "Vector dimensions must match! " << std::endl;
            return;
        }
        mean += dataPoints.data.at(s);
    }
    mean /= (double)numSamples;

    // accumulate the outer products of the mean-centred samples
    Matrix scatter( dimension, dimension, true );
    Vector centred( dimension, 0.0 );
    for( unsigned long s = 0; s < numSamples; s++ )
    {
        centred.copy( dataPoints.data.at(s) );
        centred -= mean;
        scatter.addOuterProduct( centred );
    }

    // eigen decomposition of the accumulated matrix
    Vector eigValues( dimension, 0.0 );
    Matrix eigVectors( dimension, dimension, true );
    JacobiRotation solver;
    solver.solve( scatter, eigValues, eigVectors );

    // index of the largest eigenvalue (the first one wins on ties)
    unsigned long best = 0;
    for( unsigned long k = 1; k < dimension; k++ )
    {
        if( eigValues[k] > eigValues[best] )
            best = k;
    }

    // the matching eigenvector is the direction of the line
    Vector direction( dimension, 0.0 );
    direction.assign( eigVectors.getRowVector( best ) );

    // store in 2-point representation
    l.p1.assign( mean );
    l.p2.assign( mean );
    l.p2 += direction;
}