Ejemplo n.º 1
0
/** recompute cluster membership as well as eigen systems for each
    cluster

    The amount of work depends on the requested level (stats) and on
    the level currently recorded in clusterStatsValid:
      - stats == 0: nothing is done;
      - stats >= 1: every data point is assigned to the cluster picked
        by bestCluster(), the per-cluster member counts are rebuilt,
        and the covariance matrix and eigen system of every cluster
        are recomputed around the cluster means as currently stored
        (the means themselves are not updated here);
      - stats >= 2: additionally one triangle of every covariance
        matrix is mirrored onto the other one.

    The first stage runs whenever the recorded level is below the
    requested one, so asking for level 2 while level 1 is recorded
    repeats the level 1 work before the mirroring step.

    If there are no data points the function returns without touching
    any state.  A cluster that ends up without members keeps an
    all-zero covariance matrix instead of being divided by zero. */
void
KMeansClustering::recomputeClusterStats( unsigned stats )
{
  // recompute the eigen systems if required
  if( clusterStatsValid< stats && stats> 0 )
  {
    // without data there is nothing to derive statistics from, and the
    // dimension below is read from the first data point
    const unsigned long numPoints= data->data.size();
    if( numPoints== 0 )
      return;

    // update cluster memberships
    for( unsigned long c= 0 ; c< numClusters ; c++ )
      clusters[c].memberCount= 0;
    for( unsigned long p= 0 ; p< numPoints ; p++ )
    {
      const unsigned long best= bestCluster( data->data[p] );
      clusterIDs[p]= best;
      clusters[best].memberCount++;
    }

    // compute covariance matrices: sum of outer products of the
    // deviations from the cluster mean ...
    Vector h( data->data[0].getSize() );
    for( unsigned long c= 0 ; c< numClusters ; c++ )
      clusters[c].cov.zero();
    for( unsigned long p= 0 ; p< numPoints ; p++ )
    {
      h.copy( data->data[p] );
      h-= clusters[clusterIDs[p]].mean;
      clusters[clusterIDs[p]].cov.addOuterProduct( h );
    }
    // ... normalized by the member count; an empty cluster has
    // accumulated nothing, so its matrix is left at zero rather than
    // turned into NaNs by a division by zero
    for( unsigned long c= 0 ; c< numClusters ; c++ )
      if( clusters[c].memberCount> 0 )
	clusters[c].cov/= static_cast<double>( clusters[c].memberCount );

    // compute eigen vectors, eigen values
    JacobiRotation solver;
    for( unsigned long c= 0 ; c< numClusters ; c++ )
      solver.solve( clusters[c].cov, clusters[c].eigenValues,
		    clusters[c].eigenVectors );

    clusterStatsValid= 1;
  }

  // if necessary, make the covariance matrices symmetric again
  // (one triangle may have been destroyed by the eigensolver)
  if( clusterStatsValid< 2 && stats>= 2 )
  {
    const unsigned dim= data->data[0].getSize();
    for( unsigned long c= 0 ; c< numClusters ; c++ )
    {
      Matrix &m= clusters[c].cov;
      // NOTE(review): this overwrites m[hi][lo] (hi > lo) with
      // m[lo][hi].  Confirm against Matrix's indexing convention and
      // against which triangle JacobiRotation::solve() clobbers that
      // this is the intended copy direction.
      for( unsigned long lo= 0 ; lo< dim ; lo++ )
	for( unsigned long hi= lo+1 ; hi< dim ; hi++ )
	  m[hi][lo]= m[lo][hi];
    }

    clusterStatsValid= 2;
  }
}
Ejemplo n.º 2
0
/** split a specific cluster along its largest eigenvector */
void
KMeansClustering::splitCluster( unsigned cluster )
{
  unsigned long i;
  
  // recompute eigen systems if necessary
  if( clusterStatsValid< 1 )
    recomputeClusterStats( 1 );
  
  // split the current mean into two along the largest eigenvector
  unsigned dimension= data->data[0].getSize();
  clusters.push_back( clusters[cluster] ); // copy mean to new cluster
  ClusterStats *c1= &(clusters[cluster]);
  ClusterStats *c2= &(clusters[numClusters]);
  c1->mean.addScalarTimesVector( c1->eigenValues[0], c1->eigenVectors[0] );
  c2->mean.addScalarTimesVector( -c1->eigenValues[0], c1->eigenVectors[0] );
  
  // update the cluster membership only for members of the origonal cluster
  unsigned long numPoints= data->data.size();  
  for( i= 0 ; i< numPoints ; i++ )
    if( clusterIDs[i]== cluster )
      if( dist( data->data[i], c2->mean )< dist( data->data[i], c1->mean ) )
	clusterIDs[i]= numClusters;
  
  // update the two means to actually be the means of the respective
  // point cluster
  c1->mean.zero();
  c2->mean.zero();
  c1->memberCount= c2->memberCount= 0;
  for( i= 0 ; i< numPoints ; i++ )
  {
    if( clusterIDs[i]== cluster )
    {
      c1->mean+= data->data[i];
      c1->memberCount++;
    }
    if( clusterIDs[i]== numClusters )
    {
      c2->mean+= data->data[i];
      c2->memberCount++;
    }
  }
  c1->mean/= (double)c1->memberCount;
  c2->mean/= (double)c2->memberCount;
  
  // update the two convariance matrices;
  Vector h( data->data[0].getSize() );
  c1->cov.zero();
  c2->cov.zero();
  for( i= 0 ; i< numPoints ; i++ )
  {
    if( clusterIDs[i]== cluster )
    {
      h.copy( data->data[i] );
      h-= c1->mean;
      c1->cov.addOuterProduct( h );
    }
    if( clusterIDs[i]== numClusters )
    {
      h.copy( data->data[i] );
      h-= c2->mean;
      c2->cov.addOuterProduct( h );
    }
  }
  c1->cov/= (double)c1->memberCount;
  c2->cov/= (double)c2->memberCount;
  
  // recompute eigen values, eigen vectors
  JacobiRotation solver;
  solver.solve( c1->cov, c1->eigenValues, c1->eigenVectors );
  solver.solve( c2->cov, c2->eigenValues, c2->eigenVectors );
  
  // we have one more cluster now
  numClusters++;
}
Ejemplo n.º 3
0
/** merge two specified clusters

    The cluster with the larger index is folded into the one with the
    smaller index and then removed from the cluster list, so every
    cluster index above it drops by one.  The merged mean is the
    average of the two old means weighted by their member counts.

    Only if clusterStatsValid is above 0 are the per-point cluster IDs
    relabelled and the covariance matrix and eigen system of the merged
    cluster recomputed.

    The call is ignored if both arguments name the same cluster or if
    either one is not an existing cluster.  If both clusters are empty
    the plain average of the two means is used, and the covariance
    matrix of an empty merged cluster is left at zero. */
void
KMeansClustering::mergeClusters( unsigned c1, unsigned c2 )
{
  unsigned long i;

  // make sure c1 is the cluster with the smaller ID
  if( c1> c2 )
  {
    const unsigned tmp= c1; c1= c2; c2= tmp;
  }

  // merging a cluster with itself would double its member count and
  // then delete it; an index past the end would read outside the list
  if( c1== c2 || c2>= numClusters )
    return;
  /*
  cerr << "Before:\n";
  for( unsigned long a= 0 ; a< numClusters ; a++ )
  {
    for( unsigned long b= 0 ; b< clusters[a].mean.getSize() ; b++ )
      cerr << ' ' << clusters[a].eigenValues[b];
    cerr << "\t\t" << clusters[a].memberCount << endl;
  }
  */
  // new mean is old means weighted by member counts
  const double oldCount= clusters[c1].memberCount;
  clusters[c1].memberCount+= clusters[c2].memberCount;
  const double newCount= clusters[c1].memberCount;
  // two empty clusters carry no weights; fall back to the plain average
  const double w= newCount> 0.0 ? oldCount/newCount : 0.5;
  clusters[c1].mean*= w;
  clusters[c1].mean.addScalarTimesVector( 1.0-w, clusters[c2].mean );

  // update cluster membership, covariance matrix, eigenvalues of c1
  // (only if these quantities are valid for the other clusters)
  if( clusterStatsValid> 0 )
  {
    // covariance matrix
    const unsigned long numPoints= data->data.size();
    clusters[c1].cov.zero();
    if( numPoints> 0 ) // the dimension is read from the first point
    {
      Vector h( data->data[0].getSize() );
      for( i= 0 ; i< numPoints ; i++ )
      {
	// merge all points from c2 into c1
	if( clusterIDs[i]== c2 )
	  clusterIDs[i]= c1;
	if( clusterIDs[i]== c1 )
	{
	  h.copy( data->data[i] );
	  h-= clusters[c1].mean;
	  clusters[c1].cov.addOuterProduct( h );
	}
	// the cluster indices > c2 are now reduced by one
	if( clusterIDs[i]> c2 )
	  clusterIDs[i]--;
      }
    }
    // an empty cluster has accumulated nothing; keep the zero matrix
    if( clusters[c1].memberCount> 0 )
      clusters[c1].cov/= static_cast<double>( clusters[c1].memberCount );

    // eigenvalues and eigenvectors
    JacobiRotation solver;
    solver.solve( clusters[c1].cov, clusters[c1].eigenValues,
		  clusters[c1].eigenVectors );

    // the solver may have destroyed part of this covariance matrix,
    // so it can no longer be advertised as fully restored (level 2)
    if( clusterStatsValid> 1 )
      clusterStatsValid= 1;
  }

  // consolidate array: shift everything behind c2 down by one slot
  numClusters--;
  for( i= c2 ; i< numClusters ; i++ )
    clusters[i].copy( clusters[i+1] );
  clusters.pop_back();
  /*
  cerr << "After:\n";
  for( unsigned long a= 0 ; a< numClusters ; a++ )
  {
    for( unsigned long b= 0 ; b< clusters[a].mean.getSize() ; b++ )
      cerr << ' ' << clusters[a].eigenValues[b];
    cerr << "\t\t" << clusters[a].memberCount << endl;
  }
  */
}
  /** fit a line to a SampleVector of data-points
   *
   *  The fitted line passes through the arithmetic mean of the samples
   *  and runs along the eigenvector of their scatter matrix that has
   *  the largest eigenvalue.  It is written to l in two-point form:
   *  p1 is the mean, p2 is the mean plus that eigenvector.
   *
   *  The working dimension is taken from l.p1.  l is left untouched if
   *  dataPoints is empty, or if any sample has a different dimension
   *  (in which case a message is also written to cerr).
   */
  void fitToDataPoints( Line &l, const SampleVector &dataPoints )
  {
      // Nothing to fit without samples
      const unsigned long sampleCount = dataPoints.data.size();
      if( sampleCount == 0 )
          return;

      const unsigned long dimension = l.p1.getSize();

      // Pass 1: centroid of the samples; bail out on the first sample
      // whose dimension does not agree with the line's
      Vector centroid( dimension, 0.0 );
      for( unsigned long s = 0; s < sampleCount; ++s )
      {
          if( dataPoints.data.at(s).getSize() != l.p1.getSize() )
          {
              cerr << "Vector dimensions must match! " << std::endl;
              return;
          }
          centroid += dataPoints.data.at(s);
      }
      centroid /= (double)sampleCount;

      // Pass 2: accumulate the outer products of the centred samples
      // (left unnormalised; only the eigenvector direction is used)
      Matrix scatter( dimension, dimension, true );
      Vector centred( dimension, 0.0 );
      for( unsigned long s = 0; s < sampleCount; ++s )
      {
          centred.copy( dataPoints.data.at(s) );
          centred -= centroid;
          scatter.addOuterProduct( centred );
      }

      // Eigen decomposition of the scatter matrix
      JacobiRotation solver;
      Vector eigValues( dimension, 0.0 );
      Matrix eigVectors( dimension, dimension, true );
      solver.solve( scatter, eigValues, eigVectors );

      // Index of the dominant eigenvalue (first one wins on ties)
      unsigned long dominant = 0;
      for( unsigned long e = 1; e < dimension; ++e )
          if( eigValues[e] > eigValues[dominant] )
              dominant = e;

      // The matching eigenvector gives the direction of the line
      Vector direction( dimension, 0.0 );
      direction.assign( eigVectors.getRowVector(dominant) );

      // Two-point representation: centroid, and centroid + direction
      l.p1.assign( centroid );
      l.p2.assign( centroid );
      l.p2 += direction;
  }