Ejemplo n.º 1
0
// Cluster_Kmeans::Cluster()
int Cluster_Kmeans::Cluster() {
  // First determine which frames are being clustered.
  Iarray const& FramesToCluster = FrameDistances().FramesToCluster();

  // Determine seeds
  FindKmeansSeeds( FramesToCluster );

  if (mode_ == RANDOM)
    RN_.rn_set( kseed_ );

  int pointCount = (int)FramesToCluster.size();

  // This array will hold the indices of the points to process each iteration.
  // If sequential this is just 0 -> pointCount. If random this will be 
  // reassigned each iteration.
  Iarray PointIndices;
  PointIndices.reserve( pointCount );
  for (int processIdx = 0; processIdx != pointCount; processIdx++)
    PointIndices.push_back( processIdx );

  // Add the seed clusters
  for (Iarray::const_iterator seedIdx = SeedIndices_.begin();
                              seedIdx != SeedIndices_.end(); ++seedIdx)
  {
    int seedFrame = FramesToCluster[ *seedIdx ];
    // A centroid is created for new clusters.
    AddCluster( ClusterDist::Cframes(1, seedFrame) );
    // NOTE: No need to calc best rep frame, only 1 frame.
    if (debug_ > 0)
      mprintf("Put frame %i in cluster %i (seed index=%i).\n", 
              seedFrame, clusters_.back().Num(), *seedIdx);
  }
  // Assign points in 3 passes. If a point looked like it belonged to cluster A
  // at first, but then we added many other points and altered our cluster 
  // shapes, its possible that we will want to reassign it to cluster B.
  for (int iteration = 0; iteration != maxIt_; iteration++)
  {
    if (mode_ == RANDOM)
      ShufflePoints( PointIndices );
    // Add each point to an existing cluster, and recompute centroid
    mprintf("\tRound %i: ", iteration);
    ProgressBar progress( PointIndices.size() );
    int Nchanged = 0;
    int prog = 0;
    for (Iarray::const_iterator pointIdx = PointIndices.begin();
                                pointIdx != PointIndices.end(); ++pointIdx, ++prog)
    {
      if (debug_ < 1) progress.Update( prog );
      int oldClusterIdx = -1;
//      if ( iteration != 0 || mode_ != SEQUENTIAL) // FIXME: Should this really happen for RANDOM
//      {
        int pointFrame = FramesToCluster[ *pointIdx ];
        if (debug_ > 0)
          mprintf("DEBUG: Processing frame %i (index %i)\n", pointFrame, *pointIdx);
        bool pointWasYanked = true;
        if (iteration > 0) {
          // Yank this point out of its cluster, recompute the centroid
          for (cluster_it C1 = clusters_.begin(); C1 != clusters_.end(); ++C1)
          {
            if (C1->HasFrame( pointFrame )) 
            {
              // If this point is alone in its cluster its in the right place
              if (C1->Nframes() == 1) {
                pointWasYanked = false;
                continue; // FIXME: should this be a break?
              }
              //oldBestRep = C1->BestRepFrame(); 
              oldClusterIdx = C1->Num();
              C1->RemoveFrameUpdateCentroid( Cdist_, pointFrame ); // TEST
//              C1->RemoveFrameFromCluster( pointFrame );
              //newBestRep = C1->FindBestRepFrame();
//              C1->CalculateCentroid( Cdist_ );
              if (debug_ > 0)
                mprintf("Remove Frame %i from cluster %i\n", pointFrame, C1->Num());
              //if (clusterToClusterCentroid_) {
              //  if (oldBestRep != NewBestRep)
              //    C1->AlignToBestRep( Cdist_ ); // FIXME: Only relevant for COORDS dist?
              //  C1->CalculateCentroid( Cdist_ ); // FIXME: Seems unnessecary to align prior
              //} 
            }
          }
        } else {
          // First iteration. If this point is already in a cluster it is a seed.
          for (cluster_it C1 = clusters_.begin(); C1 != clusters_.end(); ++C1)
          {
            if (C1->HasFrame( pointFrame )) {
              pointWasYanked = false;
              if (debug_ > 0)
                mprintf("Frame %i was already used to seed cluster %i\n", 
                        pointFrame, C1->Num());
              continue; // FIXME break?
            }
          }
        }
        if (pointWasYanked) {
          // Find out what cluster this point is now closest to.
          double closestDist = -1.0;
          cluster_it closestCluster = clusters_.begin();
          for (cluster_it C1 = clusters_.begin(); C1 != clusters_.end(); ++C1)
          {
            double dist = Cdist_->FrameCentroidDist(pointFrame, C1->Cent());
            if (closestDist < 0.0 || dist < closestDist)
            {
              closestDist = dist;
              closestCluster = C1;
            }
          }
          //oldBestRep = closestCluster->BestRepFrame();
          closestCluster->AddFrameUpdateCentroid( Cdist_, pointFrame ); // TEST
//          closestCluster->AddFrameToCluster( pointFrame );
          //newBestRep = closestCluster->FindBestFrameFrame();
//          closestCluster->CalculateCentroid( Cdist_ );
          if (closestCluster->Num() != oldClusterIdx)
          {
            Nchanged++;
            if (debug_ > 0)
              mprintf("Remove Frame %i from cluster %i, but add to cluster %i (dist= %f).\n",
                      pointFrame, oldClusterIdx, closestCluster->Num(), closestDist);
          } else {
            if (debug_ > 0)
              mprintf("Frame %i staying in cluster %i\n", pointFrame, closestCluster->Num());
          }
          if (clusterToClusterCentroid_) {
            //if (oldBestRep != NewBestRep) {
            //    C1->AlignToBestRep( Cdist_ ); // FIXME: Only relevant for COORDS dist?
            //  C1->CalculateCentroid( Cdist_ ); // FIXME: Seems unnessecary to align prior
            //}
          }
        }
//      }
    } // END loop over points to cluster
    if (Nchanged == 0) {
      mprintf("\tK-means round %i: No change. Skipping the rest of the iterations.\n", iteration);
      break;
    } else
      mprintf("\tK-means round %i: %i points changed cluster assignment.\n", iteration, Nchanged);
  } // END k-means iterations
  // Remove any empty clusters
  // FIXME: Will there ever be empty clusters?
  RemoveEmptyClusters();
  // NOTE in PTRAJ here align all frames to best rep 
  return 0;
}
Ejemplo n.º 2
0
/** Ester, Kriegel, Sander, Xu; Proceedings of 2nd International Conference
  * on Knowledge Discovery and Data Mining (KDD-96); pp 226-231.
  */
int Cluster_DBSCAN::Cluster() {
  std::vector<int> NeighborPts;
  std::vector<int> Npts2; // Will hold neighbors of a neighbor
  std::vector<int> FramesToCluster;
  ClusterDist::Cframes cluster_frames;
  // First determine which frames are being clustered.
  // FIXME: Just use sieved array?
  for (int frame = 0; frame < (int)FrameDistances_.Nframes(); ++frame)
    if (!FrameDistances_.IgnoringRow( frame ))
      FramesToCluster.push_back( frame );
  // Calculate Kdist function
  if (!kdist_.Empty()) {
    if (kdist_.Size() == 1)
      ComputeKdist( kdist_.Front(), FramesToCluster );
    else
      ComputeKdistMap( kdist_, FramesToCluster );
    return 0;
  }
  // Set up array to keep track of points that have been visited.
  // Make it the size of FrameDistances so we can index into it. May
  // waste memory during sieving but makes code easier.
  std::vector<bool> Visited( FrameDistances_.Nframes(), false );
  // Set up array to keep track of whether points are noise or in a cluster.
  Status_.assign( FrameDistances_.Nframes(), UNASSIGNED);
  mprintf("\tStarting DBSCAN Clustering:\n");
  ProgressBar cluster_progress(FramesToCluster.size());
  int iteration = 0;
  for (std::vector<int>::iterator point = FramesToCluster.begin();
                                  point != FramesToCluster.end(); ++point)
  {
    if (!Visited[*point]) {
      // Mark this point as visited
      Visited[*point] = true;
      // Determine how many other points are near this point
      RegionQuery( NeighborPts, FramesToCluster, *point );
      if (debug_ > 0) {
        mprintf("\tPoint %i\n", *point + 1);
        mprintf("\t\t%u neighbors:", NeighborPts.size());
      }
      // If # of neighbors less than cutoff, noise; otherwise cluster
      if ((int)NeighborPts.size() < minPoints_) {
        if (debug_ > 0) mprintf(" NOISE\n");
        Status_[*point] = NOISE;
      } else {
        // Expand cluster
        cluster_frames.clear();
        cluster_frames.push_back( *point );
        // NOTE: Use index instead of iterator since NeighborPts may be
        //       modified inside this loop.
        unsigned int endidx = NeighborPts.size();
        for (unsigned int idx = 0; idx < endidx; ++idx) {
          int neighbor_pt = NeighborPts[idx];
          if (!Visited[neighbor_pt]) {
            if (debug_ > 0) mprintf(" %i", neighbor_pt + 1);
            // Mark this neighbor as visited
            Visited[neighbor_pt] = true;
            // Determine how many other points are near this neighbor
            RegionQuery( Npts2, FramesToCluster, neighbor_pt );
            if ((int)Npts2.size() >= minPoints_) {
              // Add other points to current neighbor list
              NeighborPts.insert( NeighborPts.end(), Npts2.begin(), Npts2.end() );
              endidx = NeighborPts.size();
            }
          }
          // If neighbor is not yet part of a cluster, add it to this one.
          if (Status_[neighbor_pt] != INCLUSTER) {
            cluster_frames.push_back( neighbor_pt );
            Status_[neighbor_pt] = INCLUSTER;
          }
        }
        // Remove duplicate frames
        // TODO: Take care of this in Renumber?
        std::sort(cluster_frames.begin(), cluster_frames.end());
        ClusterDist::Cframes::iterator it = std::unique(cluster_frames.begin(), 
                                                        cluster_frames.end());
        cluster_frames.resize( std::distance(cluster_frames.begin(),it) );
        // Add cluster to the list
        AddCluster( cluster_frames );
        if (debug_ > 0) {
          mprintf("\n");
          PrintClusters();
        }
      }
    }
    cluster_progress.Update(iteration++);
  } // END loop over FramesToCluster
  // Calculate the distances between each cluster based on centroids
  CalcClusterDistances();

  return 0;
}