// Cluster_Kmeans::Cluster() int Cluster_Kmeans::Cluster() { // First determine which frames are being clustered. Iarray const& FramesToCluster = FrameDistances().FramesToCluster(); // Determine seeds FindKmeansSeeds( FramesToCluster ); if (mode_ == RANDOM) RN_.rn_set( kseed_ ); int pointCount = (int)FramesToCluster.size(); // This array will hold the indices of the points to process each iteration. // If sequential this is just 0 -> pointCount. If random this will be // reassigned each iteration. Iarray PointIndices; PointIndices.reserve( pointCount ); for (int processIdx = 0; processIdx != pointCount; processIdx++) PointIndices.push_back( processIdx ); // Add the seed clusters for (Iarray::const_iterator seedIdx = SeedIndices_.begin(); seedIdx != SeedIndices_.end(); ++seedIdx) { int seedFrame = FramesToCluster[ *seedIdx ]; // A centroid is created for new clusters. AddCluster( ClusterDist::Cframes(1, seedFrame) ); // NOTE: No need to calc best rep frame, only 1 frame. if (debug_ > 0) mprintf("Put frame %i in cluster %i (seed index=%i).\n", seedFrame, clusters_.back().Num(), *seedIdx); } // Assign points in 3 passes. If a point looked like it belonged to cluster A // at first, but then we added many other points and altered our cluster // shapes, its possible that we will want to reassign it to cluster B. for (int iteration = 0; iteration != maxIt_; iteration++) { if (mode_ == RANDOM) ShufflePoints( PointIndices ); // Add each point to an existing cluster, and recompute centroid mprintf("\tRound %i: ", iteration); ProgressBar progress( PointIndices.size() ); int Nchanged = 0; int prog = 0; for (Iarray::const_iterator pointIdx = PointIndices.begin(); pointIdx != PointIndices.end(); ++pointIdx, ++prog) { if (debug_ < 1) progress.Update( prog ); int oldClusterIdx = -1; // if ( iteration != 0 || mode_ != SEQUENTIAL) // FIXME: Should this really happen for RANDOM // { int pointFrame = FramesToCluster[ *pointIdx ]; if (debug_ > 0) mprintf("DEBUG: Processing frame %i (index %i)\n", pointFrame, *pointIdx); bool pointWasYanked = true; if (iteration > 0) { // Yank this point out of its cluster, recompute the centroid for (cluster_it C1 = clusters_.begin(); C1 != clusters_.end(); ++C1) { if (C1->HasFrame( pointFrame )) { // If this point is alone in its cluster its in the right place if (C1->Nframes() == 1) { pointWasYanked = false; continue; // FIXME: should this be a break? } //oldBestRep = C1->BestRepFrame(); oldClusterIdx = C1->Num(); C1->RemoveFrameUpdateCentroid( Cdist_, pointFrame ); // TEST // C1->RemoveFrameFromCluster( pointFrame ); //newBestRep = C1->FindBestRepFrame(); // C1->CalculateCentroid( Cdist_ ); if (debug_ > 0) mprintf("Remove Frame %i from cluster %i\n", pointFrame, C1->Num()); //if (clusterToClusterCentroid_) { // if (oldBestRep != NewBestRep) // C1->AlignToBestRep( Cdist_ ); // FIXME: Only relevant for COORDS dist? // C1->CalculateCentroid( Cdist_ ); // FIXME: Seems unnessecary to align prior //} } } } else { // First iteration. If this point is already in a cluster it is a seed. for (cluster_it C1 = clusters_.begin(); C1 != clusters_.end(); ++C1) { if (C1->HasFrame( pointFrame )) { pointWasYanked = false; if (debug_ > 0) mprintf("Frame %i was already used to seed cluster %i\n", pointFrame, C1->Num()); continue; // FIXME break? } } } if (pointWasYanked) { // Find out what cluster this point is now closest to. double closestDist = -1.0; cluster_it closestCluster = clusters_.begin(); for (cluster_it C1 = clusters_.begin(); C1 != clusters_.end(); ++C1) { double dist = Cdist_->FrameCentroidDist(pointFrame, C1->Cent()); if (closestDist < 0.0 || dist < closestDist) { closestDist = dist; closestCluster = C1; } } //oldBestRep = closestCluster->BestRepFrame(); closestCluster->AddFrameUpdateCentroid( Cdist_, pointFrame ); // TEST // closestCluster->AddFrameToCluster( pointFrame ); //newBestRep = closestCluster->FindBestFrameFrame(); // closestCluster->CalculateCentroid( Cdist_ ); if (closestCluster->Num() != oldClusterIdx) { Nchanged++; if (debug_ > 0) mprintf("Remove Frame %i from cluster %i, but add to cluster %i (dist= %f).\n", pointFrame, oldClusterIdx, closestCluster->Num(), closestDist); } else { if (debug_ > 0) mprintf("Frame %i staying in cluster %i\n", pointFrame, closestCluster->Num()); } if (clusterToClusterCentroid_) { //if (oldBestRep != NewBestRep) { // C1->AlignToBestRep( Cdist_ ); // FIXME: Only relevant for COORDS dist? // C1->CalculateCentroid( Cdist_ ); // FIXME: Seems unnessecary to align prior //} } } // } } // END loop over points to cluster if (Nchanged == 0) { mprintf("\tK-means round %i: No change. Skipping the rest of the iterations.\n", iteration); break; } else mprintf("\tK-means round %i: %i points changed cluster assignment.\n", iteration, Nchanged); } // END k-means iterations // Remove any empty clusters // FIXME: Will there ever be empty clusters? RemoveEmptyClusters(); // NOTE in PTRAJ here align all frames to best rep return 0; }
/** Ester, Kriegel, Sander, Xu; Proceedings of 2nd International Conference * on Knowledge Discovery and Data Mining (KDD-96); pp 226-231. */ int Cluster_DBSCAN::Cluster() { std::vector<int> NeighborPts; std::vector<int> Npts2; // Will hold neighbors of a neighbor std::vector<int> FramesToCluster; ClusterDist::Cframes cluster_frames; // First determine which frames are being clustered. // FIXME: Just use sieved array? for (int frame = 0; frame < (int)FrameDistances_.Nframes(); ++frame) if (!FrameDistances_.IgnoringRow( frame )) FramesToCluster.push_back( frame ); // Calculate Kdist function if (!kdist_.Empty()) { if (kdist_.Size() == 1) ComputeKdist( kdist_.Front(), FramesToCluster ); else ComputeKdistMap( kdist_, FramesToCluster ); return 0; } // Set up array to keep track of points that have been visited. // Make it the size of FrameDistances so we can index into it. May // waste memory during sieving but makes code easier. std::vector<bool> Visited( FrameDistances_.Nframes(), false ); // Set up array to keep track of whether points are noise or in a cluster. Status_.assign( FrameDistances_.Nframes(), UNASSIGNED); mprintf("\tStarting DBSCAN Clustering:\n"); ProgressBar cluster_progress(FramesToCluster.size()); int iteration = 0; for (std::vector<int>::iterator point = FramesToCluster.begin(); point != FramesToCluster.end(); ++point) { if (!Visited[*point]) { // Mark this point as visited Visited[*point] = true; // Determine how many other points are near this point RegionQuery( NeighborPts, FramesToCluster, *point ); if (debug_ > 0) { mprintf("\tPoint %i\n", *point + 1); mprintf("\t\t%u neighbors:", NeighborPts.size()); } // If # of neighbors less than cutoff, noise; otherwise cluster if ((int)NeighborPts.size() < minPoints_) { if (debug_ > 0) mprintf(" NOISE\n"); Status_[*point] = NOISE; } else { // Expand cluster cluster_frames.clear(); cluster_frames.push_back( *point ); // NOTE: Use index instead of iterator since NeighborPts may be // modified inside this loop. unsigned int endidx = NeighborPts.size(); for (unsigned int idx = 0; idx < endidx; ++idx) { int neighbor_pt = NeighborPts[idx]; if (!Visited[neighbor_pt]) { if (debug_ > 0) mprintf(" %i", neighbor_pt + 1); // Mark this neighbor as visited Visited[neighbor_pt] = true; // Determine how many other points are near this neighbor RegionQuery( Npts2, FramesToCluster, neighbor_pt ); if ((int)Npts2.size() >= minPoints_) { // Add other points to current neighbor list NeighborPts.insert( NeighborPts.end(), Npts2.begin(), Npts2.end() ); endidx = NeighborPts.size(); } } // If neighbor is not yet part of a cluster, add it to this one. if (Status_[neighbor_pt] != INCLUSTER) { cluster_frames.push_back( neighbor_pt ); Status_[neighbor_pt] = INCLUSTER; } } // Remove duplicate frames // TODO: Take care of this in Renumber? std::sort(cluster_frames.begin(), cluster_frames.end()); ClusterDist::Cframes::iterator it = std::unique(cluster_frames.begin(), cluster_frames.end()); cluster_frames.resize( std::distance(cluster_frames.begin(),it) ); // Add cluster to the list AddCluster( cluster_frames ); if (debug_ > 0) { mprintf("\n"); PrintClusters(); } } } cluster_progress.Update(iteration++); } // END loop over FramesToCluster // Calculate the distances between each cluster based on centroids CalcClusterDistances(); return 0; }