int Cluster_ReadInfo::Cluster() { BufferedLine infile; if (infile.OpenFileRead( filename_ )) return Err(0); const char* ptr = infile.Line(); if (ptr == 0) return Err(1); ArgList infoLine( ptr, " " ); int nclusters = infoLine.getKeyInt("#Clustering:", -1); if (nclusters == -1) return Err(2); int nframes = infoLine.getKeyInt("clusters", -1); if (nframes == -1) return Err(3); if (nframes != (int)FrameDistances_.Nframes()) { mprinterr("Error: # frames in cluster info file (%i) does not match" " current # frames (%zu)\n", nframes, FrameDistances_.Nframes()); return 1; } // Scan down to clusters while (ptr[0] == '#') { ptr = infile.Line(); if (ptr == 0) return Err(1); // Save previous clustering info. Includes newline. if (ptr[1] == 'A' && ptr[2] == 'l' && ptr[3] == 'g') algorithm_.assign( ptr + 12 ); // Right past '#Algorithm: ' } // Read clusters ClusterDist::Cframes frames; for (int cnum = 0; cnum != nclusters; cnum++) { if (ptr == 0) return Err(1); frames.clear(); // TODO: Check for busted lines? for (int fidx = 0; fidx != nframes; fidx++) { if (ptr[fidx] == 'X') frames.push_back( fidx ); } AddCluster( frames ); mprintf("\tRead cluster %i, %zu frames.\n", cnum, frames.size()); ptr = infile.Line(); } infile.CloseFile(); mprintf("\tCalculating the distances between each cluster based on centroids.\n"); CalcClusterDistances(); return 0; }
int Cluster_DPeaks::Cluster() { int err = 0; // Calculate local densities if ( useGaussianKernel_ ) err = Cluster_GaussianKernel(); else err = Cluster_DiscreteDensity(); if (err != 0) return 1; // Choose points for which the min distance to point with higher density is // anomalously high. int nclusters = 0; if (choosePoints_ == PLOT_ONLY) { mprintf("Info: Cutoffs for choosing points can be determined visually from the\n" "Info: density versus min distance to cluster with next highest density file,\n" "Info: '%s'. Re-run the algorithm with appropriate distancecut and densitycut.\n"); return 0; } else if (choosePoints_ == MANUAL) nclusters = ChoosePointsManually(); else nclusters = ChoosePointsAutomatically(); mprintf("\tIdentified %i cluster centers from density vs distance peaks.\n", nclusters); // Each remaining point is assigned to the same cluster as its nearest // neighbor of higher density. Do this recursively until a cluster // center is found. int cnum = -1; for (unsigned int idx = 0; idx != Points_.size(); idx++) { if (Points_[idx].Cnum() == -1) {// Point is unassigned. AssignClusterNum(idx, cnum); //mprintf("Finished recursion for index %i\n\n", idx); } } // Sort by cluster number. NOTE: This invalidates NearestIdx std::sort( Points_.begin(), Points_.end(), Cpoint::cnum_sort() ); // Determine where each cluster starts and stops in Points array typedef std::vector<unsigned int> Parray; Parray C_start_stop; C_start_stop.reserve( nclusters * 2 ); cnum = -1; for (Carray::const_iterator point = Points_.begin(); point != Points_.end(); ++point) { if (point->Cnum() != cnum) { if (!C_start_stop.empty()) C_start_stop.push_back(point - Points_.begin()); // end of cluster C_start_stop.push_back(point - Points_.begin()); // beginning of cluster cnum = point->Cnum(); } } C_start_stop.push_back( Points_.size() ); // end of last cluster // Noise calculation. if (calc_noise_) { mprintf("\tDetermining noise frames from cluster borders.\n"); // For each cluster find a border region, defined as the set of points // assigned to that cluster which are within epsilon of any other // cluster. // NOTE: Could use a set here to prevent duplicate frames. typedef std::vector<Parray> Barray; Barray borderIndices( nclusters ); // Hold indices of border points for each cluster. for (Parray::const_iterator idx0 = C_start_stop.begin(); idx0 != C_start_stop.end(); idx0 += 2) { int c0 = Points_[*idx0].Cnum(); //mprintf("Cluster %i\n", c0); // Check each frame in this cluster. for (unsigned int i0 = *idx0; i0 != *(idx0+1); ++i0) { Cpoint const& point = Points_[i0]; // Look at each other cluster for (Parray::const_iterator idx1 = idx0 + 2; idx1 != C_start_stop.end(); idx1 += 2) { int c1 = Points_[*idx1].Cnum(); // Check each frame in other cluster for (unsigned int i1 = *idx1; i1 != *(idx1+1); i1++) { Cpoint const& other_point = Points_[i1]; if (FrameDistances_.GetFdist(point.Fnum(), other_point.Fnum()) < epsilon_) { //mprintf("\tBorder frame: %i (to cluster %i frame %i)\n", // point.Fnum() + 1, c1, other_point.Fnum() + 1); borderIndices[c0].push_back( i0 ); borderIndices[c1].push_back( i1 ); } } } } } if (debug_ > 0) mprintf("Warning: Cluster numbers here may not match final cluster numbers.\n" "\tBorder Frames:\n"); for (Parray::const_iterator idx = C_start_stop.begin(); idx != C_start_stop.end(); idx += 2) { int c0 = Points_[*idx].Cnum(); if (debug_ > 0) mprintf("\tCluster %u: %u frames: %u border frames:", c0, *(idx+1) - *idx, borderIndices[c0].size()); if (borderIndices[c0].empty()) { if (debug_ > 0) mprintf(" No border points.\n"); } else { int highestDensity = -1; // Find highest density in border region. for (Parray::const_iterator bidx = borderIndices[c0].begin(); bidx != borderIndices[c0].end(); ++bidx) { if (highestDensity == -1) highestDensity = Points_[*bidx].PointsWithinEps(); else highestDensity = std::max(highestDensity, Points_[*bidx].PointsWithinEps()); if (debug_ > 0) mprintf(" %i", Points_[*bidx].Fnum()+1); } if (debug_ > 0) mprintf(". Highest density in border= %i\n", highestDensity); // Mark any point with density <= highest border density as noise. for (unsigned int i = *idx; i != *(idx+1); i++) { Cpoint& point = Points_[i]; if (point.PointsWithinEps() <= highestDensity) { point.SetCluster( -1 ); if (debug_ > 1) mprintf("\t\tMarking frame %i as noise (density %i)\n", point.Fnum()+1, point.PointsWithinEps()); } } } } } // Add the clusters. for (Parray::const_iterator idx = C_start_stop.begin(); idx != C_start_stop.end(); idx += 2) { ClusterDist::Cframes frames; for (unsigned int i = *idx; i != *(idx+1); i++) { if (Points_[i].Cnum() != -1) frames.push_back( Points_[i].Fnum() ); } if (!frames.empty()) AddCluster( frames ); } // Calculate the distances between each cluster based on centroids CalcClusterDistances(); return 0; }