int Cluster_ReadInfo::Cluster() {
  BufferedLine infile;
  if (infile.OpenFileRead( filename_ )) return Err(0);
  const char* ptr = infile.Line();
  if (ptr == 0) return Err(1);
  ArgList infoLine( ptr, " " );
  int nclusters = infoLine.getKeyInt("#Clustering:", -1);
  if (nclusters == -1) return Err(2);
  int nframes = infoLine.getKeyInt("clusters", -1);
  if (nframes == -1) return Err(3);
  if (nframes != (int)FrameDistances_.Nframes()) {
    mprinterr("Error: # frames in cluster info file (%i) does not match"
              " current # frames (%zu)\n", nframes, FrameDistances_.Nframes());
    return 1;
  }
  // Scan down to clusters
  while (ptr[0] == '#') {
    ptr = infile.Line();
    if (ptr == 0) return Err(1);
    // Save previous clustering info. Includes newline.
    if (ptr[1] == 'A' && ptr[2] == 'l' && ptr[3] == 'g')
      algorithm_.assign( ptr + 12 ); // Right past '#Algorithm: '
  }
  // Read clusters
  ClusterDist::Cframes frames;
  for (int cnum = 0; cnum != nclusters; cnum++) {
    if (ptr == 0) return Err(1);
    frames.clear();
    // TODO: Check for busted lines?
    for (int fidx = 0; fidx != nframes; fidx++) {
      if (ptr[fidx] == 'X')
        frames.push_back( fidx );
    }
    AddCluster( frames );
    mprintf("\tRead cluster %i, %zu frames.\n", cnum, frames.size());
    ptr = infile.Line();
  }
  infile.CloseFile();
  mprintf("\tCalculating the distances between each cluster based on centroids.\n");
  CalcClusterDistances();
  return 0;
}
// Example #2
// 0
/** Perform density-peaks clustering on the current points.
  *
  * Steps:
  *  -# Calculate local densities (Gaussian kernel or discrete density,
  *     depending on useGaussianKernel_).
  *  -# Choose cluster centers manually or automatically. In PLOT_ONLY mode
  *     an informational message is printed and the function returns.
  *  -# Assign every still-unassigned point a cluster number via
  *     AssignClusterNum().
  *  -# Sort points by cluster number and record where each cluster starts
  *     and stops in the Points_ array.
  *  -# Optionally (calc_noise_) mark low-density points as noise, based on
  *     the highest density found among each cluster's border points.
  *  -# Add each non-empty cluster and calculate cluster distances.
  *
  * \return 0 on success (including the PLOT_ONLY early exit), 1 if the local
  *         density calculation reports an error.
  */
int Cluster_DPeaks::Cluster() {
  int err = 0;
  // Calculate local densities
  if ( useGaussianKernel_ )
    err = Cluster_GaussianKernel();
  else
    err = Cluster_DiscreteDensity();
  if (err != 0) return 1;
  // Choose points for which the min distance to point with higher density is
  // anomalously high.
  int nclusters = 0;
  if (choosePoints_ == PLOT_ONLY) {
    // The message must not contain a conversion specifier, since no
    // argument follows the format string.
    // NOTE(review): the output file name could be reported here again once
    // the member that stores it is confirmed.
    mprintf("Info: Cutoffs for choosing points can be determined visually from the\n"
            "Info:   density versus min distance to cluster with next highest density file.\n"
            "Info:   Re-run the algorithm with appropriate distancecut and densitycut.\n");
    return 0;
  } else if (choosePoints_ == MANUAL)
    nclusters = ChoosePointsManually();
  else
    nclusters = ChoosePointsAutomatically();

  mprintf("\tIdentified %i cluster centers from density vs distance peaks.\n", nclusters);
  // Each remaining point is assigned to the same cluster as its nearest
  // neighbor of higher density. Do this recursively until a cluster
  // center is found.
  int cnum = -1;
  for (unsigned int idx = 0; idx != Points_.size(); idx++) {
    if (Points_[idx].Cnum() == -1) {// Point is unassigned.
      AssignClusterNum(idx, cnum);
      //mprintf("Finished recursion for index %i\n\n", idx);
    }
  }
  // Sort by cluster number. NOTE: This invalidates NearestIdx
  std::sort( Points_.begin(), Points_.end(), Cpoint::cnum_sort() );
  // Determine where each cluster starts and stops in Points array.
  // Layout is [start0, stop0, start1, stop1, ...] so the loops below can
  // step through it two entries at a time.
  typedef std::vector<unsigned int> Parray;
  Parray C_start_stop;
  if (nclusters > 0)
    C_start_stop.reserve( nclusters * 2 );
  cnum = -1;
  for (Carray::const_iterator point = Points_.begin(); point != Points_.end(); ++point)
  {
    if (point->Cnum() != cnum) {
      if (!C_start_stop.empty()) C_start_stop.push_back(point - Points_.begin()); // end of cluster
      C_start_stop.push_back(point - Points_.begin()); // beginning of cluster
      cnum = point->Cnum();
    }
  }
  // End of last cluster. Only added when there are points; otherwise the
  // array would hold a lone entry and the stride-2 loops would run past it.
  if (!Points_.empty())
    C_start_stop.push_back( Points_.size() );
  // Noise calculation.
  if (calc_noise_) {
    mprintf("\tDetermining noise frames from cluster borders.\n");
    // For each cluster find a border region, defined as the set of points
    // assigned to that cluster which are within epsilon of any other
    // cluster.
    // NOTE: Could use a set here to prevent duplicate frames.
    typedef std::vector<Parray> Barray;
    // Hold indices of border points for each cluster.
    Barray borderIndices( nclusters > 0 ? nclusters : 0 );
    for (Parray::const_iterator idx0 = C_start_stop.begin();
                                idx0 != C_start_stop.end(); idx0 += 2)
    {
      int c0 = Points_[*idx0].Cnum();
      // Cluster numbers outside [0, nclusters) (e.g. -1 for points that
      // were never assigned) have no slot in borderIndices; skip them.
      if (c0 < 0 || c0 >= nclusters) continue;
      //mprintf("Cluster %i\n", c0);
      // Check each frame in this cluster.
      for (unsigned int i0 = *idx0; i0 != *(idx0+1); ++i0)
      {
        Cpoint const& point = Points_[i0];
        // Look at each other cluster
        for (Parray::const_iterator idx1 = idx0 + 2;
                                    idx1 != C_start_stop.end(); idx1 += 2)
        {
          int c1 = Points_[*idx1].Cnum();
          if (c1 < 0 || c1 >= nclusters) continue;
          // Check each frame in other cluster
          for (unsigned int i1 = *idx1; i1 != *(idx1+1); i1++)
          {
            Cpoint const& other_point = Points_[i1];
            if (FrameDistances_.GetFdist(point.Fnum(), other_point.Fnum()) < epsilon_) {
              //mprintf("\tBorder frame: %i (to cluster %i frame %i)\n",
              //        point.Fnum() + 1, c1, other_point.Fnum() + 1);
              borderIndices[c0].push_back( i0 );
              borderIndices[c1].push_back( i1 );
            }
          }
        }
      }
    }
    if (debug_ > 0)
      mprintf("Warning: Cluster numbers here may not match final cluster numbers.\n"
              "\tBorder Frames:\n");
    for (Parray::const_iterator idx = C_start_stop.begin();
                                idx != C_start_stop.end(); idx += 2)
    {
      int c0 = Points_[*idx].Cnum();
      // Same range check as above: no border information exists for these.
      if (c0 < 0 || c0 >= nclusters) continue;
      if (debug_ > 0)
        mprintf("\tCluster %i: %u frames: %zu border frames:", c0, *(idx+1) - *idx,
                borderIndices[c0].size());
      if (borderIndices[c0].empty()) {
        if (debug_ > 0) mprintf(" No border points.\n");
      } else {
        int highestDensity = -1;
        // Find highest density in border region.
        for (Parray::const_iterator bidx = borderIndices[c0].begin();
                                    bidx != borderIndices[c0].end(); ++bidx)
        {
          if (highestDensity == -1)
            highestDensity = Points_[*bidx].PointsWithinEps();
          else
            highestDensity = std::max(highestDensity, Points_[*bidx].PointsWithinEps());
          if (debug_ > 0) mprintf(" %i", Points_[*bidx].Fnum()+1);
        }
        if (debug_ > 0) mprintf(". Highest density in border= %i\n", highestDensity);
        // Mark any point with density <= highest border density as noise.
        for (unsigned int i = *idx; i != *(idx+1); i++)
        {
          Cpoint& point = Points_[i];
          if (point.PointsWithinEps() <= highestDensity) {
            point.SetCluster( -1 );
            if (debug_ > 1)
              mprintf("\t\tMarking frame %i as noise (density %i)\n",
                       point.Fnum()+1, point.PointsWithinEps());
          }
        }
      }
    }
  }
  // Add the clusters. Points with cluster number -1 (noise/unassigned) are
  // left out, and a cluster with no remaining points is not added.
  for (Parray::const_iterator idx = C_start_stop.begin();
                              idx != C_start_stop.end(); idx += 2)
  {
    ClusterDist::Cframes frames;
    for (unsigned int i = *idx; i != *(idx+1); i++) {
      if (Points_[i].Cnum() != -1)
        frames.push_back( Points_[i].Fnum() );
    }
    if (!frames.empty())
      AddCluster( frames );
  }
  // Calculate the distances between each cluster based on centroids
  CalcClusterDistances();
  return 0;
}