예제 #1
0
/** Use the modern (Durstenfeld) version of the Fisher-Yates shuffle to
  * randomly reorder the given points in place.
  * \param PointIndices Array of point indices to shuffle; modified in place.
  *
  * Arrays with fewer than two elements are left unchanged. Random numbers
  * are drawn from RN_, one per swap (size-1 draws in total).
  */
void Cluster_Kmeans::ShufflePoints( Iarray& PointIndices ) {
  // Nothing to shuffle for 0 or 1 elements. This guard also keeps the
  // unsigned expression size()-1 below from wrapping around.
  if (PointIndices.size() > 1) {
    // Walk i from the last element down to 1 (inclusive); element 0 is
    // fixed automatically once everything above it has been placed.
    for (unsigned int i = PointIndices.size() - 1; i != 0; i--)
    {
      // Pick j so that 0 <= j <= i.
      // NOTE(review): assumes RN_.rn_gen() returns a value in [0,1] - confirm.
      unsigned int j = (unsigned int)(RN_.rn_gen() * (double)(i + 1));
      // Clamp in case the generator returns exactly 1.0.
      if (j > i) j = i;
      int temp = PointIndices[j];
      PointIndices[j] = PointIndices[i];
      PointIndices[i] = temp;
    }
  }
  if (debug_ > 0) { 
    mprintf("DEBUG: Shuffled points:");
    for (Iarray::const_iterator it = PointIndices.begin();
                                it != PointIndices.end(); ++it)
      mprintf(" %i", *it);
    mprintf("\n");
  }
}
예제 #2
0
/** Build a subject structure from a BLAST pairwise alignment, taking
  * coordinates from a reference structure of the query sequence.
  *
  * Arguments read from argIn:
  *   - 'blastfile <file>' : BLAST alignment text (required).
  *   - reference keywords  : reference structure for the query (required).
  *   - 'out <file>'        : output structure file (required).
  *   - output format       : only PDB and Mol2 are honored; anything else
  *                           falls back to PDB.
  *   - 'smaskoffset <#>' / 'qmaskoffset <#>' : added (plus 1) to the subject
  *                           and query indices in the printed Smask/Qmask lists.
  *
  * For subject residues aligned to an identical query residue all
  * non-hydrogen atoms are copied; for aligned but different residues only
  * N, CA, CB, C and O are copied; subject residues with no query counterpart
  * get a single placeholder CA whose position is linearly interpolated
  * between the atoms bordering the placeholder segment.
  *
  * \param State Current state; supplies the reference structure and debug level.
  * \param argIn Command arguments.
  * \return 0 on success, 1 on error.
  */
int SequenceAlign(CpptrajState& State, ArgList& argIn) {
  std::string blastfile = argIn.GetStringKey("blastfile");
  if (blastfile.empty()) {
    mprinterr("Error: 'blastfile' must be specified.\n");
    return 1;
  }
  ReferenceFrame qref = State.DSL()->GetReferenceFrame(argIn);
  if (qref.error() || qref.empty()) {
    mprinterr("Error: Must specify reference structure for query.\n");
    return 1;
  }
  std::string outfilename = argIn.GetStringKey("out");
  if (outfilename.empty()) {
    mprinterr("Error: Must specify output file.\n");
    return 1;
  }
  TrajectoryFile::TrajFormatType fmt = TrajectoryFile::GetFormatFromArg(argIn);
  if (fmt != TrajectoryFile::PDBFILE && fmt != TrajectoryFile::MOL2FILE)
    fmt = TrajectoryFile::PDBFILE; // Default to PDB
  // Offsets convert internal 0-based indices to the numbers printed in the masks.
  int smaskoffset = argIn.getKeyInt("smaskoffset", 0) + 1;
  int qmaskoffset = argIn.getKeyInt("qmaskoffset", 0) + 1;

  // Load blast file
  mprintf("\tReading BLAST alignment from '%s'\n", blastfile.c_str());
  BufferedLine infile;
  if (infile.OpenFileRead( blastfile )) return 1;
  // Seek down to first Query line.
  const char* ptr = infile.Line();
  bool atFirstQuery = false;
  while (ptr != 0) {
    if (*ptr == 'Q') {
      if ( strncmp(ptr, "Query", 5) == 0 ) {
        atFirstQuery = true;
        break;
      }
    }
    ptr = infile.Line();
  }
  if (!atFirstQuery) {
    mprinterr("Error: 'Query' not found.\n");
    return 1;
  }

  // Read alignment. Replacing query with subject.
  typedef std::vector<char> Carray;
  typedef std::vector<int> Iarray;
  Carray Query; // Query residues
  Carray Sbjct; // Sbjct residues
  Iarray Smap;  // Smap[Sbjct index] = Query index, or -1 if no Query residue
  while (ptr != 0) {
    // Each alignment block is three consecutive lines: query, match, subject.
    // NOTE(review): qline is still used after two more Line() calls; this
    // presumes BufferedLine keeps earlier line pointers valid - confirm.
    const char* qline = ptr;           // query line
    const char* aline = infile.Line(); // alignment line
    const char* sline = infile.Line(); // subject line
    if (aline == 0 || sline == 0) {
      mprinterr("Error: Missing alignment line or subject line after Query:\n");
      mprinterr("Error:  %s", qline);
      return 1;
    }
    // NOTE(review): the sequence is assumed to start at column 12 and to be
    // followed by a space on the query line; neither line length is checked.
    for (int idx = 12; qline[idx] != ' '; idx++) {
      if (qline[idx] == '-') {
        // Sbjct does not have corresponding res in Query
        Smap.push_back(-1);
        Sbjct.push_back( sline[idx] );
      } else if (sline[idx] == '-') {
        // Query does not have a corresponding res in Sbjct
        Query.push_back( qline[idx] );
      } else {
        // Direct Query to Sbjct map
        Smap.push_back( Query.size() );
        Sbjct.push_back( sline[idx] );
        Query.push_back( qline[idx] );
      }
    }
    // Scan to next Query 
    ptr = infile.Line();
    while (ptr != 0) {
      if (*ptr == 'Q') {
        if ( strncmp(ptr, "Query", 5) == 0 ) break;
      }
      ptr = infile.Line();
    }
  }
  // Report the map and build comma-separated lists of mapped residues.
  std::string SmaskExp, QmaskExp;
  if (State.Debug() > 0) mprintf("  Map of Sbjct to Query:\n");
  for (int sres = 0; sres != (int)Sbjct.size(); sres++) {
    if (State.Debug() > 0)
      mprintf("%-i %3s %i", sres+smaskoffset, Residue::ConvertResName(Sbjct[sres]),
              Smap[sres]+qmaskoffset);
    const char* qres = "";
    if (Smap[sres] != -1) {
      qres = Residue::ConvertResName(Query[Smap[sres]]);
      if (SmaskExp.empty())
        SmaskExp.assign( integerToString(sres+smaskoffset) );
      else
        SmaskExp.append( "," + integerToString(sres+smaskoffset) );
      if (QmaskExp.empty())
        QmaskExp.assign( integerToString(Smap[sres]+qmaskoffset) );
      else
        QmaskExp.append( "," + integerToString(Smap[sres]+qmaskoffset) );

    }
    if (State.Debug() > 0) mprintf(" %3s\n", qres);
  }
  mprintf("Smask: %s\n", SmaskExp.c_str());
  mprintf("Qmask: %s\n", QmaskExp.c_str());
  // Check that query residues match reference.
  // NOTE(review): qres is used directly as a reference residue index; there
  // is no check that the reference has that many residues.
  for (unsigned int sres = 0; sres != Sbjct.size(); sres++) {
    int qres = Smap[sres];
    if (qres != -1) {
      if (Query[qres] != qref.Parm().Res(qres).SingleCharName()) {
        mprintf("Warning: Potential residue mismatch: Query %s reference %s\n",
                Residue::ConvertResName(Query[qres]), qref.Parm().Res(qres).c_str());
      }
    }
  }
  // Build subject using coordinate from reference.
  Topology sTop;
  Frame sFrame;
  Iarray placeHolder; // Atom indices (in sTop) of placeholder residues.
  for (unsigned int sres = 0; sres != Sbjct.size(); sres++) {
    int qres = Smap[sres];
    NameType SresName( Residue::ConvertResName(Sbjct[sres]) );
    if (qres != -1) {
      Residue const& QR = qref.Parm().Res(qres);
      Residue SR(SresName, sres+1, ' ', QR.ChainID());
      if (Query[qres] == Sbjct[sres]) { // Exact match. All non-H atoms.
        for (int qat = QR.FirstAtom(); qat != QR.LastAtom(); qat++)
        {
          // The topology atom and its coordinates must be added together,
          // otherwise sTop and sFrame no longer describe the same atoms.
          if (qref.Parm()[qat].Element() != Atom::HYDROGEN) {
            sTop.AddTopAtom( qref.Parm()[qat], SR );
            sFrame.AddXYZ( qref.Coord().XYZ(qat) );
          }
        }
      } else { // Partial match. Copy only backbone and CB.
        for (int qat = QR.FirstAtom(); qat != QR.LastAtom(); qat++)
        {
          if ( qref.Parm()[qat].Name().Match("N" ) ||
               qref.Parm()[qat].Name().Match("CA") ||
               qref.Parm()[qat].Name().Match("CB") ||
               qref.Parm()[qat].Name().Match("C" ) ||
               qref.Parm()[qat].Name().Match("O" ) )
          {
            sTop.AddTopAtom( qref.Parm()[qat], SR );
            sFrame.AddXYZ( qref.Coord().XYZ(qat) );
          }
        }
      }
    } else {
      // Residue in query does not exist for subject. Just put placeholder CA for now.
      Vec3 Zero(0.0);
      placeHolder.push_back( sTop.Natom() );
      sTop.AddTopAtom( Atom("CA", "C "), Residue(SresName, sres+1, ' ', ' ') );
      sFrame.AddXYZ( Zero.Dptr() );
    }
  }
  mprintf("\tPlaceholder residue indices:");
  for (Iarray::const_iterator p = placeHolder.begin(); p != placeHolder.end(); ++p)
    mprintf(" %i", *p + 1);
  mprintf("\n");
  // Try to give placeholders more reasonable coordinates. Each run of
  // consecutive placeholder atoms is spread evenly along the straight line
  // between the atom just before the run and the atom just after it.
  unsigned int pidx = 0;
  while (pidx < placeHolder.size()) {
    // Collect one segment of consecutive placeholder atom indices.
    Iarray current_indices;
    current_indices.push_back( placeHolder[pidx++] );
    for (; pidx != placeHolder.size(); pidx++) {
      if (placeHolder[pidx] - current_indices.back() > 1) break;
      current_indices.push_back( placeHolder[pidx] );
    }
    // DEBUG
    mprintf("\tSegment:");
    for (Iarray::const_iterator it = current_indices.begin();
                                it != current_indices.end(); ++it)
      mprintf(" %i", *it + 1);
    // Residues bordering the segment (reported only).
    int prev_res = sTop[current_indices.front()].ResNum() - 1;
    int next_res = sTop[current_indices.back() ].ResNum() + 1;
    mprintf(" (prev_res=%i, next_res=%i)\n", prev_res+1, next_res+1);
    // Atoms bordering the segment. A segment at the very start or end of
    // the subject has no atom on one side; interpolating would read outside
    // the frame, so such placeholders are left at the origin.
    int prev_atom = current_indices.front() - 1;
    int next_atom = current_indices.back()  + 1;
    if (prev_atom < 0 || next_atom >= (int)sTop.Natom()) {
      mprintf("Warning: Placeholder segment has no bordering atom on one side;"
              " coordinates left at origin.\n");
      continue;
    }
    Vec3 prev_crd(sFrame.XYZ(prev_atom));
    Vec3 next_crd(sFrame.XYZ(next_atom));
    prev_crd.Print("prev_crd");
    next_crd.Print("next_crd");
    // N placeholders split the gap into N+1 equal steps.
    Vec3 crd_step = (next_crd - prev_crd) / (double)(current_indices.size()+1);
    crd_step.Print("crd_step");
    double* xyz = sFrame.xAddress() + (current_indices.front() * 3);
    for (unsigned int i = 0; i != current_indices.size(); i++, xyz += 3) {
      prev_crd += crd_step;
      xyz[0] = prev_crd[0];
      xyz[1] = prev_crd[1];
      xyz[2] = prev_crd[2];
    }
  }
  // Write output traj
  Trajout_Single trajout;
  if (trajout.PrepareTrajWrite(outfilename, argIn, &sTop, CoordinateInfo(), 1, fmt)) return 1;
  if (trajout.WriteSingle(0, sFrame)) return 1;
  trajout.EndTraj();
  return 0;
}
예제 #3
0
/** Calculate the RMSD of selectedTgt to centeredREF after re-assigning
  * symmetry-equivalent atoms so that each group of symmetric atoms has the
  * lowest total squared distance to the reference.
  * selectedTgt and centeredREF must correspond to each other.
  * \param selectedTgt Target coordinates.
  * \param centeredREF Reference coordinates (expected at the origin when fitting).
  * \return RMSD of the remapped target (best-fit if fit_ is set, no-fit otherwise).
  */
double SymmetricRmsdCalc::SymmRMSD_CenteredRef(Frame const& selectedTgt, Frame const& centeredREF)
{
  // Begin with the identity map; entries belonging to symmetric groups are
  // overwritten below.
  int const mapSize = (int)AMap_.size();
  for (int idx = 0; idx < mapSize; idx++)
    AMap_[idx] = idx;
  tgtRemap_.SetCoordinates(selectedTgt);
  if (fit_) {
    // Initial best fit. The RMSD call leaves tgtRemap_ at the origin, and
    // centeredREF should already be there, so only a rotation is needed.
    tgtRemap_.RMSD_CenteredRef(centeredREF, rotMatrix_, tgtTrans_, useMass_);
    tgtRemap_.Rotate( rotMatrix_ );
  }
  // Process each group of symmetric atoms in turn.
  for (AtomIndexArray::const_iterator group = SymmetricAtomIndices_.begin();
                                      group != SymmetricAtomIndices_.end(); ++group)
  {
#   ifdef DEBUGSYMMRMSD
    mprintf("    Symmetric atoms group %u starting with atom %i\n", 
            group - SymmetricAtomIndices_.begin(), tgtMask_[group->front()] + 1);
#   endif
    unsigned int const groupSize = group->size();
    // Cost matrix: one row per target atom, one column per reference atom,
    // filled row by row with squared distances.
    cost_matrix_.Initialize( group->size() );
    for (unsigned int t = 0; t != groupSize; t++)
    {
      int const tgtAtom = (*group)[t];
      for (unsigned int r = 0; r != groupSize; r++)
      { 
        int const refAtom = (*group)[r];
        double dist2 = DIST2_NoImage( centeredREF.XYZ(refAtom), tgtRemap_.XYZ(tgtAtom) );
#       ifdef DEBUGSYMMRMSD
        mprintf("\t\t%i to %i: %f\n", tgtMask_[tgtAtom] + 1, tgtMask_[refAtom] + 1, dist2);
#       endif
        cost_matrix_.AddElement( dist2 );
      }
    }
    // Lowest-cost assignment for this group.
    Iarray resMap = cost_matrix_.Optimize();
#   ifdef DEBUGSYMMRMSD
    mprintf("\tMapping from Hungarian Algorithm:\n");
    for (unsigned int col = 0; col != resMap.size(); col++)
      mprintf("\t\tMap col=%u row=%i\n", col, resMap[col]);
#   endif
    // Transfer the group-local assignment into the overall atom map.
    for (unsigned int k = 0; k != groupSize; k++)
    {
      AMap_[(*group)[k]] = (*group)[resMap[k]];
#     ifdef DEBUGSYMMRMSD
      mprintf("\tAssigned atom %i to atom %i\n", tgtMask_[(*group)[k]] + 1,
              tgtMask_[(*group)[resMap[k]]] + 1);
#     endif
    }
  }
# ifdef DEBUGSYMMRMSD
  mprintf("    Final Atom Mapping:\n");
  for (unsigned int ref = 0; ref < AMap_.size(); ++ref)
    mprintf("\t%u -> %i\n", tgtMask_[ref] + 1, tgtMask_[AMap_[ref]] + 1);
  mprintf("----------------------------------------\n");
# endif
  // Reorder the original target coordinates according to the map and
  // compute the final RMSD.
  // TODO: Does the topology need to be remapped as well?
  tgtRemap_.SetCoordinatesByMap(selectedTgt, AMap_);
  if (!fit_)
    return tgtRemap_.RMSD_NoFit( centeredREF, useMass_ );
  return tgtRemap_.RMSD_CenteredRef( centeredREF, rotMatrix_, tgtTrans_, useMass_ );
}
예제 #4
0
// Cluster_Kmeans::Cluster()
int Cluster_Kmeans::Cluster() {
  // First determine which frames are being clustered.
  Iarray const& FramesToCluster = FrameDistances().FramesToCluster();

  // Determine seeds
  FindKmeansSeeds( FramesToCluster );

  if (mode_ == RANDOM)
    RN_.rn_set( kseed_ );

  int pointCount = (int)FramesToCluster.size();

  // This array will hold the indices of the points to process each iteration.
  // If sequential this is just 0 -> pointCount. If random this will be 
  // reassigned each iteration.
  Iarray PointIndices;
  PointIndices.reserve( pointCount );
  for (int processIdx = 0; processIdx != pointCount; processIdx++)
    PointIndices.push_back( processIdx );

  // Add the seed clusters
  for (Iarray::const_iterator seedIdx = SeedIndices_.begin();
                              seedIdx != SeedIndices_.end(); ++seedIdx)
  {
    int seedFrame = FramesToCluster[ *seedIdx ];
    // A centroid is created for new clusters.
    AddCluster( ClusterDist::Cframes(1, seedFrame) );
    // NOTE: No need to calc best rep frame, only 1 frame.
    if (debug_ > 0)
      mprintf("Put frame %i in cluster %i (seed index=%i).\n", 
              seedFrame, clusters_.back().Num(), *seedIdx);
  }
  // Assign points in 3 passes. If a point looked like it belonged to cluster A
  // at first, but then we added many other points and altered our cluster 
  // shapes, its possible that we will want to reassign it to cluster B.
  for (int iteration = 0; iteration != maxIt_; iteration++)
  {
    if (mode_ == RANDOM)
      ShufflePoints( PointIndices );
    // Add each point to an existing cluster, and recompute centroid
    mprintf("\tRound %i: ", iteration);
    ProgressBar progress( PointIndices.size() );
    int Nchanged = 0;
    int prog = 0;
    for (Iarray::const_iterator pointIdx = PointIndices.begin();
                                pointIdx != PointIndices.end(); ++pointIdx, ++prog)
    {
      if (debug_ < 1) progress.Update( prog );
      int oldClusterIdx = -1;
//      if ( iteration != 0 || mode_ != SEQUENTIAL) // FIXME: Should this really happen for RANDOM
//      {
        int pointFrame = FramesToCluster[ *pointIdx ];
        if (debug_ > 0)
          mprintf("DEBUG: Processing frame %i (index %i)\n", pointFrame, *pointIdx);
        bool pointWasYanked = true;
        if (iteration > 0) {
          // Yank this point out of its cluster, recompute the centroid
          for (cluster_it C1 = clusters_.begin(); C1 != clusters_.end(); ++C1)
          {
            if (C1->HasFrame( pointFrame )) 
            {
              // If this point is alone in its cluster its in the right place
              if (C1->Nframes() == 1) {
                pointWasYanked = false;
                continue; // FIXME: should this be a break?
              }
              //oldBestRep = C1->BestRepFrame(); 
              oldClusterIdx = C1->Num();
              C1->RemoveFrameUpdateCentroid( Cdist_, pointFrame ); // TEST
//              C1->RemoveFrameFromCluster( pointFrame );
              //newBestRep = C1->FindBestRepFrame();
//              C1->CalculateCentroid( Cdist_ );
              if (debug_ > 0)
                mprintf("Remove Frame %i from cluster %i\n", pointFrame, C1->Num());
              //if (clusterToClusterCentroid_) {
              //  if (oldBestRep != NewBestRep)
              //    C1->AlignToBestRep( Cdist_ ); // FIXME: Only relevant for COORDS dist?
              //  C1->CalculateCentroid( Cdist_ ); // FIXME: Seems unnessecary to align prior
              //} 
            }
          }
        } else {
          // First iteration. If this point is already in a cluster it is a seed.
          for (cluster_it C1 = clusters_.begin(); C1 != clusters_.end(); ++C1)
          {
            if (C1->HasFrame( pointFrame )) {
              pointWasYanked = false;
              if (debug_ > 0)
                mprintf("Frame %i was already used to seed cluster %i\n", 
                        pointFrame, C1->Num());
              continue; // FIXME break?
            }
          }
        }
        if (pointWasYanked) {
          // Find out what cluster this point is now closest to.
          double closestDist = -1.0;
          cluster_it closestCluster = clusters_.begin();
          for (cluster_it C1 = clusters_.begin(); C1 != clusters_.end(); ++C1)
          {
            double dist = Cdist_->FrameCentroidDist(pointFrame, C1->Cent());
            if (closestDist < 0.0 || dist < closestDist)
            {
              closestDist = dist;
              closestCluster = C1;
            }
          }
          //oldBestRep = closestCluster->BestRepFrame();
          closestCluster->AddFrameUpdateCentroid( Cdist_, pointFrame ); // TEST
//          closestCluster->AddFrameToCluster( pointFrame );
          //newBestRep = closestCluster->FindBestFrameFrame();
//          closestCluster->CalculateCentroid( Cdist_ );
          if (closestCluster->Num() != oldClusterIdx)
          {
            Nchanged++;
            if (debug_ > 0)
              mprintf("Remove Frame %i from cluster %i, but add to cluster %i (dist= %f).\n",
                      pointFrame, oldClusterIdx, closestCluster->Num(), closestDist);
          } else {
            if (debug_ > 0)
              mprintf("Frame %i staying in cluster %i\n", pointFrame, closestCluster->Num());
          }
          if (clusterToClusterCentroid_) {
            //if (oldBestRep != NewBestRep) {
            //    C1->AlignToBestRep( Cdist_ ); // FIXME: Only relevant for COORDS dist?
            //  C1->CalculateCentroid( Cdist_ ); // FIXME: Seems unnessecary to align prior
            //}
          }
        }
//      }
    } // END loop over points to cluster
    if (Nchanged == 0) {
      mprintf("\tK-means round %i: No change. Skipping the rest of the iterations.\n", iteration);
      break;
    } else
      mprintf("\tK-means round %i: %i points changed cluster assignment.\n", iteration, Nchanged);
  } // END k-means iterations
  // Remove any empty clusters
  // FIXME: Will there ever be empty clusters?
  RemoveEmptyClusters();
  // NOTE in PTRAJ here align all frames to best rep 
  return 0;
}