/** Build the array of contact indices for the atoms selected in a mask.
  * For each selected atom the stored index is either the number of the
  * residue the atom belongs to (when byResidue_ is set) or the atom number
  * itself. Having this array up front makes it easier to create an
  * atom/residue contact map.
  * \param mask Mask holding the selected atoms.
  * \param parmIn Topology used to look up the residue number of each atom.
  * \return Array with one index per selected atom, in mask order.
  */
Action_NativeContacts::Iarray Action_NativeContacts::SetupContactIndices(
                                AtomMask const& mask, Topology const& parmIn)
{
  Iarray indices;
  AtomMask::const_iterator selected = mask.begin();
  while (selected != mask.end()) {
    if (!byResidue_) {
      // Atom-level map: the index is the atom number.
      indices.push_back( *selected );
    } else {
      // Residue-level map: the index is the residue containing the atom.
      indices.push_back( parmIn[*selected].ResNum() );
    }
    ++selected;
  }
  return indices;
}
// Analysis_TI::Calc_Nskip() int Analysis_TI::Calc_Nskip() { // sum: Hold the results of integration for each curve (skip value) Darray sum(nskip_.size(), 0.0); // lastSkipPoint: Points after which averages can be recorded Iarray lastSkipPoint; for (Iarray::const_iterator it = nskip_.begin(); it != nskip_.end(); ++it) lastSkipPoint.push_back( *it - 1 ); // Loop over input data sets. for (unsigned int idx = 0; idx != input_dsets_.size(); idx++) { DataSet_1D const& ds = static_cast<DataSet_1D const&>( *(input_dsets_[idx]) ); if (CheckSet(ds)) return 1; // Determine if skip values are valid for this set. Darray Npoints; // Number of points after skipping for (Iarray::const_iterator it = nskip_.begin(); it != nskip_.end(); ++it) { int np = (int)ds.Size() - *it; if (np < 1) { mprinterr("Error: Skipped too many points (set '%s' size is %zu)\n",ds.legend(),ds.Size()); return 1; } Npoints.push_back((double)np); } // Calculate averages for each value of skip Darray avg(nskip_.size(), 0.0); for (int i = 0; i != (int)ds.Size(); i++) { for (unsigned int j = 0; j != nskip_.size(); j++) if (i > lastSkipPoint[j]) avg[j] += ds.Dval( i ); } // Store average DV/DL for each value of skip for (unsigned int j = 0; j != nskip_.size(); j++) { avg[j] /= Npoints[j]; if (debug_ > 0) mprintf("\t%s Skip= %i <DV/DL>= %g\n", ds.legend(), nskip_[j], avg[j]); DataSet_Mesh& CR = static_cast<DataSet_Mesh&>( *(curve_[j]) ); CR.AddXY(xval_[idx], avg[j]); if (mode_ == GAUSSIAN_QUAD) sum[j] += (wgt_[idx] * avg[j]); } } // END loop over input data sets if (mode_ == TRAPEZOID) Integrate_Trapezoid(sum); // Store final TI integration values. DataSet_Mesh& DA = static_cast<DataSet_Mesh&>( *dAout_ ); DA.ModifyDim(Dimension::X).SetLabel("PtsSkipped"); for (unsigned int j = 0; j != nskip_.size(); j++) DA.AddXY(nskip_[j], sum[j]); return 0; }
/** Randomly reorder the given points in place with a Fisher-Yates style
  * shuffle driven by the member random number generator RN_.
  * \param PointIndices Array of point indices to shuffle; modified in place.
  *
  * Arrays with fewer than 2 elements are left untouched. Previously the
  * unsigned loop counter started at size()-1 and was compared with '!= 1',
  * so an empty or single-element array made the counter wrap around and
  * index far outside the array.
  */
void Cluster_Kmeans::ShufflePoints( Iarray& PointIndices ) {
  const unsigned int npoints = (unsigned int)PointIndices.size();
  if (npoints > 1) {
    // NOTE: The walk stops before i == 1, exactly as it always has, so the
    //       number of random values consumed (and hence the resulting order
    //       for a given seed) is the same as before for 2 or more points.
    for (unsigned int i = npoints - 1; i > 1; i--)
    {
      // j is the truncation of (random value * i).
      // NOTE(review): presumably rn_gen() returns a value in [0,1), which
      //               gives 0 <= j < i - confirm against the generator.
      unsigned int j = (unsigned int)(RN_.rn_gen() * (double)i);
      int temp = PointIndices[j];
      PointIndices[j] = PointIndices[i];
      PointIndices[i] = temp;
    }
  }
  if (debug_ > 0) {
    mprintf("DEBUG: Shuffled points:");
    for (Iarray::const_iterator it = PointIndices.begin();
                                it != PointIndices.end(); ++it)
      mprintf(" %i", *it);
    mprintf("\n");
  }
}
/** Perform the TI integration for every requested number of skipped points.
  * For each input set the average over all points after the skipped ones is
  * added to the curve for that skip value (X is the quadrature abscissa) and
  * the weighted average is accumulated. Running several skip values helps
  * test convergence. Final sums are stored in dAout_ vs. points skipped.
  * \return Analysis::OK on success, Analysis::ERR if a set is empty or has
  *         too few points for one of the skip values.
  */
Analysis::RetType Analysis_TI::Analyze() {
  const unsigned int nCurves = nskip_.size();
  // One weighted sum per skip value.
  Darray integral(nCurves, 0.0);
  // Index of the last skipped point for each skip value; only points with a
  // larger index contribute to the average.
  Iarray skipBoundary;
  for (unsigned int c = 0; c != nCurves; c++)
    skipBoundary.push_back( nskip_[c] - 1 );

  for (unsigned int setIdx = 0; setIdx != input_dsets_.size(); setIdx++)
  {
    DataSet_1D const& dvdl = static_cast<DataSet_1D const&>( *(input_dsets_[setIdx]) );
    if (dvdl.Size() < 1) {
      mprinterr("Error: Set '%s' is empty.\n", dvdl.legend());
      return Analysis::ERR;
    }
    const int setSize = (int)dvdl.Size();
    // Make sure every skip value leaves at least one point in this set
    // before anything is added to the curves.
    Darray nKept;
    for (unsigned int c = 0; c != nCurves; c++) {
      int remaining = setSize - nskip_[c];
      if (remaining < 1) {
        mprinterr("Error: Skipped too many points (set '%s' size is %zu)\n",dvdl.legend(),dvdl.Size());
        return Analysis::ERR;
      }
      nKept.push_back( (double)remaining );
    }
    // Average the set for each skip value, add it to the matching curve and
    // accumulate the weighted contribution.
    for (unsigned int c = 0; c != nCurves; c++) {
      double mean = 0.0;
      for (int pt = 0; pt != setSize; pt++) {
        if (pt > skipBoundary[c])
          mean += dvdl.Dval( pt );
      }
      mean /= nKept[c];
      DataSet_Mesh& curve = static_cast<DataSet_Mesh&>( *(curve_[c]) );
      curve.AddXY(quad_[setIdx], mean);
      integral[c] += (wgt_[setIdx] * mean);
    }
  }

  // Store the final integration value for each skip value.
  DataSet_Mesh& finalSet = static_cast<DataSet_Mesh&>( *dAout_ );
  for (unsigned int c = 0; c != nCurves; c++)
    finalSet.AddXY(nskip_[c], integral[c]);
  return Analysis::OK;
}
/** Determine what atoms each mask pertains to for the current parm file. */ Action::RetType Action_NMRrst::Setup(ActionSetup& setup) { if (!viewrst_.empty() && rsttop_ == 0) rsttop_ = setup.TopAddress(); // --------------------------------------------- // Set up NOEs from file. for (noeDataArray::iterator noe = NOEs_.begin(); noe != NOEs_.end(); ++noe) { if (setup.Top().SetupIntegerMask( noe->dMask1_ )) return Action::ERR; if (setup.Top().SetupIntegerMask( noe->dMask2_ )) return Action::ERR; if (noe->dMask1_.None() || noe->dMask2_.None()) { mprintf("Warning: One or both masks for NOE '%s' have no atoms (%i and %i).\n", noe->dist_->legend(), noe->dMask1_.Nselected(), noe->dMask2_.Nselected()); noe->active_ = false; } else noe->active_ = true; } // --------------------------------------------- // Set up potential NOE sites. if (findNOEs_) { if (setup.Top().SetupCharMask( Mask_ )) return Action::ERR; Mask_.MaskInfo(); if (Mask_.None()) return Action::SKIP; SiteArray potentialSites; // .clear(); AtomMap resMap; resMap.SetDebug( debug_ ); std::vector<bool> selected; Range soluteRes = setup.Top().SoluteResidues(); for (Range::const_iterator res = soluteRes.begin(); res != soluteRes.end(); ++res) { int res_first_atom = setup.Top().Res(*res).FirstAtom(); selected.assign( setup.Top().Res(*res).NumAtoms(), false ); // Find symmetric atom groups. AtomMap::AtomIndexArray symmGroups; if (resMap.SymmetricAtoms(setup.Top(), symmGroups, *res)) return Action::ERR; // DEBUG if (debug_ > 0) { mprintf("DEBUG: Residue %i: symmetric atom groups:\n", *res + 1); for (AtomMap::AtomIndexArray::const_iterator grp = symmGroups.begin(); grp != symmGroups.end(); ++grp) { mprintf("\t\t"); for (AtomMap::Iarray::const_iterator at = grp->begin(); at != grp->end(); ++at) mprintf(" %s", setup.Top().TruncAtomNameNum( *at ).c_str()); mprintf("\n"); } } // Each symmetric hydrogen atom group is a site. 
for (AtomMap::AtomIndexArray::const_iterator grp = symmGroups.begin(); grp != symmGroups.end(); ++grp) { // NOTE: If first atom is H all should be H. if ( setup.Top()[ grp->front() ].Element() == Atom::HYDROGEN ) { Iarray symmAtomGroup; for (Iarray::const_iterator at = grp->begin(); at != grp->end(); ++at) if (Mask_.AtomInCharMask( *at )) symmAtomGroup.push_back( *at ); if (!symmAtomGroup.empty()) { potentialSites.push_back( Site(*res, symmAtomGroup) ); // Mark symmetric atoms as selected. for (AtomMap::Iarray::const_iterator at = grp->begin(); at != grp->end(); ++at) selected[ *at - res_first_atom ] = true; } } } // All other non-selected hydrogens bonded to same heavy atom are sites. for (int ratom = res_first_atom; ratom != setup.Top().Res(*res).LastAtom(); ++ratom) { if ( setup.Top()[ratom].Element() != Atom::HYDROGEN ) { Iarray heavyAtomGroup; for (Atom::bond_iterator ba = setup.Top()[ratom].bondbegin(); ba != setup.Top()[ratom].bondend(); ++ba) if ( Mask_.AtomInCharMask(*ba) && *ba >= res_first_atom && *ba < setup.Top().Res(*res).LastAtom() ) { if ( !selected[ *ba - res_first_atom ] && setup.Top()[ *ba ].Element() == Atom::HYDROGEN ) heavyAtomGroup.push_back( *ba ); } if (!heavyAtomGroup.empty()) potentialSites.push_back( Site(*res, heavyAtomGroup) ); } } } mprintf("\t%zu potential NOE sites:\n", potentialSites.size()); for (SiteArray::const_iterator site = potentialSites.begin(); site != potentialSites.end(); ++site) { mprintf(" %u\tRes %i:", site - potentialSites.begin(), site->ResNum()+1); for (unsigned int idx = 0; idx != site->Nindices(); ++idx) mprintf(" %s", setup.Top().TruncAtomNameNum( site->Idx(idx) ).c_str()); mprintf("\n"); } if (noeArray_.empty()) { size_t siteArraySize = 0; // Set up all potential NOE pairs. Keep track of size. 
for (SiteArray::const_iterator site1 = potentialSites.begin(); site1 != potentialSites.end(); ++site1) { for (SiteArray::const_iterator site2 = site1 + 1; site2 != potentialSites.end(); ++site2) { if (site1->ResNum() != site2->ResNum()) { std::string legend = site1->SiteLegend(setup.Top()) + "--" + site2->SiteLegend(setup.Top()); DataSet* ds = 0; if (series_) { ds = masterDSL_->AddSet(DataSet::FLOAT, MetaData(setname_, "foundNOE", noeArray_.size())); if (ds == 0) return Action::ERR; // Construct a data set name. ds->SetLegend(legend); } noeArray_.push_back( NOEtype(*site1, *site2, ds, legend) ); siteArraySize += (2 * sizeof(int) * site1->Nindices()) + (2 * sizeof(int) * site2->Nindices()); } } } numNoePairs_ = noeArray_.size(); size_t siteSize = sizeof(int) + (2 * sizeof(Iarray)) + sizeof(Site); size_t noeSize = (2 * siteSize) + sizeof(DataSet*) + sizeof(double) + sizeof(NOEtype); if (series_) noeSize += sizeof(std::vector<float>); size_t noeArraySize = (noeSize * numNoePairs_) + siteArraySize; if (series_) noeArraySize += (setup.Nframes() * numNoePairs_ * sizeof(float)); mprintf("\t%zu potential NOE pairs. 
Estimated memory usage is %s\n", numNoePairs_, ByteString(noeArraySize, BYTE_DECIMAL).c_str()); } else if (numNoePairs_ != potentialSites.size()) { mprinterr("Warning: Found NOE matrix has already been set up for %zu potential\n" "Warning: NOEs, but %zu NOEs currently found.\n", numNoePairs_, potentialSites.size()); return Action::SKIP; } } // --------------------------------------------- // Set up NOEs specified on the command line if (!Pairs_.empty()) { if (!specifiedNOEs_.empty()) { mprintf("Warning: Specifying NOEs currently only works with first topology used.\n"); return Action::SKIP; } for (MaskPairArray::iterator mp = Pairs_.begin(); mp != Pairs_.end(); mp++) { if (setup.Top().SetupIntegerMask( mp->first )) return Action::ERR; int res1 = CheckSameResidue(setup.Top(), mp->first); if (res1 < 0) continue; if (setup.Top().SetupIntegerMask( mp->second )) return Action::ERR; int res2 = CheckSameResidue(setup.Top(), mp->second); if (res2 < 0) continue; Site site1( res1, mp->first.Selected() ); Site site2( res2, mp->second.Selected() ); std::string legend = site1.SiteLegend(setup.Top()) + "--" + site2.SiteLegend(setup.Top()); DataSet* ds = 0; if (series_) { ds = masterDSL_->AddSet(DataSet::FLOAT, MetaData(setname_, "specNOE", specifiedNOEs_.size())); if (ds == 0) return Action::ERR; ds->SetLegend(legend); } specifiedNOEs_.push_back( NOEtype(site1, site2, ds, legend) ); } } // Set up imaging info for this parm Image_.SetupImaging( setup.CoordInfo().TrajBox().Type() ); if (Image_.ImagingEnabled()) mprintf("\tImaged.\n"); else mprintf("\tImaging off.\n"); return Action::OK; }
/** Build a model of a subject sequence from a query reference structure
  * using a BLAST pairwise alignment.
  *
  * The alignment is read from the file given by 'blastfile'. Subject
  * residues aligned to an identical query residue receive all non-hydrogen
  * atoms of that reference residue; residues aligned to a different residue
  * receive only N, CA, CB, C and O. Subject residues with no query
  * counterpart get a single placeholder CA whose coordinates are linearly
  * interpolated between the atoms bordering the placeholder segment. The
  * result is written to the file given by 'out' (PDB by default, or Mol2).
  *
  * \param State Current state; provides the reference frame and debug level.
  * \param argIn Arguments: blastfile, reference keywords, out, output format
  *              keyword, smaskoffset, qmaskoffset.
  * \return 0 on success, 1 on error.
  */
int SequenceAlign(CpptrajState& State, ArgList& argIn) {
  std::string blastfile = argIn.GetStringKey("blastfile");
  if (blastfile.empty()) {
    mprinterr("Error: 'blastfile' must be specified.\n");
    return 1;
  }
  ReferenceFrame qref = State.DSL()->GetReferenceFrame(argIn);
  if (qref.error() || qref.empty()) {
    mprinterr("Error: Must specify reference structure for query.\n");
    return 1;
  }
  std::string outfilename = argIn.GetStringKey("out");
  if (outfilename.empty()) {
    mprinterr("Error: Must specify output file.\n");
    return 1;
  }
  TrajectoryFile::TrajFormatType fmt = TrajectoryFile::GetFormatFromArg(argIn);
  if (fmt != TrajectoryFile::PDBFILE && fmt != TrajectoryFile::MOL2FILE)
    fmt = TrajectoryFile::PDBFILE; // Default to PDB
  // Offsets applied to the (0-based) residue indices in the printed masks.
  int smaskoffset = argIn.getKeyInt("smaskoffset", 0) + 1;
  int qmaskoffset = argIn.getKeyInt("qmaskoffset", 0) + 1;

  // Load blast file
  mprintf("\tReading BLAST alignment from '%s'\n", blastfile.c_str());
  BufferedLine infile;
  if (infile.OpenFileRead( blastfile )) return 1;
  // Seek down to first Query line.
  const char* ptr = infile.Line();
  bool atFirstQuery = false;
  while (ptr != 0) {
    if (*ptr == 'Q') {
      if ( strncmp(ptr, "Query", 5) == 0 ) {
        atFirstQuery = true;
        break;
      }
    }
    ptr = infile.Line();
  }
  if (!atFirstQuery) {
    mprinterr("Error: 'Query' not found.\n");
    return 1;
  }

  // Read alignment. Replacing query with subject.
  typedef std::vector<char> Carray;
  typedef std::vector<int> Iarray;
  Carray Query; // Query residues
  Carray Sbjct; // Sbjct residues
  Iarray Smap;  // Smap[Sbjct index] = Query index
  while (ptr != 0) {
    const char* qline = ptr;           // query line
    const char* aline = infile.Line(); // alignment line
    const char* sline = infile.Line(); // subject line
    if (aline == 0 || sline == 0) {
      mprinterr("Error: Missing alignment line or subject line after Query:\n");
      mprinterr("Error: %s", qline);
      return 1;
    }
    // Residue columns are read starting at fixed offset 12. Make sure both
    // the query and subject lines actually reach that column before they
    // are indexed.
    for (int col = 0; col != 12; col++) {
      if (qline[col] == '\0' || sline[col] == '\0') {
        mprinterr("Error: Query or subject line is too short to contain an alignment:\n");
        mprinterr("Error: %s", qline);
        return 1;
      }
    }
    // Residues end at the first space (or at the end of the query line).
    for (int idx = 12; qline[idx] != ' ' && qline[idx] != '\0'; idx++) {
      if (sline[idx] == '\0') {
        mprinterr("Error: Subject line is shorter than query line:\n");
        mprinterr("Error: %s", qline);
        return 1;
      }
      if (qline[idx] == '-') {
        // Sbjct does not have corresponding res in Query
        Smap.push_back(-1);
        Sbjct.push_back( sline[idx] );
      } else if (sline[idx] == '-') {
        // Query does not have a corresponding res in Sbjct
        Query.push_back( qline[idx] );
      } else {
        // Direct Query to Sbjct map
        Smap.push_back( Query.size() );
        Sbjct.push_back( sline[idx] );
        Query.push_back( qline[idx] );
      }
    }
    // Scan to next Query
    ptr = infile.Line();
    while (ptr != 0) {
      if (*ptr == 'Q') {
        if ( strncmp(ptr, "Query", 5) == 0 ) break;
      }
      ptr = infile.Line();
    }
  }
  // DEBUG
  std::string SmaskExp, QmaskExp;
  if (State.Debug() > 0) mprintf(" Map of Sbjct to Query:\n");
  for (int sres = 0; sres != (int)Sbjct.size(); sres++) {
    if (State.Debug() > 0)
      mprintf("%-i %3s %i", sres+smaskoffset, Residue::ConvertResName(Sbjct[sres]),
              Smap[sres]+qmaskoffset);
    const char* qres = "";
    if (Smap[sres] != -1) {
      qres = Residue::ConvertResName(Query[Smap[sres]]);
      // Build comma-separated residue lists of the mapped residues.
      if (SmaskExp.empty())
        SmaskExp.assign( integerToString(sres+smaskoffset) );
      else
        SmaskExp.append( "," + integerToString(sres+smaskoffset) );
      if (QmaskExp.empty())
        QmaskExp.assign( integerToString(Smap[sres]+qmaskoffset) );
      else
        QmaskExp.append( "," + integerToString(Smap[sres]+qmaskoffset) );
    }
    if (State.Debug() > 0) mprintf(" %3s\n", qres);
  }
  mprintf("Smask: %s\n", SmaskExp.c_str());
  mprintf("Qmask: %s\n", QmaskExp.c_str());
  // Check that query residues match reference.
  for (unsigned int sres = 0; sres != Sbjct.size(); sres++) {
    int qres = Smap[sres];
    if (qres != -1) {
      if (Query[qres] != qref.Parm().Res(qres).SingleCharName()) {
        mprintf("Warning: Potential residue mismatch: Query %s reference %s\n",
                Residue::ConvertResName(Query[qres]), qref.Parm().Res(qres).c_str());
      }
    }
  }
  // Build subject using coordinate from reference.
  Topology sTop;
  Frame sFrame;
  Iarray placeHolder; // Atom indices of placeholder residues.
  for (unsigned int sres = 0; sres != Sbjct.size(); sres++) {
    int qres = Smap[sres];
    NameType SresName( Residue::ConvertResName(Sbjct[sres]) );
    if (qres != -1) {
      Residue const& QR = qref.Parm().Res(qres);
      Residue SR(SresName, sres+1, ' ', QR.ChainID());
      if (Query[qres] == Sbjct[sres]) { // Exact match. All non-H atoms.
        for (int qat = QR.FirstAtom(); qat != QR.LastAtom(); qat++)
        {
          // The atom and its coordinates must be added together; adding
          // coordinates for skipped hydrogens would leave the frame with
          // more atoms than the topology.
          if (qref.Parm()[qat].Element() != Atom::HYDROGEN) {
            sTop.AddTopAtom( qref.Parm()[qat], SR );
            sFrame.AddXYZ( qref.Coord().XYZ(qat) );
          }
        }
      } else { // Partial match. Copy only backbone and CB.
        for (int qat = QR.FirstAtom(); qat != QR.LastAtom(); qat++)
        {
          if ( qref.Parm()[qat].Name().Match("N" ) ||
               qref.Parm()[qat].Name().Match("CA") ||
               qref.Parm()[qat].Name().Match("CB") ||
               qref.Parm()[qat].Name().Match("C" ) ||
               qref.Parm()[qat].Name().Match("O" ) )
          {
            sTop.AddTopAtom( qref.Parm()[qat], SR );
            sFrame.AddXYZ( qref.Coord().XYZ(qat) );
          }
        }
      }
    } else {
      // Residue in query does not exist for subject. Just put placeholder CA for now.
      Vec3 Zero(0.0);
      placeHolder.push_back( sTop.Natom() );
      sTop.AddTopAtom( Atom("CA", "C "), Residue(SresName, sres+1, ' ', ' ') );
      sFrame.AddXYZ( Zero.Dptr() );
    }
  }
  mprintf("\tPlaceholder residue indices:");
  for (Iarray::const_iterator p = placeHolder.begin(); p != placeHolder.end(); ++p)
    mprintf(" %i", *p + 1);
  mprintf("\n");
  // Try to give placeholders more reasonable coordinates.
  if (!placeHolder.empty()) {
    Iarray current_indices;
    unsigned int pidx = 0;
    while (pidx < placeHolder.size()) {
      // Start a new segment of consecutive placeholder atoms.
      current_indices.clear();
      current_indices.push_back( placeHolder[pidx++] );
      // Search for the end of this segment
      for (; pidx != placeHolder.size(); pidx++) {
        if (placeHolder[pidx] - current_indices.back() > 1) break;
        current_indices.push_back( placeHolder[pidx] );
      }
      // DEBUG
      mprintf("\tSegment:");
      for (Iarray::const_iterator it = current_indices.begin();
                                  it != current_indices.end(); ++it)
        mprintf(" %i", *it + 1);
      // Get coordinates of residues bordering segment.
      int prev_res = sTop[current_indices.front()].ResNum() - 1;
      int next_res = sTop[current_indices.back() ].ResNum() + 1;
      mprintf(" (prev_res=%i, next_res=%i)\n", prev_res+1, next_res+1);
      // A segment at the very start or end of the subject has no bordering
      // atom on one side; interpolating would read outside the frame.
      int prev_atom = current_indices.front() - 1;
      int next_atom = current_indices.back() + 1;
      if (prev_atom < 0 || next_atom >= sTop.Natom()) {
        mprintf("Warning: Segment has no bordering atom on one side.\n"
                "Warning: Placeholder coordinates will be left at the origin.\n");
        continue;
      }
      Vec3 prev_crd(sFrame.XYZ(prev_atom));
      Vec3 next_crd(sFrame.XYZ(next_atom));
      prev_crd.Print("prev_crd");
      next_crd.Print("next_crd");
      // Place the segment atoms at equal steps between the bordering atoms.
      Vec3 crd_step = (next_crd - prev_crd) / (double)(current_indices.size()+1);
      crd_step.Print("crd_step");
      double* xyz = sFrame.xAddress() + (current_indices.front() * 3);
      for (unsigned int i = 0; i != current_indices.size(); i++, xyz += 3) {
        prev_crd += crd_step;
        xyz[0] = prev_crd[0];
        xyz[1] = prev_crd[1];
        xyz[2] = prev_crd[2];
      }
    }
  }
  // Write output traj
  Trajout_Single trajout;
  if (trajout.PrepareTrajWrite(outfilename, argIn, &sTop, CoordinateInfo(), 1, fmt)) return 1;
  if (trajout.WriteSingle(0, sFrame)) return 1;
  trajout.EndTraj();
  return 0;
}
// Analysis_TI::Calc_Increment() int Analysis_TI::Calc_Increment() { // Determine max points if not given. int maxpts = avg_max_; if (maxpts == -1) { for (unsigned int idx = 0; idx != input_dsets_.size(); idx++) { DataSet_1D const& ds = static_cast<DataSet_1D const&>( *(input_dsets_[idx]) ); if (maxpts == -1) maxpts = (int)ds.Size(); else if (maxpts != (int)ds.Size()) { mprintf("Warning: # points in '%s' (%zu) is different than %i.\n", ds.legend(), ds.Size(), maxpts); maxpts = std::min( maxpts, (int)ds.Size() ); mprintf("Warning: Will only use %i points.\n", maxpts); } } } if (maxpts < 1) { mprinterr("Error: Max points to use is < 1.\n"); return 1; } if (avg_skip_ >= maxpts) { mprinterr("Error: 'avgskip' (%i) > max (%i).\n", avg_skip_, maxpts); return 1; } // sum: Hold the results of integration for each curve (increment) Darray sum; // points: Hold point values at which each avg is being calculated Iarray points; // Loop over input data sets. for (unsigned int idx = 0; idx != input_dsets_.size(); idx++) { DataSet_1D const& ds = static_cast<DataSet_1D const&>( *(input_dsets_[idx]) ); if (CheckSet(ds)) return 1; // Calculate averages for each increment Darray avg; Iarray increments; int count = 0; int endpt = maxpts -1; double currentSum = 0.0; if (debug_ > 0) mprintf("DEBUG: Lambda %g\n", xval_[idx]); for (int pt = avg_skip_; pt != maxpts; pt++) { currentSum += ds.Dval(pt); count++; if (count == avg_increment_ || pt == endpt) { avg.push_back( currentSum / ((double)(pt - avg_skip_ + 1)) ); increments.push_back(pt+1); if (debug_ > 0) mprintf("DEBUG:\t\tAvg from %i to %i: %g\n", avg_skip_+1, pt+1, avg.back()); count = 0; } } if (sum.empty()) { sum.resize(avg.size()); points = increments; } else if (sum.size() != avg.size()) { mprinterr("Error: Different # of increments for set '%s'; got %zu, expected %zu.\n", ds.legend(), avg.size(), sum.size()); return 1; } // Create increment curve data sets if (curve_.empty()) { MetaData md(dAout_->Meta().Name(), "TIcurve"); for 
(unsigned int j = 0; j != avg.size(); j++) { md.SetIdx( increments[j] ); DataSet* ds = masterDSL_->AddSet(DataSet::XYMESH, md); if (ds == 0) return Analysis::ERR; ds->ModifyDim(Dimension::X).SetLabel("Lambda"); ds->SetLegend( md.Name() + "_Skip" + integerToString(increments[j]) ); if (curveout_ != 0) curveout_->AddDataSet( ds ); curve_.push_back( ds ); } } for (unsigned int j = 0; j != avg.size(); j++) { DataSet_Mesh& CR = static_cast<DataSet_Mesh&>( *(curve_[j]) ); CR.AddXY(xval_[idx], avg[j]); if (mode_ == GAUSSIAN_QUAD) sum[j] += (wgt_[idx] * avg[j]); } } // END loop over data sets if (mode_ == TRAPEZOID) Integrate_Trapezoid(sum); // Store final integration values DataSet_Mesh& DA = static_cast<DataSet_Mesh&>( *dAout_ ); DA.ModifyDim(Dimension::X).SetLabel("Point"); for (unsigned int j = 0; j != points.size(); j++) DA.AddXY(points[j], sum[j]); return 0; }
/** Find potential symmetric atoms. All residues up to the last selected * residue are considered. */ int SymmetricRmsdCalc::SetupSymmRMSD(Topology const& topIn, AtomMask const& tgtMask, bool remapIn) { // Allocate space for remapping selected atoms in target frame. This will // also put the correct masses in based on the mask. tgtRemap_.SetupFrameFromMask(tgtMask, topIn.Atoms()); // Create map of original atom numbers to selected indices Iarray SelectedIdx( topIn.Natom(), -1 ); int tgtIdx = 0; for (int originalAtom = 0; originalAtom != topIn.Natom(); ++originalAtom) if ( originalAtom == tgtMask[tgtIdx] ) SelectedIdx[originalAtom] = tgtIdx++; if (debug_ > 0) { mprintf("DEBUG: Original atom -> Selected Index mapping:\n"); for (int originalAtom = 0; originalAtom != topIn.Natom(); ++originalAtom) mprintf("\t%8i -> %8i\n", originalAtom + 1, SelectedIdx[originalAtom] + 1); } // Create initial 1 to 1 atom map for all selected atoms; indices in // SymmetricAtomIndices will correspond to positions in AMap. AMap_.resize( tgtRemap_.Natom() ); // Determine last selected residue. int last_res = topIn[tgtMask.back()].ResNum() + 1; mprintf("\tResidues up to %s will be considered for symmetry correction.\n", topIn.TruncResNameNum(last_res-1).c_str()); // In each residue, determine which selected atoms are symmetric. SymmetricAtomIndices_.clear(); AtomMap resmap; if (debug_ > 1) resmap.SetDebug(1); for (int res = 0; res < last_res; ++res) { AtomMap::AtomIndexArray residue_SymmetricGroups; if (resmap.SymmetricAtoms(topIn, residue_SymmetricGroups, res)) { mprinterr("Error: Finding symmetric atoms in residue '%s'\n", topIn.TruncResNameNum(res).c_str()); return 1; } if (!residue_SymmetricGroups.empty()) { // Which atoms in symmetric groups are selected? 
bool resHasSelectedSymmAtoms = false; for (AtomMap::AtomIndexArray::const_iterator symmGroup = residue_SymmetricGroups.begin(); symmGroup != residue_SymmetricGroups.end(); ++symmGroup) { Iarray selectedAtomIndices; for (Iarray::const_iterator atnum = symmGroup->begin(); atnum != symmGroup->end(); ++atnum) { if ( SelectedIdx[*atnum] != -1 ) selectedAtomIndices.push_back( SelectedIdx[*atnum] ); // Store tgtMask indices } if (!selectedAtomIndices.empty()) { SymmetricAtomIndices_.push_back( selectedAtomIndices ); resHasSelectedSymmAtoms = true; } } // If remapping and not all atoms in a residue are selected, warn user. // TODO: Should they just be considered even if not selected? if (remapIn && resHasSelectedSymmAtoms) { for (int atom = topIn.Res(res).FirstAtom(); atom != topIn.Res(res).LastAtom(); ++atom) if (SelectedIdx[atom] == -1) { mprintf("Warning: Not all atoms selected in residue '%s'. Re-mapped\n" "Warning: structures may appear distorted.\n", topIn.TruncResNameNum(res).c_str()); break; } } } } if (debug_ > 0) { mprintf("DEBUG: Potential Symmetric Atom Groups:\n"); for (AtomIndexArray::const_iterator symmatoms = SymmetricAtomIndices_.begin(); symmatoms != SymmetricAtomIndices_.end(); ++symmatoms) { mprintf("\t%8u) ", symmatoms - SymmetricAtomIndices_.begin()); for (Iarray::const_iterator atom = symmatoms->begin(); atom != symmatoms->end(); ++atom) mprintf(" %s(%i)", topIn.AtomMaskName(tgtMask[*atom]).c_str(), tgtMask[*atom] + 1); mprintf("\n"); } } return 0; }
/** selectedTgt and centeredREF must correspond to each other. */ double SymmetricRmsdCalc::SymmRMSD_CenteredRef(Frame const& selectedTgt, Frame const& centeredREF) { // Create initial 1 to 1 atom map for all atoms; indices in // SymmetricAtomIndices will correspond to positions in AMap. for (int atom = 0; atom < (int)AMap_.size(); atom++) AMap_[atom] = atom; tgtRemap_.SetCoordinates(selectedTgt); // Calculate initial best fit RMSD if necessary if (fit_) { tgtRemap_.RMSD_CenteredRef(centeredREF, rotMatrix_, tgtTrans_, useMass_); // Since tgtRemap is moved to origin during RMSD calc and centeredREF // should already be at the origin, just rotate. tgtRemap_.Rotate( rotMatrix_ ); } // Correct RMSD for symmetry for (AtomIndexArray::const_iterator symmatoms = SymmetricAtomIndices_.begin(); symmatoms != SymmetricAtomIndices_.end(); ++symmatoms) { // For each array of symmetric atoms, determine the lowest distance score # ifdef DEBUGSYMMRMSD mprintf(" Symmetric atoms group %u starting with atom %i\n", symmatoms - SymmetricAtomIndices_.begin(), tgtMask_[symmatoms->front()] + 1); # endif cost_matrix_.Initialize( symmatoms->size() ); for (Iarray::const_iterator ta = symmatoms->begin(); ta != symmatoms->end(); ++ta) { for (Iarray::const_iterator ra = symmatoms->begin(); ra != symmatoms->end(); ++ra) { double dist2 = DIST2_NoImage( centeredREF.XYZ(*ra), tgtRemap_.XYZ(*ta) ); # ifdef DEBUGSYMMRMSD mprintf("\t\t%i to %i: %f\n", tgtMask_[*ta] + 1, tgtMask_[*ra] + 1, dist2); # endif cost_matrix_.AddElement( dist2 ); } } Iarray resMap = cost_matrix_.Optimize(); # ifdef DEBUGSYMMRMSD mprintf("\tMapping from Hungarian Algorithm:\n"); for (Iarray::const_iterator ha = resMap.begin(); ha != resMap.end(); ++ha) mprintf("\t\tMap col=%u row=%i\n", ha - resMap.begin(), *ha); # endif // Fill in overall map Iarray::const_iterator rmap = resMap.begin(); for (Iarray::const_iterator atmidx = symmatoms->begin(); atmidx != symmatoms->end(); ++atmidx, ++rmap) { AMap_[*atmidx] = (*symmatoms)[*rmap]; 
# ifdef DEBUGSYMMRMSD mprintf("\tAssigned atom %i to atom %i\n", tgtMask_[*atmidx] + 1, tgtMask_[(*symmatoms)[*rmap]] + 1); # endif } } # ifdef DEBUGSYMMRMSD mprintf(" Final Atom Mapping:\n"); for (unsigned int ref = 0; ref < AMap_.size(); ++ref) mprintf("\t%u -> %i\n", tgtMask_[ref] + 1, tgtMask_[AMap_[ref]] + 1); mprintf("----------------------------------------\n"); # endif // Remap the target frame for symmetry, then calculate new RMSD. // TODO: Does the topology need to be remapped as well? double rmsdval; tgtRemap_.SetCoordinatesByMap(selectedTgt, AMap_); if (fit_) rmsdval = tgtRemap_.RMSD_CenteredRef( centeredREF, rotMatrix_, tgtTrans_, useMass_ ); else rmsdval = tgtRemap_.RMSD_NoFit( centeredREF, useMass_ ); return rmsdval; }
// Cluster_Kmeans::Cluster()
/** Perform K-means clustering on the frames to be clustered.
  * Seeds are chosen with FindKmeansSeeds() and each seed frame starts its
  * own cluster. Then, for up to maxIt_ iterations, every point is taken out
  * of its current cluster (unless it is the only frame in that cluster, or
  * on the first iteration a seed) and added to the cluster whose centroid
  * is closest, with centroids updated as frames are removed and added.
  * Iteration stops early when no point changed cluster. When mode_ is
  * RANDOM the order in which points are processed is re-shuffled before
  * every iteration.
  * \return Always 0.
  */
int Cluster_Kmeans::Cluster() {
  // First determine which frames are being clustered.
  Iarray const& FramesToCluster = FrameDistances().FramesToCluster();

  // Determine seeds
  FindKmeansSeeds( FramesToCluster );

  // Seed the random number generator used for shuffling.
  if (mode_ == RANDOM)
    RN_.rn_set( kseed_ );

  int pointCount = (int)FramesToCluster.size();

  // This array will hold the indices of the points to process each iteration.
  // If sequential this is just 0 -> pointCount. If random this will be
  // reassigned each iteration.
  Iarray PointIndices;
  PointIndices.reserve( pointCount );
  for (int processIdx = 0; processIdx != pointCount; processIdx++)
    PointIndices.push_back( processIdx );

  // Add the seed clusters
  for (Iarray::const_iterator seedIdx = SeedIndices_.begin();
                              seedIdx != SeedIndices_.end(); ++seedIdx)
  {
    // Seed indices are positions in FramesToCluster, not frame numbers.
    int seedFrame = FramesToCluster[ *seedIdx ];
    // A centroid is created for new clusters.
    AddCluster( ClusterDist::Cframes(1, seedFrame) );
    // NOTE: No need to calc best rep frame, only 1 frame.
    if (debug_ > 0)
      mprintf("Put frame %i in cluster %i (seed index=%i).\n",
              seedFrame, clusters_.back().Num(), *seedIdx);
  }
  // Assign points in up to maxIt_ passes. If a point looked like it belonged
  // to cluster A at first, but then we added many other points and altered
  // our cluster shapes, its possible that we will want to reassign it to
  // cluster B.
  for (int iteration = 0; iteration != maxIt_; iteration++)
  {
    if (mode_ == RANDOM)
      ShufflePoints( PointIndices );
    // Add each point to an existing cluster, and recompute centroid
    mprintf("\tRound %i: ", iteration);
    ProgressBar progress( PointIndices.size() );
    // Nchanged counts points that ended up in a different cluster than the
    // one they were removed from during this iteration.
    int Nchanged = 0;
    int prog = 0;
    for (Iarray::const_iterator pointIdx = PointIndices.begin();
                                pointIdx != PointIndices.end(); ++pointIdx, ++prog)
    {
      // The progress bar is only updated when debug output is off.
      if (debug_ < 1) progress.Update( prog );
      // Number of the cluster this point was removed from; stays -1 if the
      // point was not removed from any cluster.
      int oldClusterIdx = -1;
//      if ( iteration != 0 || mode_ != SEQUENTIAL) // FIXME: Should this really happen for RANDOM
//      {
        int pointFrame = FramesToCluster[ *pointIdx ];
        if (debug_ > 0)
          mprintf("DEBUG: Processing frame %i (index %i)\n", pointFrame, *pointIdx);
        // pointWasYanked stays true unless the point has to remain where it
        // is (alone in its cluster, or a seed on the first iteration).
        bool pointWasYanked = true;
        if (iteration > 0) {
          // Yank this point out of its cluster, recompute the centroid
          for (cluster_it C1 = clusters_.begin(); C1 != clusters_.end(); ++C1)
          {
            if (C1->HasFrame( pointFrame ))
            {
              // If this point is alone in its cluster its in the right place
              if (C1->Nframes() == 1) {
                pointWasYanked = false;
                continue; // FIXME: should this be a break?
              }
              //oldBestRep = C1->BestRepFrame();
              oldClusterIdx = C1->Num();
              C1->RemoveFrameUpdateCentroid( Cdist_, pointFrame ); // TEST
//              C1->RemoveFrameFromCluster( pointFrame );
              //newBestRep = C1->FindBestRepFrame();
//              C1->CalculateCentroid( Cdist_ );
              if (debug_ > 0)
                mprintf("Remove Frame %i from cluster %i\n", pointFrame, C1->Num());
              //if (clusterToClusterCentroid_) {
              //  if (oldBestRep != NewBestRep)
              //    C1->AlignToBestRep( Cdist_ ); // FIXME: Only relevant for COORDS dist?
              //  C1->CalculateCentroid( Cdist_ ); // FIXME: Seems unnessecary to align prior
              //}
            }
          }
        } else {
          // First iteration. If this point is already in a cluster it is a seed.
          for (cluster_it C1 = clusters_.begin(); C1 != clusters_.end(); ++C1)
          {
            if (C1->HasFrame( pointFrame )) {
              pointWasYanked = false;
              if (debug_ > 0)
                mprintf("Frame %i was already used to seed cluster %i\n",
                        pointFrame, C1->Num());
              continue; // FIXME break?
            }
          }
        }
        if (pointWasYanked) {
          // Find out what cluster this point is now closest to.
          // closestDist < 0 means no cluster has been examined yet.
          double closestDist = -1.0;
          cluster_it closestCluster = clusters_.begin();
          for (cluster_it C1 = clusters_.begin(); C1 != clusters_.end(); ++C1)
          {
            double dist = Cdist_->FrameCentroidDist(pointFrame, C1->Cent());
            if (closestDist < 0.0 || dist < closestDist)
            {
              closestDist = dist;
              closestCluster = C1;
            }
          }
          //oldBestRep = closestCluster->BestRepFrame();
          closestCluster->AddFrameUpdateCentroid( Cdist_, pointFrame ); // TEST
//          closestCluster->AddFrameToCluster( pointFrame );
          //newBestRep = closestCluster->FindBestFrameFrame();
//          closestCluster->CalculateCentroid( Cdist_ );
          // On the first iteration oldClusterIdx is still -1 here (nothing is
          // removed), so every point added counts as changed.
          if (closestCluster->Num() != oldClusterIdx)
          {
            Nchanged++;
            if (debug_ > 0)
              mprintf("Remove Frame %i from cluster %i, but add to cluster %i (dist= %f).\n",
                      pointFrame, oldClusterIdx, closestCluster->Num(), closestDist);
          } else {
            if (debug_ > 0)
              mprintf("Frame %i staying in cluster %i\n", pointFrame, closestCluster->Num());
          }
          // Currently a no-op; the alignment code is disabled.
          if (clusterToClusterCentroid_) {
            //if (oldBestRep != NewBestRep) {
            //    C1->AlignToBestRep( Cdist_ ); // FIXME: Only relevant for COORDS dist?
            //  C1->CalculateCentroid( Cdist_ ); // FIXME: Seems unnessecary to align prior
            //}
          }
        }
//      }
    } // END loop over points to cluster
    if (Nchanged == 0) {
      mprintf("\tK-means round %i: No change. Skipping the rest of the iterations.\n", iteration);
      break;
    } else
      mprintf("\tK-means round %i: %i points changed cluster assignment.\n", iteration, Nchanged);
  } // END k-means iterations
  // Remove any empty clusters
  // FIXME: Will there ever be empty clusters?
  RemoveEmptyClusters();
  // NOTE in PTRAJ here align all frames to best rep
  return 0;
}
/** Find some seed-points for K-means clustering. The first two seeds are
  * the pair of frames with the largest distance between them. Then, at each
  * iteration, add the point whose distance to its nearest current seed is
  * as large as possible.
  *
  * \param FramesToCluster Frames being clustered; the seeds stored in
  *        SeedIndices_ are indices into this array.
  * \return 0 on success, 1 if nclusters_ is less than 1 (SeedIndices_ is
  *         then left empty).
  *
  * With a single cluster only the first frame of the most distant pair is
  * used as a seed. Previously fewer than 2 clusters caused a write past the
  * end of SeedIndices_ and a seed loop that never reached its end condition.
  */
int Cluster_Kmeans::FindKmeansSeeds(Iarray const& FramesToCluster) {
  if (nclusters_ < 1) {
    mprinterr("Error: Cannot find K-means seeds for %i clusters.\n", nclusters_);
    SeedIndices_.clear();
    return 1;
  }
  // SeedIndices will hold indices into FramesToCluster. Use assign() so any
  // seeds from a previous call are reset as well.
  // NOTE(review): the default of 1 is only a valid index when there are at
  //               least 2 frames - confirm callers guarantee that.
  SeedIndices_.assign( nclusters_, 1 ); // 1 used to be consistent with ptraj

  // Find the pair of frames that are furthest apart.
  double bestDistance = 0.0;
  int frameCount = (int)FramesToCluster.size();
  for (int frameIdx = 0; frameIdx != frameCount; frameIdx++)
  {
    int seedFrame = FramesToCluster[ frameIdx ];
    for (int candidateIdx = frameIdx; candidateIdx < frameCount; candidateIdx++)
    {
      int candidateFrame = FramesToCluster[ candidateIdx ];
      double dist = FrameDistances().GetFdist( seedFrame, candidateFrame );
      if (dist > bestDistance)
      {
        bestDistance = dist;
        SeedIndices_[0] = frameIdx;
        // There is only a second seed when 2 or more clusters are requested.
        if (nclusters_ > 1)
          SeedIndices_[1] = candidateIdx;
      }
    }
  }

  // Remaining seeds: pick the candidate furthest from its nearest seed.
  for (int seedIdx = 2; seedIdx < nclusters_; seedIdx++)
  {
    bestDistance = 0.0;
    int bestIdx = 0;
    for (int candidateIdx = 0; candidateIdx < frameCount; candidateIdx++)
    {
      // Make sure this candidate isnt already a seed
      bool skipCandidate = false;
      for (int checkIdx = 0; checkIdx != seedIdx; checkIdx++)
      {
        if (SeedIndices_[checkIdx] == candidateIdx) {
          skipCandidate = true;
          break;
        }
      }
      if (!skipCandidate) {
        // Get the closest distance from this candidate to a current seed
        int candidateFrame = FramesToCluster[ candidateIdx ];
        // nearestDist < 0 means no seed has been examined yet.
        double nearestDist = -1.0;
        for (int checkIdx = 0; checkIdx != seedIdx; checkIdx++)
        {
          int seedFrame = FramesToCluster[ SeedIndices_[checkIdx] ];
          double dist = FrameDistances().GetFdist( candidateFrame, seedFrame );
          if (dist < nearestDist || nearestDist < 0.0)
            nearestDist = dist;
        }
        // Is this the best so far?
        if (nearestDist > bestDistance)
        {
          bestDistance = nearestDist;
          bestIdx = candidateIdx;
        }
      }
    }
    SeedIndices_[seedIdx] = bestIdx;
  }
  if (debug_ > 0)
    for (unsigned int si = 0; si != SeedIndices_.size(); si++)
      mprintf("DEBUG:\t\tSeedIndices[%u]= %i\n", si, SeedIndices_[si]);
  return 0;
}