/** Build a subject structure from a BLAST pairwise alignment and a query
  * reference structure, then write it out as a single frame.
  *
  * Arguments read from argIn: 'blastfile' (required), a reference frame
  * specification (required), 'out' (required), an optional output format
  * (anything other than PDB/Mol2 falls back to PDB), and the integer keys
  * 'smaskoffset' / 'qmaskoffset' (default 0) which shift the residue numbers
  * printed in the Smask/Qmask expressions.
  *
  * For each subject residue:
  *  - mapped to an identical query residue: all non-hydrogen atoms are copied;
  *  - mapped to a different query residue: only N, CA, CB, C and O are copied;
  *  - not mapped: a single placeholder CA is added, and afterwards placed by
  *    linear interpolation between the atoms bordering the placeholder run.
  *
  * \return 0 on success, 1 on error.
  */
int SequenceAlign(CpptrajState& State, ArgList& argIn) {
  std::string blastfile = argIn.GetStringKey("blastfile");
  if (blastfile.empty()) {
    mprinterr("Error: 'blastfile' must be specified.\n");
    return 1;
  }
  ReferenceFrame qref = State.DSL()->GetReferenceFrame(argIn);
  if (qref.error() || qref.empty()) {
    mprinterr("Error: Must specify reference structure for query.\n");
    return 1;
  }
  std::string outfilename = argIn.GetStringKey("out");
  if (outfilename.empty()) {
    mprinterr("Error: Must specify output file.\n");
    return 1;
  }
  TrajectoryFile::TrajFormatType fmt = TrajectoryFile::GetFormatFromArg(argIn);
  if (fmt != TrajectoryFile::PDBFILE && fmt != TrajectoryFile::MOL2FILE)
    fmt = TrajectoryFile::PDBFILE; // Default to PDB
  int smaskoffset = argIn.getKeyInt("smaskoffset", 0) + 1;
  int qmaskoffset = argIn.getKeyInt("qmaskoffset", 0) + 1;

  // Load blast file
  mprintf("\tReading BLAST alignment from '%s'\n", blastfile.c_str());
  BufferedLine infile;
  if (infile.OpenFileRead( blastfile )) return 1;
  // Seek down to first Query line.
  const char* ptr = infile.Line();
  bool atFirstQuery = false;
  while (ptr != 0) {
    if (*ptr == 'Q') {
      if ( strncmp(ptr, "Query", 5) == 0 ) {
        atFirstQuery = true;
        break;
      }
    }
    ptr = infile.Line();
  }
  if (!atFirstQuery) {
    mprinterr("Error: 'Query' not found.\n");
    return 1;
  }

  // Read alignment. Replacing query with subject.
  typedef std::vector<char> Carray;
  typedef std::vector<int> Iarray;
  Carray Query; // Query residues
  Carray Sbjct; // Sbjct residues
  Iarray Smap;  // Smap[Sbjct index] = Query index
  // Column at which residue characters are expected to start on the
  // Query/Sbjct lines.
  const int seqStartCol = 12;
  while (ptr != 0) {
    const char* qline = ptr;           // query line
    const char* aline = infile.Line(); // alignment line
    const char* sline = infile.Line(); // subject line
    if (aline == 0 || sline == 0) {
      mprinterr("Error: Missing alignment line or subject line after Query:\n");
      mprinterr("Error: %s", qline);
      return 1;
    }
    // Make sure both lines actually reach the sequence column before
    // indexing into them. Both NUL and newline are treated as end of line
    // since the termination convention of BufferedLine is not visible here.
    for (int col = 0; col < seqStartCol; col++) {
      if (qline[col] == '\0' || qline[col] == '\n' ||
          sline[col] == '\0' || sline[col] == '\n')
      {
        mprinterr("Error: Query or subject line too short to contain alignment.\n");
        return 1;
      }
    }
    for (int idx = seqStartCol; ; idx++) {
      const char qchar = qline[idx];
      // A space ends the residue field; also stop at end of line.
      if (qchar == ' ' || qchar == '\0' || qchar == '\n' || qchar == '\r') break;
      const char schar = sline[idx];
      if (schar == '\0' || schar == '\n' || schar == '\r') {
        mprinterr("Error: Subject line is shorter than query line in alignment.\n");
        return 1;
      }
      if (qchar == '-') {
        // Sbjct does not have corresponding res in Query
        Smap.push_back(-1);
        Sbjct.push_back( schar );
      } else if (schar == '-') {
        // Query does not have a corresponding res in Sbjct
        Query.push_back( qchar );
      } else {
        // Direct Query to Sbjct map
        Smap.push_back( static_cast<int>(Query.size()) );
        Sbjct.push_back( schar );
        Query.push_back( qchar );
      }
    }
    // Scan to next Query
    ptr = infile.Line();
    while (ptr != 0) {
      if (*ptr == 'Q') {
        if ( strncmp(ptr, "Query", 5) == 0 ) break;
      }
      ptr = infile.Line();
    }
  }

  // DEBUG
  std::string SmaskExp, QmaskExp;
  if (State.Debug() > 0) mprintf(" Map of Sbjct to Query:\n");
  for (int sres = 0; sres != (int)Sbjct.size(); sres++) {
    if (State.Debug() > 0)
      mprintf("%-i %3s %i", sres+smaskoffset, Residue::ConvertResName(Sbjct[sres]),
              Smap[sres]+qmaskoffset);
    const char* qres = "";
    if (Smap[sres] != -1) {
      qres = Residue::ConvertResName(Query[Smap[sres]]);
      if (SmaskExp.empty())
        SmaskExp.assign( integerToString(sres+smaskoffset) );
      else
        SmaskExp.append( "," + integerToString(sres+smaskoffset) );
      if (QmaskExp.empty())
        QmaskExp.assign( integerToString(Smap[sres]+qmaskoffset) );
      else
        QmaskExp.append( "," + integerToString(Smap[sres]+qmaskoffset) );
    }
    if (State.Debug() > 0) mprintf(" %3s\n", qres);
  }
  mprintf("Smask: %s\n", SmaskExp.c_str());
  mprintf("Qmask: %s\n", QmaskExp.c_str());

  // Check that query residues match reference.
  // NOTE(review): qres is not range-checked against the number of residues
  //               in the reference; presumably the alignment query and the
  //               reference have the same residues - confirm.
  for (unsigned int sres = 0; sres != Sbjct.size(); sres++) {
    int qres = Smap[sres];
    if (qres != -1) {
      if (Query[qres] != qref.Parm().Res(qres).SingleCharName()) {
        mprintf("Warning: Potential residue mismatch: Query %s reference %s\n",
                Residue::ConvertResName(Query[qres]), qref.Parm().Res(qres).c_str());
      }
    }
  }

  // Build subject using coordinate from reference.
  Topology sTop;
  Frame sFrame;
  Iarray placeHolder; // Atom indices of placeholder residues.
  for (unsigned int sres = 0; sres != Sbjct.size(); sres++) {
    int qres = Smap[sres];
    NameType SresName( Residue::ConvertResName(Sbjct[sres]) );
    if (qres != -1) {
      Residue const& QR = qref.Parm().Res(qres);
      Residue SR(SresName, sres+1, ' ', QR.ChainID());
      if (Query[qres] == Sbjct[sres]) {
        // Exact match. All non-H atoms.
        for (int qat = QR.FirstAtom(); qat != QR.LastAtom(); qat++)
        {
          // Atom and coordinates must be added together so that sTop and
          // sFrame stay in sync (one coordinate per topology atom).
          if (qref.Parm()[qat].Element() != Atom::HYDROGEN) {
            sTop.AddTopAtom( qref.Parm()[qat], SR );
            sFrame.AddXYZ( qref.Coord().XYZ(qat) );
          }
        }
      } else {
        // Partial match. Copy only backbone and CB.
        for (int qat = QR.FirstAtom(); qat != QR.LastAtom(); qat++)
        {
          if ( qref.Parm()[qat].Name().Match("N" ) ||
               qref.Parm()[qat].Name().Match("CA") ||
               qref.Parm()[qat].Name().Match("CB") ||
               qref.Parm()[qat].Name().Match("C" ) ||
               qref.Parm()[qat].Name().Match("O" ) )
          {
            sTop.AddTopAtom( qref.Parm()[qat], SR );
            sFrame.AddXYZ( qref.Coord().XYZ(qat) );
          }
        }
      }
    } else {
      // Residue in query does not exist for subject. Just put placeholder CA for now.
      Vec3 Zero(0.0);
      placeHolder.push_back( sTop.Natom() );
      sTop.AddTopAtom( Atom("CA", "C "), Residue(SresName, sres+1, ' ', ' ') );
      sFrame.AddXYZ( Zero.Dptr() );
    }
  }
  mprintf("\tPlaceholder residue indices:");
  for (Iarray::const_iterator p = placeHolder.begin(); p != placeHolder.end(); ++p)
    mprintf(" %i", *p + 1);
  mprintf("\n");

  // Try to give placeholders more reasonable coordinates.
  if (!placeHolder.empty()) {
    unsigned int pidx = 0;
    while (pidx < placeHolder.size()) {
      // Collect one run of consecutive placeholder atom indices.
      Iarray current_indices;
      current_indices.push_back( placeHolder[pidx++] );
      // Search for the end of this segment
      for (; pidx != placeHolder.size(); pidx++) {
        if (placeHolder[pidx] - current_indices.back() > 1) break;
        current_indices.push_back( placeHolder[pidx] );
      }
      // DEBUG
      mprintf("\tSegment:");
      for (Iarray::const_iterator it = current_indices.begin();
                                  it != current_indices.end(); ++it)
        mprintf(" %i", *it + 1);
      // Get coordinates of residues bordering segment.
      int prev_res = sTop[current_indices.front()].ResNum() - 1;
      int next_res = sTop[current_indices.back() ].ResNum() + 1;
      mprintf(" (prev_res=%i, next_res=%i)\n", prev_res+1, next_res+1);
      const int prev_atom = current_indices.front() - 1;
      const int next_atom = current_indices.back() + 1;
      // A segment at the very start or end of the structure has no atom on
      // one side; interpolation needs both, so leave such placeholders alone.
      if (prev_atom < 0 || next_atom >= sTop.Natom()) {
        mprintf("Warning: Placeholder segment is at the start or end of the structure;\n"
                "Warning:   cannot interpolate. Coordinates left at the origin.\n");
        continue;
      }
      Vec3 prev_crd(sFrame.XYZ(prev_atom));
      Vec3 next_crd(sFrame.XYZ(next_atom));
      prev_crd.Print("prev_crd");
      next_crd.Print("next_crd");
      // Evenly space the placeholders between the two bordering atoms.
      Vec3 crd_step = (next_crd - prev_crd) / (double)(current_indices.size()+1);
      crd_step.Print("crd_step");
      double* xyz = sFrame.xAddress() + (current_indices.front() * 3);
      for (unsigned int i = 0; i != current_indices.size(); i++, xyz += 3) {
        prev_crd += crd_step;
        xyz[0] = prev_crd[0];
        xyz[1] = prev_crd[1];
        xyz[2] = prev_crd[2];
      }
    }
  }

  // Write output traj
  Trajout_Single trajout;
  if (trajout.PrepareTrajWrite(outfilename, argIn, &sTop, CoordinateInfo(), 1, fmt)) return 1;
  if (trajout.WriteSingle(0, sFrame)) return 1;
  trajout.EndTraj();
  return 0;
}
/** Determine what atoms each mask pertains to for the current parm file. */ Action::RetType Action_NMRrst::Setup(ActionSetup& setup) { if (!viewrst_.empty() && rsttop_ == 0) rsttop_ = setup.TopAddress(); // --------------------------------------------- // Set up NOEs from file. for (noeDataArray::iterator noe = NOEs_.begin(); noe != NOEs_.end(); ++noe) { if (setup.Top().SetupIntegerMask( noe->dMask1_ )) return Action::ERR; if (setup.Top().SetupIntegerMask( noe->dMask2_ )) return Action::ERR; if (noe->dMask1_.None() || noe->dMask2_.None()) { mprintf("Warning: One or both masks for NOE '%s' have no atoms (%i and %i).\n", noe->dist_->legend(), noe->dMask1_.Nselected(), noe->dMask2_.Nselected()); noe->active_ = false; } else noe->active_ = true; } // --------------------------------------------- // Set up potential NOE sites. if (findNOEs_) { if (setup.Top().SetupCharMask( Mask_ )) return Action::ERR; Mask_.MaskInfo(); if (Mask_.None()) return Action::SKIP; SiteArray potentialSites; // .clear(); AtomMap resMap; resMap.SetDebug( debug_ ); std::vector<bool> selected; Range soluteRes = setup.Top().SoluteResidues(); for (Range::const_iterator res = soluteRes.begin(); res != soluteRes.end(); ++res) { int res_first_atom = setup.Top().Res(*res).FirstAtom(); selected.assign( setup.Top().Res(*res).NumAtoms(), false ); // Find symmetric atom groups. AtomMap::AtomIndexArray symmGroups; if (resMap.SymmetricAtoms(setup.Top(), symmGroups, *res)) return Action::ERR; // DEBUG if (debug_ > 0) { mprintf("DEBUG: Residue %i: symmetric atom groups:\n", *res + 1); for (AtomMap::AtomIndexArray::const_iterator grp = symmGroups.begin(); grp != symmGroups.end(); ++grp) { mprintf("\t\t"); for (AtomMap::Iarray::const_iterator at = grp->begin(); at != grp->end(); ++at) mprintf(" %s", setup.Top().TruncAtomNameNum( *at ).c_str()); mprintf("\n"); } } // Each symmetric hydrogen atom group is a site. 
for (AtomMap::AtomIndexArray::const_iterator grp = symmGroups.begin(); grp != symmGroups.end(); ++grp) { // NOTE: If first atom is H all should be H. if ( setup.Top()[ grp->front() ].Element() == Atom::HYDROGEN ) { Iarray symmAtomGroup; for (Iarray::const_iterator at = grp->begin(); at != grp->end(); ++at) if (Mask_.AtomInCharMask( *at )) symmAtomGroup.push_back( *at ); if (!symmAtomGroup.empty()) { potentialSites.push_back( Site(*res, symmAtomGroup) ); // Mark symmetric atoms as selected. for (AtomMap::Iarray::const_iterator at = grp->begin(); at != grp->end(); ++at) selected[ *at - res_first_atom ] = true; } } } // All other non-selected hydrogens bonded to same heavy atom are sites. for (int ratom = res_first_atom; ratom != setup.Top().Res(*res).LastAtom(); ++ratom) { if ( setup.Top()[ratom].Element() != Atom::HYDROGEN ) { Iarray heavyAtomGroup; for (Atom::bond_iterator ba = setup.Top()[ratom].bondbegin(); ba != setup.Top()[ratom].bondend(); ++ba) if ( Mask_.AtomInCharMask(*ba) && *ba >= res_first_atom && *ba < setup.Top().Res(*res).LastAtom() ) { if ( !selected[ *ba - res_first_atom ] && setup.Top()[ *ba ].Element() == Atom::HYDROGEN ) heavyAtomGroup.push_back( *ba ); } if (!heavyAtomGroup.empty()) potentialSites.push_back( Site(*res, heavyAtomGroup) ); } } } mprintf("\t%zu potential NOE sites:\n", potentialSites.size()); for (SiteArray::const_iterator site = potentialSites.begin(); site != potentialSites.end(); ++site) { mprintf(" %u\tRes %i:", site - potentialSites.begin(), site->ResNum()+1); for (unsigned int idx = 0; idx != site->Nindices(); ++idx) mprintf(" %s", setup.Top().TruncAtomNameNum( site->Idx(idx) ).c_str()); mprintf("\n"); } if (noeArray_.empty()) { size_t siteArraySize = 0; // Set up all potential NOE pairs. Keep track of size. 
for (SiteArray::const_iterator site1 = potentialSites.begin(); site1 != potentialSites.end(); ++site1) { for (SiteArray::const_iterator site2 = site1 + 1; site2 != potentialSites.end(); ++site2) { if (site1->ResNum() != site2->ResNum()) { std::string legend = site1->SiteLegend(setup.Top()) + "--" + site2->SiteLegend(setup.Top()); DataSet* ds = 0; if (series_) { ds = masterDSL_->AddSet(DataSet::FLOAT, MetaData(setname_, "foundNOE", noeArray_.size())); if (ds == 0) return Action::ERR; // Construct a data set name. ds->SetLegend(legend); } noeArray_.push_back( NOEtype(*site1, *site2, ds, legend) ); siteArraySize += (2 * sizeof(int) * site1->Nindices()) + (2 * sizeof(int) * site2->Nindices()); } } } numNoePairs_ = noeArray_.size(); size_t siteSize = sizeof(int) + (2 * sizeof(Iarray)) + sizeof(Site); size_t noeSize = (2 * siteSize) + sizeof(DataSet*) + sizeof(double) + sizeof(NOEtype); if (series_) noeSize += sizeof(std::vector<float>); size_t noeArraySize = (noeSize * numNoePairs_) + siteArraySize; if (series_) noeArraySize += (setup.Nframes() * numNoePairs_ * sizeof(float)); mprintf("\t%zu potential NOE pairs. 
Estimated memory usage is %s\n", numNoePairs_, ByteString(noeArraySize, BYTE_DECIMAL).c_str()); } else if (numNoePairs_ != potentialSites.size()) { mprinterr("Warning: Found NOE matrix has already been set up for %zu potential\n" "Warning: NOEs, but %zu NOEs currently found.\n", numNoePairs_, potentialSites.size()); return Action::SKIP; } } // --------------------------------------------- // Set up NOEs specified on the command line if (!Pairs_.empty()) { if (!specifiedNOEs_.empty()) { mprintf("Warning: Specifying NOEs currently only works with first topology used.\n"); return Action::SKIP; } for (MaskPairArray::iterator mp = Pairs_.begin(); mp != Pairs_.end(); mp++) { if (setup.Top().SetupIntegerMask( mp->first )) return Action::ERR; int res1 = CheckSameResidue(setup.Top(), mp->first); if (res1 < 0) continue; if (setup.Top().SetupIntegerMask( mp->second )) return Action::ERR; int res2 = CheckSameResidue(setup.Top(), mp->second); if (res2 < 0) continue; Site site1( res1, mp->first.Selected() ); Site site2( res2, mp->second.Selected() ); std::string legend = site1.SiteLegend(setup.Top()) + "--" + site2.SiteLegend(setup.Top()); DataSet* ds = 0; if (series_) { ds = masterDSL_->AddSet(DataSet::FLOAT, MetaData(setname_, "specNOE", specifiedNOEs_.size())); if (ds == 0) return Action::ERR; ds->SetLegend(legend); } specifiedNOEs_.push_back( NOEtype(site1, site2, ds, legend) ); } } // Set up imaging info for this parm Image_.SetupImaging( setup.CoordInfo().TrajBox().Type() ); if (Image_.ImagingEnabled()) mprintf("\tImaged.\n"); else mprintf("\tImaging off.\n"); return Action::OK; }
/** Find potential symmetric atoms. All residues up to the last selected * residue are considered. */ int SymmetricRmsdCalc::SetupSymmRMSD(Topology const& topIn, AtomMask const& tgtMask, bool remapIn) { // Allocate space for remapping selected atoms in target frame. This will // also put the correct masses in based on the mask. tgtRemap_.SetupFrameFromMask(tgtMask, topIn.Atoms()); // Create map of original atom numbers to selected indices Iarray SelectedIdx( topIn.Natom(), -1 ); int tgtIdx = 0; for (int originalAtom = 0; originalAtom != topIn.Natom(); ++originalAtom) if ( originalAtom == tgtMask[tgtIdx] ) SelectedIdx[originalAtom] = tgtIdx++; if (debug_ > 0) { mprintf("DEBUG: Original atom -> Selected Index mapping:\n"); for (int originalAtom = 0; originalAtom != topIn.Natom(); ++originalAtom) mprintf("\t%8i -> %8i\n", originalAtom + 1, SelectedIdx[originalAtom] + 1); } // Create initial 1 to 1 atom map for all selected atoms; indices in // SymmetricAtomIndices will correspond to positions in AMap. AMap_.resize( tgtRemap_.Natom() ); // Determine last selected residue. int last_res = topIn[tgtMask.back()].ResNum() + 1; mprintf("\tResidues up to %s will be considered for symmetry correction.\n", topIn.TruncResNameNum(last_res-1).c_str()); // In each residue, determine which selected atoms are symmetric. SymmetricAtomIndices_.clear(); AtomMap resmap; if (debug_ > 1) resmap.SetDebug(1); for (int res = 0; res < last_res; ++res) { AtomMap::AtomIndexArray residue_SymmetricGroups; if (resmap.SymmetricAtoms(topIn, residue_SymmetricGroups, res)) { mprinterr("Error: Finding symmetric atoms in residue '%s'\n", topIn.TruncResNameNum(res).c_str()); return 1; } if (!residue_SymmetricGroups.empty()) { // Which atoms in symmetric groups are selected? 
bool resHasSelectedSymmAtoms = false; for (AtomMap::AtomIndexArray::const_iterator symmGroup = residue_SymmetricGroups.begin(); symmGroup != residue_SymmetricGroups.end(); ++symmGroup) { Iarray selectedAtomIndices; for (Iarray::const_iterator atnum = symmGroup->begin(); atnum != symmGroup->end(); ++atnum) { if ( SelectedIdx[*atnum] != -1 ) selectedAtomIndices.push_back( SelectedIdx[*atnum] ); // Store tgtMask indices } if (!selectedAtomIndices.empty()) { SymmetricAtomIndices_.push_back( selectedAtomIndices ); resHasSelectedSymmAtoms = true; } } // If remapping and not all atoms in a residue are selected, warn user. // TODO: Should they just be considered even if not selected? if (remapIn && resHasSelectedSymmAtoms) { for (int atom = topIn.Res(res).FirstAtom(); atom != topIn.Res(res).LastAtom(); ++atom) if (SelectedIdx[atom] == -1) { mprintf("Warning: Not all atoms selected in residue '%s'. Re-mapped\n" "Warning: structures may appear distorted.\n", topIn.TruncResNameNum(res).c_str()); break; } } } } if (debug_ > 0) { mprintf("DEBUG: Potential Symmetric Atom Groups:\n"); for (AtomIndexArray::const_iterator symmatoms = SymmetricAtomIndices_.begin(); symmatoms != SymmetricAtomIndices_.end(); ++symmatoms) { mprintf("\t%8u) ", symmatoms - SymmetricAtomIndices_.begin()); for (Iarray::const_iterator atom = symmatoms->begin(); atom != symmatoms->end(); ++atom) mprintf(" %s(%i)", topIn.AtomMaskName(tgtMask[*atom]).c_str(), tgtMask[*atom] + 1); mprintf("\n"); } } return 0; }