/// Assign chain/residue identity to pAtom and append it to this molecule.
///
/// @param pAtom  atom to append; it must not already have a parent molecule
/// @param ch     chain name to set on the atom
/// @param resid  residue index to set on the atom
/// @param resn   residue name to set on the atom; if empty, the name is taken
///               from the existing residue found by (ch, resid)
/// @return the value returned by appendAtom(), or -1 if pAtom already has a
///         parent or the residue name cannot be determined
int MolCoord::appendAtomScrHelper(MolAtomPtr pAtom, const LString &ch,
                                  ResidIndex resid, const LString &resn)
{
  qlib::uid_t nuid = pAtom->getParentUID();
  if (nuid!=qlib::invalid_uid) {
    // pAtom has been already belonged to other mol
    // --> ERROR!!
    MB_DPRINTLN("MolCoord.appendAtom> ERROR, pAtom already belongs to mol %d ().", nuid);
    return -1;
  }

  // Determine the residue name BEFORE modifying pAtom, so that a failed
  // lookup leaves the atom untouched (in particular, without a parent UID
  // that would make any later append attempt be rejected above).
  LString resname = resn;
  if (resname.isEmpty()) {
    // res name is determined by chain name and resindex
    MolResiduePtr pRes = getResidue(ch, resid);
    if (pRes.isnull()) {
      // ERROR!! cannot determine the residue to append to
      return -1;
    }
    resname = pRes->getName();
  }

  pAtom->setParentUID(getUID());
  pAtom->setChainName(ch);
  pAtom->setResIndex(resid);
  pAtom->setResName(resname);

  return appendAtom(pAtom);
}
/// read one MOL entry from stream bool MOL2MolReader::readMol(qlib::LineStream &lin, bool bskip) { LString sline; std::vector<LString> slist; for (;;) { sline = lin.readLine().chomp(); if (sline.isEmpty() && !lin.ready()) return false; // EOF if (sline.equals("@<TRIPOS>MOLECULE")) { break; } } // mol_name LString cmpd_name = lin.readLine().trim(" \t\r\n"); if (cmpd_name.isEmpty()) cmpd_name = "_"; // XXX // molecule info sline = lin.readLine().chomp(); split(sline, ' ', std::back_inserter(slist)); if (slist.size()<1) { MB_THROW(MOL2FormatException, "Invalid atom info record"); } int natoms; if (!slist[0].toInt(&natoms)) { MB_THROW(MOL2FormatException, "Invalid atom info record"); } int nbonds=0; if (slist.size()>1) { if (!slist[1].toInt(&nbonds)) { MB_THROW(MOL2FormatException, "Invalid atom info record"); } } // mol_type LString mol_type = lin.readLine().chomp(); bool bApplyTopo = false; if (mol_type.equals("PROTEIN") || mol_type.equals("NUCLEIC_ACID")) bApplyTopo = true; // Ignore charge_type // Ignore mol_comment // Search ATOM record for (;;) { sline = lin.readLine().chomp(); if (sline.isEmpty() && !lin.ready()) return false; // EOF if (sline.equals("@<TRIPOS>ATOM")) { break; } } int i, idot, iresid, naid, ind, prev_resid; ElemID eleid; double xx, yy, zz; LString aname, atype, satom, res_name; std::map<int,int> atommap; std::map<LString, int> aname_counts; std::map<LString, int>::iterator an_iter; // XXXX prev_resid = -999999; for (i=0; i<natoms; ++i) { //LOG_DPRINTLN("i=%d, natoms=%d", i, natoms); sline = lin.readLine().chomp(); slist.clear(); split(sline, ' ', std::back_inserter(slist)); if (slist.size()<8) { MB_THROW(MOL2FormatException, "Invalid atom record"); } if (!slist[0].toInt(&ind)) { MB_THROW(MOL2FormatException, "Invalid atom ID record"); } aname = slist[1]; an_iter = aname_counts.find(aname); if (an_iter==aname_counts.end()) { aname_counts.insert(std::pair<LString, int>(aname, 1)); } else { an_iter->second = an_iter->second + 1; aname = 
LString::format("%d%s", an_iter->second, aname.c_str()); } if (!slist[2].toRealNum(&xx)) { MB_THROW(MOL2FormatException, "Invalid atom coord record"); } if (!slist[3].toRealNum(&yy)) { MB_THROW(MOL2FormatException, "Invalid atom coord record"); } if (!slist[4].toRealNum(&zz)) { MB_THROW(MOL2FormatException, "Invalid atom coord record"); } atype = slist[5]; satom = ""; idot = atype.indexOf('.'); if (idot<0) { satom = atype; } else if (idot>0) { satom = atype.substr(0, idot); } else { MB_THROW(MOL2FormatException, "Invalid SYBYL atom type"); } iresid = 0; if (!slist[6].toInt(&iresid)) { MB_THROW(MOL2FormatException, "Invalid atom resid record"); } res_name = slist[7]; if (res_name.equals("<0>")) res_name = cmpd_name; if (bApplyTopo) { // protein or nucleic acid // strip residue number from res_name int ntmp; if (res_name.substr(3).toInt(&ntmp)) { res_name = res_name.substr(0, 3); iresid = ntmp; } if (iresid!=prev_resid) // residue is changed --> clear atom name count aname_counts.clear(); } eleid = ElemSym::str2SymID(satom); // LOG_DPRINTLN("Atom: %f, %f, %f, <%s> %d", xx, yy, zz, aname.c_str(), eleid); if (!bskip) { MolAtomPtr pAtom = MolAtomPtr(MB_NEW MolAtom()); pAtom->setParentUID(m_pMol->getUID()); pAtom->setName(aname); pAtom->setElement(eleid); pAtom->setChainName(m_sCurrChName); pAtom->setResIndex(iresid); pAtom->setResName(res_name); pAtom->setPos(Vector4D(xx,yy,zz)); pAtom->setBfac(0.0); pAtom->setOcc(1.0); naid = m_pMol->appendAtom(pAtom); if (naid<0) MB_THROW(MOL2FormatException, "appendAtom() failed"); atommap.insert(std::pair<int,int>(ind, naid)); m_nReadAtoms++; } prev_resid = iresid; } // Search BOND record for (;;) { sline = lin.readLine().chomp(); if (sline.isEmpty() && !lin.ready()) return false; // EOF if (sline.equals("@<TRIPOS>BOND")) { break; } } int natm1, natm2; int natm_id1, natm_id2; std::map<int,int>::const_iterator iter; for (i=0; i<nbonds; ++i) { sline = lin.readLine().chomp(); slist.clear(); split(sline, ' ', std::back_inserter(slist)); 
if (slist.size()<4) { MB_THROW(MOL2FormatException, "Invalid bond record"); } if (!slist[1].toInt(&natm1)) { MB_THROW(MOL2FormatException, "Invalid bond line (atom1)"); } if (!slist[2].toInt(&natm2)) { MB_THROW(MOL2FormatException, "Invalid bond line (atom2)"); } LString sbont = slist[3]; if (!bskip) { iter = atommap.find(natm1); if (iter==atommap.end()) MB_THROW(MOL2FormatException, "Invalid bond line (bond atom1 not found)"); natm_id1 = iter->second; iter = atommap.find(natm2); if (iter==atommap.end()) MB_THROW(MOL2FormatException, "Invalid bond line (bond atom2 not found)"); natm_id2 = iter->second; MolBond *pB = m_pMol->makeBond(natm_id1, natm_id2, true); if (pB==NULL) MB_THROW(MOL2FormatException, "makeBond failed"); if (sbont.equals("1")) pB->setType(MolBond::SINGLE); else if (sbont.equals("2")) pB->setType(MolBond::DOUBLE); else if (sbont.equals("3")) pB->setType(MolBond::TRIPLE); else if (sbont.equals("ar")||sbont.equals("am")) pB->setType(MolBond::DELOC); m_nReadBonds++; } //LOG_DPRINTLN("bond %d<-->%d: %d", natm_id1, natm_id2, nbont); } if (bApplyTopo) { m_pMol->applyTopology(); if (mol_type.equals("PROTEIN")) m_pMol->calcProt2ndry(-500.0); if (mol_type.equals("NUCLEIC_ACID")) m_pMol->calcBasePair(3.7, 30); } else { // Set noautogen prop to this residue, // to avoid topology autogen, when saved to and loaded from the qdf stream. if (!bskip) { iter = atommap.begin(); if (iter!=atommap.end()) { int aid0 = iter->second; MolAtomPtr pA = m_pMol->getAtom(aid0); if (!pA.isnull()) { MolResiduePtr pRes = pA->getParentResidue(); if (!pRes.isnull()) { pRes->setPropStr("noautogen", "true"); } } } } } /* */ return true; }
// read from stream void PsfReader::read(qlib::InStream &ins) { int i, ires; qlib::LineStream ls(ins); m_pls = &ls; // skip header line readLine(); readLine(); /////////////////// // read REMARK header line readLine(); removeComment(); int ncomment; if (!m_line.toInt(&ncomment)) { MB_THROW(qlib::FileFormatException, "Cannot read ncomment line"); return; } MB_DPRINTLN("ncomment=%d", ncomment); for (i=0; i<ncomment; ++i) { readLine(); m_line = m_line.trim("\r\n "); LOG_DPRINTLN("%s", m_line.c_str()); } readLine(); /////////////////// // read atoms readLine(); removeComment(); if (!m_line.toInt(&m_natom)) { MB_THROW(qlib::FileFormatException, "Cannot read natom line"); return; } MB_DPRINTLN("natoms=%d", m_natom); LString stmp; for (i=0; i<m_natom; ++i) { readLine(); // LOG_DPRINTLN("%s", m_line.c_str()); // chain name stmp = m_line.substr(9, 3); stmp = stmp.trim(" "); // stmp = stmp.toLowerCase(); LString chain(stmp.c_str()); // residue number stmp = m_line.substr(14, 4); int nresi; if (!stmp.toInt(&nresi)) { LString msg = LString::format("cannot convert resid number: %s", stmp.c_str()); MB_THROW(qlib::FileFormatException, msg); return; } ResidIndex residx(nresi); // residue name stmp = m_line.substr(19, 4); stmp = stmp.trim(" "); // stmp = stmp.toLowerCase(); LString resn(stmp.c_str()); // atom name stmp = m_line.substr(24, 4); stmp = stmp.trim(" "); // stmp = stmp.toLowerCase(); LString name(stmp.c_str()); // charge stmp = m_line.substr(34, 10); double charge; if (!stmp.toDouble(&charge)) { LString msg = LString::format("cannot convert charge %s", stmp.c_str()); MB_THROW(qlib::FileFormatException, msg); return; } // mass stmp = m_line.substr(50, 8); double mass; if (!stmp.toDouble(&mass)) { LString msg = LString::format("cannot convert mass <%s>", stmp.c_str()); MB_THROW(qlib::FileFormatException, msg); return; } ElemID eleid = convMassElem(mass); //LOG_DPRINTLN("ATOM %s %s %d %s", //(*pAtoms)[i].name.c_str(), //(*pAtoms)[i].resn.c_str(), //(*pAtoms)[i].resid, 
//(*pAtoms)[i].chain.c_str()); MolAtomPtr pAtom = MolAtomPtr(MB_NEW MolAtom()); pAtom->setParentUID(m_pMol->getUID()); pAtom->setName(name); pAtom->setElement(eleid); pAtom->setChainName(chain); pAtom->setResIndex(residx); pAtom->setResName(resn); if (m_pMol->appendAtom(pAtom)<0) { LString stmp = m_line; stmp = stmp.chomp(); // stmp = stmp.toUpperCase(); // m_nErrCount ++; // if (m_nErrCount<m_nErrMax) LOG_DPRINTLN("PsfReader> read ATOM line failed: %s", stmp.c_str()); } } readLine(); }