double calcExactMW(const ROMol &mol, bool onlyHeavy) { double res = 0.0; int nHsToCount = 0; const PeriodicTable *table = PeriodicTable::getTable(); for (ROMol::ConstAtomIterator atomIt = mol.beginAtoms(); atomIt != mol.endAtoms(); ++atomIt) { int atNum = (*atomIt)->getAtomicNum(); if (atNum != 1 || !onlyHeavy) { if (!(*atomIt)->getIsotope()) { res += table->getMostCommonIsotopeMass(atNum); } else { res += (*atomIt)->getMass(); } res -= constants::electronMass * (*atomIt)->getFormalCharge(); } // add our implicit Hs if we need to: if (!onlyHeavy) { nHsToCount += (*atomIt)->getTotalNumHs(false); } } if (!onlyHeavy) { res += nHsToCount * table->getMostCommonIsotopeMass(1); } return res; }
bool classifyAtoms(ROMol &mol, std::vector<double> &radii, const SASAOpts &opts) { radii.clear(); const freesasa_classifier *classifier = nullptr; switch (opts.classifier) { case SASAOpts::Protor: classifier = &freesasa_protor_classifier; break; case SASAOpts::NACCESS: classifier = &freesasa_naccess_classifier; break; case SASAOpts::OONS: classifier = &freesasa_oons_classifier; break; default: throw ValueErrorException("unknown FreeSASA classifier specified"); return false; } bool success = true; for (ROMol::AtomIterator at = mol.beginAtoms(); at != mol.endAtoms(); ++at) { Atom *atom = *at; freesasa_atom_class cls = FREESASA_ATOM_UNKNOWN; std::string classification = "Unclassified"; double radius = 0.0; const AtomMonomerInfo *info = atom->getMonomerInfo(); if (info) { const char *atom_name = info->getName().c_str(); const char *res_name = nullptr; if (info->getMonomerType() == AtomMonomerInfo::PDBRESIDUE) { res_name = ((AtomPDBResidueInfo *)info)->getResidueName().c_str(); radius = freesasa_classifier_radius(classifier, res_name, atom_name); if (radius == 0.0) { BOOST_LOG(rdWarningLog) << "Atom " << atom->getIdx() << " has zero radius" << std::endl; } cls = freesasa_classifier_class(classifier, res_name, atom_name); if (cls == FREESASA_ATOM_UNKNOWN) { BOOST_LOG(rdWarningLog) << "Atom " << atom->getIdx() << " could not be classified" << std::endl; success = false; } else { classification = freesasa_classifier_class2str(cls); } } } radii.push_back(radius); atom->setProp<int>(common_properties::Atom::SASAClass, (int)cls); atom->setProp(common_properties::Atom::SASAClassName, classification); } return success; }
/*!
  Assigns a hybridization state to every atom of \c mol.

  Atoms with atomic number 0 are marked Atom::UNSPECIFIED. For all other
  atoms the state is derived from numBondsPlusLonePairs():
    0 or 1 -> S, 2 -> SP, 3 -> SP2, 4 -> SP3 (or SP2, see below),
    5 -> SP3D, 6 -> SP3D2, anything else -> UNSPECIFIED.
*/
void setHybridization(ROMol &mol) {
  for (ROMol::AtomIterator atomIt = mol.beginAtoms();
       atomIt != mol.endAtoms(); ++atomIt) {
    Atom *atom = *atomIt;

    if (atom->getAtomicNum() == 0) {
      atom->setHybridization(Atom::UNSPECIFIED);
      continue;
    }

    const int norbs = numBondsPlusLonePairs(atom);
    switch (norbs) {
      case 0:  // This occurs for things like Na+
      case 1:
        atom->setHybridization(Atom::S);
        break;
      case 2:
        atom->setHybridization(Atom::SP);
        break;
      case 3:
        atom->setHybridization(Atom::SP2);
        break;
      case 4: {
        // Nominally SP3, but an atom that takes part in a conjugated bond
        // (like the second O in O=CO) is set down to SP2.
        // The downgrade is skipped for atoms with degree higher than 3
        // (e.g. the P in CP1(C)=CC=CN=C1C, which has norbs = 4 and a
        // conjugated bond but clearly should not be SP2).
        // This is Issue276
        const bool staysSP3 =
            atom->getDegree() > 3 || !MolOps::atomHasConjugatedBond(atom);
        if (staysSP3) {
          atom->setHybridization(Atom::SP3);
        } else {
          atom->setHybridization(Atom::SP2);
        }
        break;
      }
      case 5:
        atom->setHybridization(Atom::SP3D);
        break;
      case 6:
        atom->setHybridization(Atom::SP3D2);
        break;
      default:
        atom->setHybridization(Atom::UNSPECIFIED);
        break;
    }
  }
}
std::string MolToSequence(const ROMol &mol) { std::string result; for (ROMol::ConstAtomIterator atomIt=mol.beginAtoms(); atomIt!=mol.endAtoms();++atomIt){ const Atom *atom = *atomIt; AtomPDBResidueInfo *info = (AtomPDBResidueInfo*)(atom->getMonomerInfo()); if (info && info->getMonomerType()==AtomMonomerInfo::PDBRESIDUE && info->getName() == " CA ") { result += getOneLetterCode(info); } } return result; }
double calcAMW(const ROMol &mol, bool onlyHeavy) { double res = 0.0; for (ROMol::ConstAtomIterator atomIt = mol.beginAtoms(); atomIt != mol.endAtoms(); ++atomIt) { int atNum = (*atomIt)->getAtomicNum(); if (atNum != 1 || !onlyHeavy) { res += (*atomIt)->getMass(); } // add our implicit Hs if we need to: if (!onlyHeavy) { const PeriodicTable *table = PeriodicTable::getTable(); res += (*atomIt)->getTotalNumHs() * table->getAtomicWeight(1); } } return res; }
/*!
  Flags the conjugated bonds of \c mol.

  First every bond's conjugation flag is reset so that exactly the aromatic
  bonds are marked conjugated. Then markConjAtomBonds() is run on each atom
  to check whether the bonds meeting at that atom can be conjugated.
*/
void setConjugation(ROMol &mol) {
  // initial state: conjugated if and only if aromatic
  for (ROMol::BondIterator bondIt = mol.beginBonds();
       bondIt != mol.endBonds(); ++bondIt) {
    (*bondIt)->setIsConjugated((*bondIt)->getIsAromatic());
  }

  // per-atom pass over the bonds connecting to each atom
  ROMol::AtomIterator atomIt = mol.beginAtoms();
  while (atomIt != mol.endAtoms()) {
    markConjAtomBonds(*atomIt);
    ++atomIt;
  }
}
/*! \brief compute the Gasteiger partial charges and return a new molecule with the charges set * * Ref : J.Gasteiger, M. Marsili, "Iterative Equalization of Oribital Electronegatiity * A Rapid Access to Atomic Charges", Tetrahedron Vol 36 p3219 1980 */ void computeGasteigerCharges(const ROMol &mol, std::vector<double> &charges, int nIter, bool throwOnParamFailure) { PRECONDITION(charges.size()>=mol.getNumAtoms(),"bad array size"); PeriodicTable *table = PeriodicTable::getTable(); const GasteigerParams *params = GasteigerParams::getParams(); double damp = DAMP; int natms = mol.getNumAtoms(); // space for parameters for each atom in the molecule std::vector<DOUBLE_VECT> atmPs; atmPs.reserve(natms); std::fill(charges.begin(),charges.end(),0.0); DOUBLE_VECT hChrg; // total charge on the implicit hydrogen on each heavy atom hChrg.resize(natms, 0.0); DOUBLE_VECT ionX; ionX.resize(natms, 0.0); DOUBLE_VECT energ; energ.resize(natms, 0.0); ROMol::ADJ_ITER nbrIdx,endIdx; // deal with the conjugated system - distribute the formal charges on atoms of same type in each // conjugated system Gasteiger::splitChargeConjugated(mol, charges); // now read in the parameters ROMol::ConstAtomIterator ai; for (ai = mol.beginAtoms(); ai != mol.endAtoms(); ai++) { std::string elem = table->getElementSymbol((*ai)->getAtomicNum()); std::string mode; switch ((*ai)->getHybridization()) { case Atom::SP3: mode = "sp3"; break; case Atom::SP2: mode = "sp2"; break; case Atom::SP: mode = "sp"; break; default: if ((*ai)->getAtomicNum() == 1) { // if it is hydrogen mode = "*"; } else if ((*ai)->getAtomicNum() == 16) { // we have a sulfur atom with no hydribidation information // check how many oxygens we have on the sulfer boost::tie(nbrIdx,endIdx) = mol.getAtomNeighbors(*ai); int no = 0; while (nbrIdx != endIdx) { if (mol.getAtomWithIdx(*nbrIdx)->getAtomicNum() == 8){ no++; } nbrIdx++; } if (no == 2) { mode = "so2"; } else if (no == 1) { mode = "so"; } else { // some other sulfur state. 
Default to sp3 mode = "sp3"; } } } // if we get a unknown mode or element type the // following will will throw an exception atmPs.push_back(params->getParams(elem, mode,throwOnParamFailure)); // set ionX paramters // if Hydrogen treat differently int idx = (*ai)->getIdx(); if ((*ai)->getAtomicNum() == 1) { ionX[idx] = IONXH; } else { ionX[idx] = atmPs[idx][0] + atmPs[idx][1] + atmPs[idx][2]; } } // do the iteration here int itx, aix, sgn, niHs; double enr, dq, dx, qHs, dqH; // parameters for hydrogen atoms (for case where the hydrogen are not in the // graph (implicit hydrogens) DOUBLE_VECT hParams; hParams = params->getParams("H", "*", throwOnParamFailure); /* int itmp; for (itmp = 0; itmp < 5; itmp++) { std::cout << " aq:" << charges[itmp] << "\n"; }*/ for (itx = 0; itx < nIter; itx++) { for (aix = 0; aix < natms; aix++) { //enr = p0 + charge*(p1 + p2*charge) enr = atmPs[aix][0] + charges[aix]*(atmPs[aix][1] + atmPs[aix][2]*charges[aix]); energ[aix] = enr; } for (aix = 0; aix < natms; aix++) { dq = 0.0; boost::tie(nbrIdx,endIdx) = mol.getAtomNeighbors(mol.getAtomWithIdx(aix)); while (nbrIdx != endIdx) { dx = energ[*nbrIdx] - energ[aix]; if (dx < 0.0) { sgn = 0; } else { sgn = 1; } dq += dx/( (sgn*(ionX[aix] - ionX[*nbrIdx])) + ionX[*nbrIdx]); nbrIdx++; } // now loop over the implicit hydrogens and get their contributions // since hydrogens don't connect to anything else, update their charges at the same time niHs = mol.getAtomWithIdx(aix)->getTotalNumHs(); if (niHs > 0) { qHs = hChrg[aix]/niHs; enr = hParams[0] + qHs*(hParams[1] + hParams[2]*qHs); dx = enr - energ[aix]; if (dx < 0.0) { sgn = 0; } else { sgn = 1; } dqH = dx/((sgn*(ionX[aix] - IONXH)) + IONXH); dq += (niHs*dqH); //adjust the charges on the hydrogens simultaneously (possible because each of the // hydrogens have no other neighbors) hChrg[aix] -= (niHs*dqH*damp); } charges[aix] += (damp*dq); } damp *= DAMP_SCALE; } for (aix = 0; aix < natms; aix++) { 
mol.getAtomWithIdx(aix)->setProp("_GasteigerCharge", charges[aix], true); // set the implicit hydrogen charges mol.getAtomWithIdx(aix)->setProp("_GasteigerHCharge", hChrg[aix], true); } }
std::string MolToHELM(const ROMol &mol) { std::vector<AtomPDBResidueInfo*> seq[10]; std::string result; bool first = true; std::string chain; int id = 1; /* First pass: Monomers */ for (ROMol::ConstAtomIterator atomIt=mol.beginAtoms(); atomIt!=mol.endAtoms();++atomIt){ const Atom *atom = *atomIt; AtomPDBResidueInfo *info = (AtomPDBResidueInfo*)(atom->getMonomerInfo()); // We can only write HELM if all atoms have PDB residue information if (!info || info->getMonomerType()!=AtomMonomerInfo::PDBRESIDUE) return ""; if (info->getName() == " CA ") { const char *mono = getHELMMonomer(info); if (!mono) return ""; if (first) { chain = info->getChainId(); result = "PEPTIDE1{"; first = false; } else if (info->getChainId() != chain) { // Nine chains should be enough? if (id == 9) return ""; id++; chain = info->getChainId(); result += "}|PEPTIDE"; result += (char)(id+'0'); result += "{"; } else result += "."; result += mono; seq[id].push_back(info); } else if (info->getResidueName() == "NH2" && info->getName() == " N ") { if (first) return ""; result += ".[am]"; } else if (info->getResidueName() == "ACE" && info->getName() == " C ") { if (first) { chain = info->getChainId(); result = "PEPTIDE1{[ac]"; first = false; } else if (info->getChainId() != chain) { // Nine chains should be enough? 
if (id == 9) return ""; id++; chain = info->getChainId(); result += "}|PEPTIDE"; result += (char)(id+'0'); result += "{[ac]"; } else return ""; seq[id].push_back(info); } } if (first) return ""; result += "}$"; first = true; for (ROMol::ConstBondIterator bondIt=mol.beginBonds(); bondIt!=mol.endBonds(); ++bondIt){ const Bond *bond = *bondIt; Atom *beg = bond->getBeginAtom(); Atom *end = bond->getEndAtom(); if (!beg || !end) continue; AtomPDBResidueInfo *binfo = (AtomPDBResidueInfo*)(beg->getMonomerInfo()); AtomPDBResidueInfo *einfo = (AtomPDBResidueInfo*)(end->getMonomerInfo()); if (!binfo || !einfo) continue; // Test if this is an uninteresting intra-residue bond if (binfo->getResidueNumber() == einfo->getResidueNumber() && binfo->getResidueName() == einfo->getResidueName() && binfo->getChainId() == einfo->getChainId() && binfo->getInsertionCode() == einfo->getInsertionCode()) continue; if (bond->getBondType() != Bond::SINGLE) return ""; if (IsEupeptideBond(binfo,einfo)) continue; if (!IsSupportedHELMBond(binfo,einfo)) return ""; std::string tmp = NameHELMBond(seq,binfo,einfo); if (tmp.empty()) return ""; if (!first) result += "|"; else first = false; result += tmp; } result += "$$$"; return result; }
// -------------------------------------------------- // // Calculates chiral invariants for the atoms of a molecule // These are based on Labute's proposal in: // "An Efficient Algorithm for the Determination of Topological // RS Chirality" Journal of the CCG (1996) // // -------------------------------------------------- void buildCIPInvariants(const ROMol &mol, DOUBLE_VECT &res) { PRECONDITION(res.size() >= mol.getNumAtoms(), "res vect too small"); int atsSoFar = 0; // // NOTE: // If you make modifications to this, keep in mind that it is // essential that the initial comparison of ranks behave properly. // So, though it seems like it would makes sense to include // information about the number of Hs (or charge, etc) in the CIP // invariants, this will result in bad rankings. For example, in // this molecule: OC[C@H](C)O, including the number of Hs would // cause the methyl group (atom 3) to be ranked higher than the CH2 // connected to O (atom 1). This is totally wrong. // // We also don't include any pre-existing stereochemistry information. // Though R and S assignments do factor in to the priorities of atoms, // we're starting here from scratch and we'll let the R and S stuff // be taken into account during the iterations. 
// for (ROMol::ConstAtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms(); ++atIt) { const unsigned short nMassBits = 10; const unsigned short maxMass = 1 << nMassBits; Atom const *atom = *atIt; unsigned long invariant = 0; int num = atom->getAtomicNum() % 128; // get an int with the deviation in the mass from the default: int mass = 0; if (atom->getIsotope()) { mass = atom->getIsotope() - PeriodicTable::getTable()->getMostCommonIsotope(atom->getAtomicNum()); if (mass >= 0) mass += 1; } mass += maxMass / 2; if (mass < 0) mass = 0; else mass = mass % maxMass; #if 0 // NOTE: the inclusion of hybridization in the invariant (as // suggested in the original paper), leads to the situation // that // C[C@@](O)(C=C)C(C)CC // and // C[C@@](O)(C=C)C(C)CO // are assigned S chirality even though the rest of the world // seems to agree that they ought to be R (atom 3, sp2, is ranked // higher than atom 5, sp3, no matter what their environments) int hyb=0; switch(atom->getHybridization()) { case Atom::SP: hyb=6;break; case Atom::SP2: hyb=5;break; case Atom::SP3: hyb=1;break; case Atom::SP3D: hyb=3;break; case Atom::SP3D2: hyb=2;break; default: break; } #endif invariant = num; // 7 bits here invariant = (invariant << nMassBits) | mass; int mapnum = -1; atom->getPropIfPresent(common_properties::molAtomMapNumber, mapnum); mapnum = (mapnum + 1) % 1024; // increment to allow map numbers of zero // (though that would be stupid) invariant = (invariant << 10) | mapnum; res[atsSoFar++] = invariant; } }
// finds all possible chiral special cases. // at the moment this is just candidates for ring stereochemistry void findChiralAtomSpecialCases(ROMol &mol, boost::dynamic_bitset<> &possibleSpecialCases) { PRECONDITION(possibleSpecialCases.size() >= mol.getNumAtoms(), "bit vector too small"); possibleSpecialCases.reset(); if (!mol.getRingInfo()->isInitialized()) { VECT_INT_VECT sssrs; MolOps::symmetrizeSSSR(mol, sssrs); } boost::dynamic_bitset<> atomsSeen(mol.getNumAtoms()); boost::dynamic_bitset<> atomsUsed(mol.getNumAtoms()); boost::dynamic_bitset<> bondsSeen(mol.getNumBonds()); for (ROMol::AtomIterator ait = mol.beginAtoms(); ait != mol.endAtoms(); ++ait) { const Atom *atom = *ait; if (atomsSeen[atom->getIdx()]) continue; if (atom->getChiralTag() == Atom::CHI_UNSPECIFIED || atom->hasProp(common_properties::_CIPCode) || !mol.getRingInfo()->numAtomRings(atom->getIdx()) || !atomIsCandidateForRingStereochem(mol, atom)) { continue; } // do a BFS from this ring atom along ring bonds and find other // stereochemistry candidates. std::list<const Atom *> nextAtoms; // start with finding viable neighbors ROMol::OEDGE_ITER beg, end; boost::tie(beg, end) = mol.getAtomBonds(atom); while (beg != end) { unsigned int bidx = mol[*beg]->getIdx(); if (!bondsSeen[bidx]) { bondsSeen.set(bidx); if (mol.getRingInfo()->numBondRings(bidx)) { const Atom *oatom = mol[*beg]->getOtherAtom(atom); if (!atomsSeen[oatom->getIdx()]) { nextAtoms.push_back(oatom); atomsUsed.set(oatom->getIdx()); } } } ++beg; } INT_VECT ringStereoAtoms(0); if (!nextAtoms.empty()) { atom->getPropIfPresent(common_properties::_ringStereoAtoms, ringStereoAtoms); } while (!nextAtoms.empty()) { const Atom *ratom = nextAtoms.front(); nextAtoms.pop_front(); atomsSeen.set(ratom->getIdx()); if (ratom->getChiralTag() != Atom::CHI_UNSPECIFIED && !ratom->hasProp(common_properties::_CIPCode) && atomIsCandidateForRingStereochem(mol, ratom)) { int same = (ratom->getChiralTag() == atom->getChiralTag()) ? 
1 : -1; ringStereoAtoms.push_back(same * (ratom->getIdx() + 1)); INT_VECT oringatoms(0); ratom->getPropIfPresent(common_properties::_ringStereoAtoms, oringatoms); oringatoms.push_back(same * (atom->getIdx() + 1)); ratom->setProp(common_properties::_ringStereoAtoms, oringatoms, true); possibleSpecialCases.set(ratom->getIdx()); possibleSpecialCases.set(atom->getIdx()); } // now push this atom's neighbors boost::tie(beg, end) = mol.getAtomBonds(ratom); while (beg != end) { unsigned int bidx = mol[*beg]->getIdx(); if (!bondsSeen[bidx]) { bondsSeen.set(bidx); if (mol.getRingInfo()->numBondRings(bidx)) { const Atom *oatom = mol[*beg]->getOtherAtom(ratom); if (!atomsSeen[oatom->getIdx()] && !atomsUsed[oatom->getIdx()]) { nextAtoms.push_back(oatom); atomsUsed.set(oatom->getIdx()); } } } ++beg; } } // end of BFS if (ringStereoAtoms.size() != 0) { atom->setProp(common_properties::_ringStereoAtoms, ringStereoAtoms, true); // because we're only going to hit each ring atom once, the first atom we // encounter in a ring is going to end up with all the other atoms set as // stereoAtoms, but each of them will only have the first atom present. We // need to fix that. because the traverse from the first atom only // followed ring bonds, these things are all by definition in one ring // system. (Q: is this true if there's a spiro center in there?) INT_VECT same(mol.getNumAtoms(), 0); BOOST_FOREACH (int ringAtomEntry, ringStereoAtoms) { int ringAtomIdx = ringAtomEntry < 0 ? -ringAtomEntry - 1 : ringAtomEntry - 1; same[ringAtomIdx] = ringAtomEntry; } for (INT_VECT_CI rae = ringStereoAtoms.begin(); rae != ringStereoAtoms.end(); ++rae) { int ringAtomEntry = *rae; int ringAtomIdx = ringAtomEntry < 0 ? 
-ringAtomEntry - 1 : ringAtomEntry - 1; INT_VECT lringatoms(0); mol.getAtomWithIdx(ringAtomIdx) ->getPropIfPresent(common_properties::_ringStereoAtoms, lringatoms); CHECK_INVARIANT(lringatoms.size() > 0, "no other ring atoms found."); for (INT_VECT_CI orae = rae + 1; orae != ringStereoAtoms.end(); ++orae) { int oringAtomEntry = *orae; int oringAtomIdx = oringAtomEntry < 0 ? -oringAtomEntry - 1 : oringAtomEntry - 1; int theseDifferent = (ringAtomEntry < 0) ^ (oringAtomEntry < 0); lringatoms.push_back(theseDifferent ? -(oringAtomIdx + 1) : (oringAtomIdx + 1)); INT_VECT olringatoms(0); mol.getAtomWithIdx(oringAtomIdx) ->getPropIfPresent(common_properties::_ringStereoAtoms, olringatoms); CHECK_INVARIANT(olringatoms.size() > 0, "no other ring atoms found."); olringatoms.push_back(theseDifferent ? -(ringAtomIdx + 1) : (ringAtomIdx + 1)); mol.getAtomWithIdx(oringAtomIdx) ->setProp(common_properties::_ringStereoAtoms, olringatoms); } mol.getAtomWithIdx(ringAtomIdx) ->setProp(common_properties::_ringStereoAtoms, lringatoms); } } else {
/*!
  Chooses, for each tetrahedral stereocenter, one bond to draw wedged.

  Stereocenters that already begin a wedged, dashed or "unknown" bond are
  left alone. The remaining centers are processed in an order derived from
  how many hydrogens and chiral neighbors they have; for each one the
  single bond with the lowest score that has not been claimed by another
  center is picked. Bonds to hydrogens are strongly preferred, then bonds
  to neighbors that are non-chiral or have few chiral neighbors, then
  non-ring bonds.

  \param mol  the molecule of interest; ring information is computed with
              MolOps::findSSSR() if it is not initialized yet

  \return a map from the index of the picked bond to the index of the
          stereocenter it was picked for
*/
INT_MAP_INT pickBondsToWedge(const ROMol &mol) {
  // ring information is needed below
  if (!mol.getRingInfo()->isInitialized()) {
    MolOps::findSSSR(mol);
  }

  // sentinel: "not a chiral center"; values above it mean "already wedged"
  static const int noNbrs = 100;
  INT_VECT nChiralNbrs(mol.getNumAtoms(), noNbrs);

  // atoms that already start a wedged bond are taken out of the running
  for (ROMol::ConstBondIterator bondIt = mol.beginBonds();
       bondIt != mol.endBonds(); ++bondIt) {
    const Bond *bond = *bondIt;
    switch (bond->getBondDir()) {
      case Bond::BEGINWEDGE:
      case Bond::BEGINDASH:
      case Bond::UNKNOWN:
        nChiralNbrs[bond->getBeginAtomIdx()] = noNbrs + 1;
        break;
      default:
        break;
    }
  }

  // rank the chiral atoms by the number of chiral neighbors or Hs they have
  bool anyChiral = false;
  for (ROMol::ConstAtomIterator atomIt = mol.beginAtoms();
       atomIt != mol.endAtoms(); ++atomIt) {
    const Atom *at = *atomIt;
    const unsigned int atIdx = at->getIdx();
    if (nChiralNbrs[atIdx] > noNbrs) {
      continue;
    }
    const Atom::ChiralType tag = at->getChiralTag();
    if (tag != Atom::CHI_TETRAHEDRAL_CW && tag != Atom::CHI_TETRAHEDRAL_CCW) {
      continue;
    }

    anyChiral = true;
    int score = 0;
    ROMol::ADJ_ITER nbrIdx, endNbrs;
    for (boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(at);
         nbrIdx != endNbrs; ++nbrIdx) {
      const Atom *nbr = mol.getAtomWithIdx(*nbrIdx);
      if (nbr->getAtomicNum() == 1) {
        // special case: it's an H... we weight these especially high:
        score -= 10;
        continue;
      }
      const Atom::ChiralType nbrTag = nbr->getChiralTag();
      if (nbrTag == Atom::CHI_TETRAHEDRAL_CW ||
          nbrTag == Atom::CHI_TETRAHEDRAL_CCW) {
        score -= 1;
      }
    }
    nChiralNbrs[atIdx] = score;
  }

  std::vector<unsigned int> indices(mol.getNumAtoms());
  for (unsigned int i = 0; i < indices.size(); ++i) {
    indices[i] = i;
  }
  if (anyChiral) {
    std::sort(indices.begin(), indices.end(),
              Rankers::argless<INT_VECT>(nChiralNbrs));
  }

  // Pick one bond per chiral center, visiting the centers in the order
  // computed above:
  //  - only bonds that have not been picked for another center qualify
  //  - bonds to Hs are preferred over everything else
  //  - otherwise the lowest-scoring remaining bond is taken
  INT_MAP_INT res;
  for (std::vector<unsigned int>::const_iterator idxIt = indices.begin();
       idxIt != indices.end(); ++idxIt) {
    const unsigned int idx = *idxIt;
    if (nChiralNbrs[idx] > noNbrs) {
      continue;  // already have a wedged bond here
    }
    const Atom *atom = mol.getAtomWithIdx(idx);
    const Atom::ChiralType tag = atom->getChiralTag();
    // the indices are ordered such that all chiral atoms come first. If
    // this has no chiral flag, we can stop the whole loop:
    if (tag != Atom::CHI_TETRAHEDRAL_CW && tag != Atom::CHI_TETRAHEDRAL_CCW) {
      break;
    }

    std::vector<std::pair<int, int> > nbrScores;
    for (RDKit::ROMol::OBOND_ITER_PAIR atomBonds = mol.getAtomBonds(atom);
         atomBonds.first != atomBonds.second; ++atomBonds.first) {
      const Bond *bond = mol[*atomBonds.first].get();

      // can only wedge single bonds:
      if (bond->getBondType() != Bond::SINGLE) {
        continue;
      }
      const int bid = bond->getIdx();
      if (res.find(bid) != res.end()) {
        continue;  // claimed by another center
      }

      int nbrScore = 0;
      if (bond->getOtherAtom(atom)->getAtomicNum() == 1) {
        // very strong preference for Hs: lower than anything else can be
        nbrScore = -1000;
      } else {
        // prefer neighbors that are nonchiral or have as few chiral
        // neighbors as possible:
        const int oIdx = bond->getOtherAtomIdx(idx);
        if (nChiralNbrs[oIdx] < noNbrs) {
          // the counts are negative, so we have to subtract them off
          nbrScore -= 10 * nChiralNbrs[oIdx];
        }
        // prefer non-ring bonds;
        nbrScore += mol.getRingInfo()->numBondRings(bid);
      }
      nbrScores.push_back(std::make_pair(nbrScore, bid));
    }

    // There's still one situation where this whole thing can fail: an
    // unlucky situation where all neighbors of all neighbors of an atom are
    // chiral and that atom ends up being the last one picked for stereochem
    // assignment.
    //
    // That is caught as an error here, in the hope that it is as unlikely
    // to occur as it seems.
    CHECK_INVARIANT(nbrScores.size(), "no eligible neighbors for chiral center");
    std::sort(nbrScores.begin(), nbrScores.end(),
              Rankers::pairLess<int, int>());
    res[nbrScores[0].second] = idx;
  }
  return res;
}
std::string calcMolFormula(const ROMol &mol, bool separateIsotopes, bool abbreviateHIsotopes) { std::ostringstream res; std::map<std::pair<unsigned int, std::string>, unsigned int> counts; int charge = 0; unsigned int nHs = 0; const PeriodicTable *table = PeriodicTable::getTable(); for (ROMol::ConstAtomIterator atomIt = mol.beginAtoms(); atomIt != mol.endAtoms(); ++atomIt) { int atNum = (*atomIt)->getAtomicNum(); std::pair<unsigned int, std::string> key = std::make_pair(0, table->getElementSymbol(atNum)); if (separateIsotopes) { unsigned int isotope = (*atomIt)->getIsotope(); if (abbreviateHIsotopes && atNum == 1 && (isotope == 2 || isotope == 3)) { if (isotope == 2) key.second = "D"; else key.second = "T"; } else { key.first = isotope; } } if (counts.find(key) != counts.end()) { counts[key] += 1; } else { counts[key] = 1; } nHs += (*atomIt)->getTotalNumHs(); charge += (*atomIt)->getFormalCharge(); } if (nHs) { std::pair<unsigned int, std::string> key = std::make_pair(0, "H"); if (counts.find(key) != counts.end()) { counts[key] += nHs; } else { counts[key] = nHs; } } std::list<std::pair<unsigned int, std::string> > ks; for (std::map<std::pair<unsigned int, std::string>, unsigned int>::const_iterator countIt = counts.begin(); countIt != counts.end(); ++countIt) { ks.push_back(countIt->first); } ks.sort(HillCompare); for (std::list<std::pair<unsigned int, std::string> >::const_iterator kIter = ks.begin(); kIter != ks.end(); ++kIter) { const std::pair<unsigned int, std::string> &key = *kIter; if (key.first > 0) { res << "[" << key.first << key.second << "]"; } else { res << key.second; } if (counts[key] > 1) res << counts[key]; } if (charge > 0) { res << "+"; if (charge > 1) res << charge; } else if (charge < 0) { res << "-"; if (charge < -1) res << -1 * charge; } return res.str(); }