void setBondDirRelativeToAtom(Bond *bond, Atom *atom, Bond::BondDir dir, bool reverse, boost::dynamic_bitset<> &needsDir) { PRECONDITION(bond, "bad bond"); PRECONDITION(atom, "bad atom"); PRECONDITION(dir == Bond::ENDUPRIGHT || dir == Bond::ENDDOWNRIGHT, "bad dir"); PRECONDITION(atom == bond->getBeginAtom() || atom == bond->getEndAtom(), "atom doesn't belong to bond"); // std::cerr<<"\t\t>sbdra : bond "<<bond->getIdx()<<" atom // "<<atom->getIdx()<<" dir: " << dir << " reverse: "<<reverse<<std::endl; Atom *oAtom; if (bond->getBeginAtom() != atom) { reverse = !reverse; oAtom = bond->getBeginAtom(); } else { oAtom = bond->getEndAtom(); } if (reverse) { dir = (dir == Bond::ENDUPRIGHT ? Bond::ENDDOWNRIGHT : Bond::ENDUPRIGHT); } // to ensure maximum compatibility, even when a bond has unknown stereo (set // explicitly and recorded in _UnknownStereo property), I will still let a // direction to be computed. You must check the _UnknownStereo property to // make sure whether this bond is explictly set to have no direction info. // This makes sense because the direction info are all derived from // coordinates, the _UnknownStereo property is like extra metadata to be // used with the direction info. bond->setBondDir(dir); // std::cerr<<"\t\t\t\t -> dir "<<dir<<std::endl; // check for other single bonds around the other atom who need their // direction set and set it as demanded by the direction of this one: ROMol::OEDGE_ITER beg, end; boost::tie(beg, end) = oAtom->getOwningMol().getAtomBonds(oAtom); while (beg != end) { Bond *nbrBond = oAtom->getOwningMol()[*beg].get(); if (nbrBond != bond && needsDir[nbrBond->getIdx()]) { Bond::BondDir nbrDir = Bond::NONE; if ((nbrBond->getBeginAtom() == oAtom && bond->getBeginAtom() == oAtom) || (nbrBond->getEndAtom() == oAtom && bond->getEndAtom() == oAtom)) { // both bonds either start or end here; they *must* have different // directions: nbrDir = (dir == Bond::ENDUPRIGHT ? 
Bond::ENDDOWNRIGHT : Bond::ENDUPRIGHT); } else { // one starts here, the other ends here, they need to have the same // direction: nbrDir = dir; } nbrBond->setBondDir(nbrDir); needsDir[nbrBond->getIdx()] = 0; // std::cerr<<"\t\t\t\t update bond "<<nbrBond->getIdx()<<" to dir "<< // nbrDir<<std::endl; } ++beg; } }
// finds cycles void dfsFindCycles(ROMol &mol,int atomIdx,int inBondIdx, std::vector<AtomColors> &colors, const UINT_VECT &ranks, INT_VECT &atomOrders, VECT_INT_VECT &atomRingClosures, const boost::dynamic_bitset<> *bondsInPlay, const std::vector<std::string> *bondSymbols ){ Atom *atom = mol.getAtomWithIdx(atomIdx); atomOrders.push_back(atomIdx); colors[atomIdx] = GREY_NODE; // --------------------- // // Build the list of possible destinations from here // // --------------------- std::vector< PossibleType > possibles; possibles.resize(0); ROMol::OBOND_ITER_PAIR bondsPair = mol.getAtomBonds(atom); possibles.reserve(bondsPair.second-bondsPair.first); while(bondsPair.first != bondsPair.second){ BOND_SPTR theBond = mol[*(bondsPair.first)]; bondsPair.first++; if(bondsInPlay && !(*bondsInPlay)[theBond->getIdx()]) continue; if(inBondIdx<0 || theBond->getIdx() != static_cast<unsigned int>(inBondIdx)){ int otherIdx = theBond->getOtherAtomIdx(atomIdx); long rank=ranks[otherIdx]; // --------------------- // // things are a bit more complicated if we are sitting on a // ring atom. we would like to traverse first to the // ring-closure atoms, then to atoms outside the ring first, // then to atoms in the ring that haven't already been visited // (non-ring-closure atoms). // // Here's how the black magic works: // - non-ring atom neighbors have their original ranks // - ring atom neighbors have this added to their ranks: // (MAX_BONDTYPE - bondOrder)*MAX_NATOMS*MAX_NATOMS // - ring-closure neighbors lose a factor of: // (MAX_BONDTYPE+1)*MAX_NATOMS*MAX_NATOMS // // This tactic biases us to traverse to non-ring neighbors first, // original ordering if bond orders are all equal... crafty, neh? 
// // --------------------- if( colors[otherIdx] == GREY_NODE ) { rank -= static_cast<int>(MAX_BONDTYPE+1) * MAX_NATOMS*MAX_NATOMS; if(!bondSymbols){ rank += static_cast<int>(MAX_BONDTYPE - theBond->getBondType()) * MAX_NATOMS; } else { const std::string &symb=(*bondSymbols)[theBond->getIdx()]; boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end()); rank += (hsh%MAX_NATOMS) * MAX_NATOMS; } } else if( theBond->getOwningMol().getRingInfo()->numBondRings(theBond->getIdx()) ){ if(!bondSymbols){ rank += static_cast<int>(MAX_BONDTYPE - theBond->getBondType()) * MAX_NATOMS*MAX_NATOMS; } else { const std::string &symb=(*bondSymbols)[theBond->getIdx()]; boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end()); rank += (hsh%MAX_NATOMS)*MAX_NATOMS*MAX_NATOMS; } } //std::cerr<<"aIdx: "<< atomIdx <<" p: "<<otherIdx<<" Rank: "<<ranks[otherIdx] <<" "<<colors[otherIdx]<<" "<<theBond->getBondType()<<" "<<rank<<std::endl; possibles.push_back(PossibleType(rank,otherIdx,theBond.get())); } } // --------------------- // // Sort on ranks // // --------------------- std::sort(possibles.begin(),possibles.end(),_possibleCompare()); // --------------------- // // Now work the children // // --------------------- for(std::vector<PossibleType>::iterator possiblesIt=possibles.begin(); possiblesIt!=possibles.end(); possiblesIt++){ int possibleIdx = possiblesIt->get<1>(); Bond *bond = possiblesIt->get<2>(); Atom *otherAtom=mol.getAtomWithIdx(possibleIdx); switch(colors[possibleIdx]){ case WHITE_NODE: // ----- // we haven't seen this node at all before, traverse // ----- dfsFindCycles(mol,possibleIdx,bond->getIdx(),colors, ranks,atomOrders, atomRingClosures, bondsInPlay,bondSymbols); break; case GREY_NODE: // ----- // we've seen this, but haven't finished it (we're finishing a ring) // ----- atomRingClosures[possibleIdx].push_back(bond->getIdx()); atomRingClosures[atomIdx].push_back(bond->getIdx()); break; default: // ----- // this node has been finished. don't do anything. 
// ----- break; } } colors[atomIdx] = BLACK_NODE; }
std::string FragmentSmilesConstruct( ROMol &mol, int atomIdx, std::vector<Canon::AtomColors> &colors, const UINT_VECT &ranks, bool doKekule, bool canonical, bool doIsomericSmiles, bool allBondsExplicit, bool allHsExplicit, std::vector<unsigned int> &atomOrdering, const boost::dynamic_bitset<> *bondsInPlay = 0, const std::vector<std::string> *atomSymbols = 0, const std::vector<std::string> *bondSymbols = 0) { PRECONDITION(!bondsInPlay || bondsInPlay->size() >= mol.getNumBonds(), "bad bondsInPlay"); PRECONDITION(!atomSymbols || atomSymbols->size() >= mol.getNumAtoms(), "bad atomSymbols"); PRECONDITION(!bondSymbols || bondSymbols->size() >= mol.getNumBonds(), "bad bondSymbols"); Canon::MolStack molStack; // try to prevent excessive reallocation molStack.reserve(mol.getNumAtoms() + mol.getNumBonds()); std::stringstream res; std::map<int, int> ringClosureMap; int ringIdx, closureVal; if (!canonical) mol.setProp(common_properties::_StereochemDone, 1); std::list<unsigned int> ringClosuresToErase; Canon::canonicalizeFragment(mol, atomIdx, colors, ranks, molStack, bondsInPlay, bondSymbols, doIsomericSmiles); Bond *bond = 0; BOOST_FOREACH (Canon::MolStackElem mSE, molStack) { switch (mSE.type) { case Canon::MOL_STACK_ATOM: if (!ringClosuresToErase.empty()) { BOOST_FOREACH (unsigned int rclosure, ringClosuresToErase) { ringClosureMap.erase(rclosure); } ringClosuresToErase.clear(); } // std::cout<<"\t\tAtom: "<<mSE.obj.atom->getIdx()<<std::endl; if (!atomSymbols) { res << GetAtomSmiles(mSE.obj.atom, doKekule, bond, allHsExplicit); } else { res << (*atomSymbols)[mSE.obj.atom->getIdx()]; } atomOrdering.push_back(mSE.obj.atom->getIdx()); break; case Canon::MOL_STACK_BOND: bond = mSE.obj.bond; // std::cout<<"\t\tBond: "<<bond->getIdx()<<std::endl; if (!bondSymbols) { res << GetBondSmiles(bond, mSE.number, doKekule, allBondsExplicit); } else { res << (*bondSymbols)[bond->getIdx()]; } break; case Canon::MOL_STACK_RING: ringIdx = mSE.number; // std::cout<<"\t\tRing: "<<ringIdx; if 
(ringClosureMap.count(ringIdx)) { // the index is already in the map -> // we're closing a ring, so grab // the index and then delete the value: closureVal = ringClosureMap[ringIdx]; // ringClosureMap.erase(ringIdx); ringClosuresToErase.push_back(ringIdx); } else { // we're opening a new ring, find the index for it: closureVal = 1; bool done = false; // EFF: there's got to be a more efficient way to do this while (!done) { std::map<int, int>::iterator mapIt; for (mapIt = ringClosureMap.begin(); mapIt != ringClosureMap.end(); mapIt++) { if (mapIt->second == closureVal) break; } if (mapIt == ringClosureMap.end()) { done = true; } else { closureVal += 1; } } ringClosureMap[ringIdx] = closureVal; } if (closureVal >= 10) { res << "%"; } // std::cerr << " > " << closureVal <<std::endl; res << closureVal; break; case Canon::MOL_STACK_BRANCH_OPEN: res << "("; break; case Canon::MOL_STACK_BRANCH_CLOSE: res << ")"; break; default: break; }
void DetectBondStereoChemistry(ROMol &mol, const Conformer *conf) { PRECONDITION(conf, "no conformer"); #if 0 std::cerr << ">>>>>>>>>>>>>>>>>>>>>*\n"; std::cerr << ">>>>>>>>>>>>>>>>>>>>>*\n"; std::cerr << ">>>>>>>>>>>>>>>>>>>>>*\n"; std::cerr << "DBSN: "<<"\n"; std::cerr << ">>>>>>>>>>>>>>>>>>>>>*\n"; std::cerr << ">>>>>>>>>>>>>>>>>>>>>*\n"; std::cerr << ">>>>>>>>>>>>>>>>>>>>>*\n"; #endif // used to store the number of single bonds a given // single bond is adjacent to std::vector<unsigned int> singleBondCounts(mol.getNumBonds(), 0); std::vector<Bond *> bondsInPlay; VECT_INT_VECT dblBondNbrs(mol.getNumBonds()); boost::dynamic_bitset<> needsDir(mol.getNumBonds()); // find double bonds that should be considered for // stereochemistry // NOTE that we are explicitly excluding double bonds in rings // with this test. bool resetRings = false; if (!mol.getRingInfo()->isInitialized()) { resetRings = true; MolOps::fastFindRings(mol); } for (RWMol::BondIterator bondIt = mol.beginBonds(); bondIt != mol.endBonds(); ++bondIt) { if ((*bondIt)->getBondType() == Bond::DOUBLE && (*bondIt)->getStereo() != Bond::STEREOANY && (*bondIt)->getBondDir() != Bond::EITHERDOUBLE && (*bondIt)->getBeginAtom()->getDegree() > 1 && (*bondIt)->getEndAtom()->getDegree() > 1 && !(mol.getRingInfo()->numBondRings((*bondIt)->getIdx()))) { const Atom *a1 = (*bondIt)->getBeginAtom(); const Atom *a2 = (*bondIt)->getEndAtom(); ROMol::OEDGE_ITER beg, end; boost::tie(beg, end) = mol.getAtomBonds(a1); while (beg != end) { const Bond *nbrBond = mol[*beg].get(); if (nbrBond->getBondType() == Bond::SINGLE || nbrBond->getBondType() == Bond::AROMATIC) { singleBondCounts[nbrBond->getIdx()] += 1; needsDir[nbrBond->getIdx()] = 1; dblBondNbrs[(*bondIt)->getIdx()].push_back(nbrBond->getIdx()); } ++beg; } boost::tie(beg, end) = mol.getAtomBonds(a2); while (beg != end) { const Bond *nbrBond = mol[*beg].get(); if (nbrBond->getBondType() == Bond::SINGLE || nbrBond->getBondType() == Bond::AROMATIC) { 
singleBondCounts[nbrBond->getIdx()] += 1; needsDir[nbrBond->getIdx()] = 1; dblBondNbrs[(*bondIt)->getIdx()].push_back(nbrBond->getIdx()); } ++beg; } bondsInPlay.push_back(*bondIt); } } if (!bondsInPlay.size()) { if (resetRings) mol.getRingInfo()->reset(); return; } // order the double bonds based on the singleBondCounts of their neighbors: std::vector<std::pair<unsigned int, Bond *> > orderedBondsInPlay; for (unsigned int i = 0; i < bondsInPlay.size(); ++i) { Bond *dblBond = bondsInPlay[i]; unsigned int countHere = std::accumulate(dblBondNbrs[dblBond->getIdx()].begin(), dblBondNbrs[dblBond->getIdx()].end(), 0); // and favor double bonds that are *not* in rings. The combination of using // the sum // above (instead of the max) and this ring-membershipt test seem to fix // sf.net issue 3009836 if (!(mol.getRingInfo()->numBondRings(dblBond->getIdx()))) countHere *= 10; orderedBondsInPlay.push_back(std::make_pair(countHere, dblBond)); } std::sort(orderedBondsInPlay.begin(), orderedBondsInPlay.end()); // oof, now loop over the double bonds in that order and // update their neighbor directionalities: std::vector<std::pair<unsigned int, Bond *> >::reverse_iterator pairIter; for (pairIter = orderedBondsInPlay.rbegin(); pairIter != orderedBondsInPlay.rend(); ++pairIter) { updateDoubleBondNeighbors(mol, pairIter->second, conf, needsDir, singleBondCounts); } if (resetRings) mol.getRingInfo()->reset(); }
void updateDoubleBondNeighbors(ROMol &mol, Bond *dblBond, const Conformer *conf, boost::dynamic_bitset<> &needsDir, std::vector<unsigned int> &singleBondCounts) { // we want to deal only with double bonds: PRECONDITION(dblBond, "bad bond"); PRECONDITION(dblBond->getBondType() == Bond::DOUBLE, "not a double bond"); PRECONDITION(conf, "no conformer"); #if 0 std::cerr << "**********************\n"; std::cerr << "**********************\n"; std::cerr << "**********************\n"; std::cerr << "UDBN: "<<dblBond->getIdx()<<"\n"; #endif ROMol::OEDGE_ITER beg, end; Bond *bond1 = 0, *obond1 = 0; boost::tie(beg, end) = mol.getAtomBonds(dblBond->getBeginAtom()); while (beg != end) { Bond *tBond = mol[*beg].get(); if (tBond->getBondType() == Bond::SINGLE || tBond->getBondType() == Bond::AROMATIC) { // prefer bonds that already have their directionality set // or that are adjacent to more double bonds: if (!bond1) { bond1 = tBond; } else if (needsDir[tBond->getIdx()]) { if (singleBondCounts[tBond->getIdx()] > singleBondCounts[bond1->getIdx()]) { obond1 = bond1; bond1 = tBond; } else { obond1 = tBond; } } else { obond1 = bond1; bond1 = tBond; } } ++beg; } if (!bond1) { // no single bonds from the beginning atom, mark // the double bond as directionless and return: dblBond->setBondDir(Bond::EITHERDOUBLE); return; } Bond *bond2 = 0, *obond2 = 0; boost::tie(beg, end) = mol.getAtomBonds(dblBond->getEndAtom()); while (beg != end) { Bond *tBond = mol[*beg].get(); if (tBond->getBondType() == Bond::SINGLE || tBond->getBondType() == Bond::AROMATIC) { if (!bond2) { bond2 = tBond; } else if (needsDir[tBond->getIdx()]) { if (singleBondCounts[tBond->getIdx()] > singleBondCounts[bond2->getIdx()]) { obond2 = bond2; bond2 = tBond; } else { obond2 = tBond; } } else { // we already had a bond2 and we don't need to set the direction // on the new one, so swap. 
obond2 = bond2; bond2 = tBond; } } ++beg; } if (!bond2) { dblBond->setBondDir(Bond::EITHERDOUBLE); return; } CHECK_INVARIANT(bond1 && bond2, "no bonds found"); RDGeom::Point3D beginP = conf->getAtomPos(dblBond->getBeginAtomIdx()); RDGeom::Point3D endP = conf->getAtomPos(dblBond->getEndAtomIdx()); RDGeom::Point3D bond1P = conf->getAtomPos(bond1->getOtherAtomIdx(dblBond->getBeginAtomIdx())); RDGeom::Point3D bond2P = conf->getAtomPos(bond2->getOtherAtomIdx(dblBond->getEndAtomIdx())); // check for a linear arrangement of atoms on either end: bool linear = false; RDGeom::Point3D p1; RDGeom::Point3D p2; p1 = bond1P - beginP; p2 = endP - beginP; if (isLinearArrangement(p1, p2)) { if (!obond1) { linear = true; } else { // one of the bonds was linear; what about the other one? Bond *tBond = bond1; bond1 = obond1; obond1 = tBond; bond1P = conf->getAtomPos(bond1->getOtherAtomIdx(dblBond->getBeginAtomIdx())); p1 = bond1P - beginP; if (isLinearArrangement(p1, p2)) { linear = true; } } } if (!linear) { p1 = bond2P - endP; p2 = beginP - endP; if (isLinearArrangement(p1, p2)) { if (!obond2) { linear = true; } else { Bond *tBond = bond2; bond2 = obond2; obond2 = tBond; bond2P = conf->getAtomPos(bond2->getOtherAtomIdx(dblBond->getEndAtomIdx())); p1 = bond2P - beginP; if (isLinearArrangement(p1, p2)) { linear = true; } } } } if (linear) { dblBond->setBondDir(Bond::EITHERDOUBLE); return; } double ang = RDGeom::computeDihedralAngle(bond1P, beginP, endP, bond2P); bool sameTorsionDir; if (ang < M_PI / 2) { sameTorsionDir = false; } else { sameTorsionDir = true; } // std::cerr << " angle: "<<ang<<" sameTorsionDir: " <<sameTorsionDir<<"\n"; /* Time for some clarificatory text, because this gets really confusing really fast. The dihedral angle analysis above is based on viewing things with an atom order as follows: 1 \ 2 = 3 \ 4 so dihedrals > 90 correspond to sameDir=true however, the stereochemistry representation is based on something more like this: 2 \ 1 = 3 \ 4 (i.e. 
we consider the direction-setting single bonds to be starting at the double-bonded atom) */ bool reverseBondDir = sameTorsionDir; Atom *atom1 = dblBond->getBeginAtom(), *atom2 = dblBond->getEndAtom(); if (!needsDir[bond1->getIdx()]) { if (!needsDir[bond2->getIdx()]) { // check that we agree } else { if (bond1->getBeginAtom() != atom1) { reverseBondDir = !reverseBondDir; } setBondDirRelativeToAtom(bond2, atom2, bond1->getBondDir(), reverseBondDir, needsDir); } } else if (!needsDir[bond2->getIdx()]) { if (bond2->getBeginAtom() != atom2) { reverseBondDir = !reverseBondDir; } setBondDirRelativeToAtom(bond1, atom1, bond2->getBondDir(), reverseBondDir, needsDir); } else { setBondDirRelativeToAtom(bond1, atom1, Bond::ENDDOWNRIGHT, false, needsDir); setBondDirRelativeToAtom(bond2, atom2, Bond::ENDDOWNRIGHT, reverseBondDir, needsDir); } needsDir[bond1->getIdx()] = 0; needsDir[bond2->getIdx()] = 0; if (obond1 && needsDir[obond1->getIdx()]) { setBondDirRelativeToAtom(obond1, atom1, bond1->getBondDir(), bond1->getBeginAtom() == atom1, needsDir); needsDir[obond1->getIdx()] = 0; } if (obond2 && needsDir[obond2->getIdx()]) { setBondDirRelativeToAtom(obond2, atom2, bond2->getBondDir(), bond2->getBeginAtom() == atom2, needsDir); needsDir[obond2->getIdx()] = 0; } #if 0 std::cerr << " 1:"<<bond1->getIdx()<<" "; if(obond1) std::cerr<<obond1->getIdx()<<std::endl; else std::cerr<<"N/A"<<std::endl; std::cerr << " 2:"<<bond2->getIdx()<<" "; if(obond2) std::cerr<<obond2->getIdx()<<std::endl; else std::cerr<<"N/A"<<std::endl; std::cerr << "**********************\n"; std::cerr << "**********************\n"; std::cerr << "**********************\n"; #endif }
// // Determine bond wedge state /// Bond::BondDir DetermineBondWedgeState(const Bond *bond, const INT_MAP_INT &wedgeBonds, const Conformer *conf) { PRECONDITION(bond, "no bond"); PRECONDITION(bond->getBondType() == Bond::SINGLE, "bad bond order for wedging"); const ROMol *mol = &(bond->getOwningMol()); PRECONDITION(mol, "no mol"); Bond::BondDir res = bond->getBondDir(); if (!conf) { return res; } int bid = bond->getIdx(); INT_MAP_INT_CI wbi = wedgeBonds.find(bid); if (wbi == wedgeBonds.end()) { return res; } unsigned int waid = wbi->second; Atom *atom, *bondAtom; // = bond->getBeginAtom(); if (bond->getBeginAtom()->getIdx() == waid) { atom = bond->getBeginAtom(); bondAtom = bond->getEndAtom(); } else { atom = bond->getEndAtom(); bondAtom = bond->getBeginAtom(); } Atom::ChiralType chiralType = atom->getChiralTag(); CHECK_INVARIANT(chiralType == Atom::CHI_TETRAHEDRAL_CW || chiralType == Atom::CHI_TETRAHEDRAL_CCW, ""); // if we got this far, we really need to think about it: INT_LIST neighborBondIndices; DOUBLE_LIST neighborBondAngles; RDGeom::Point3D centerLoc, tmpPt; centerLoc = conf->getAtomPos(atom->getIdx()); tmpPt = conf->getAtomPos(bondAtom->getIdx()); centerLoc.z = 0.0; tmpPt.z = 0.0; RDGeom::Point3D refVect = centerLoc.directionVector(tmpPt); neighborBondIndices.push_back(bond->getIdx()); neighborBondAngles.push_back(0.0); ROMol::OEDGE_ITER beg, end; boost::tie(beg, end) = mol->getAtomBonds(atom); while (beg != end) { Bond *nbrBond = (*mol)[*beg].get(); Atom *otherAtom = nbrBond->getOtherAtom(atom); if (nbrBond != bond) { tmpPt = conf->getAtomPos(otherAtom->getIdx()); tmpPt.z = 0.0; RDGeom::Point3D tmpVect = centerLoc.directionVector(tmpPt); double angle = refVect.signedAngleTo(tmpVect); if (angle < 0.0) angle += 2. 
* M_PI; INT_LIST::iterator nbrIt = neighborBondIndices.begin(); DOUBLE_LIST::iterator angleIt = neighborBondAngles.begin(); // find the location of this neighbor in our angle-sorted list // of neighbors: while (angleIt != neighborBondAngles.end() && angle > (*angleIt)) { ++angleIt; ++nbrIt; } neighborBondAngles.insert(angleIt, angle); neighborBondIndices.insert(nbrIt, nbrBond->getIdx()); } ++beg; } // at this point, neighborBondIndices contains a list of bond // indices from the central atom. They are arranged starting // at the reference bond in CCW order (based on the current // depiction). int nSwaps = atom->getPerturbationOrder(neighborBondIndices); // in the case of three-coordinated atoms we may have to worry about // the location of the implicit hydrogen - Issue 209 // Check if we have one of these situation // // 0 1 0 2 // * \*/ // 1 - C - 2 C // // here the hydrogen will be between 1 and 2 and we need to add an additional // swap if (neighborBondAngles.size() == 3) { // three coordinated DOUBLE_LIST::iterator angleIt = neighborBondAngles.begin(); ++angleIt; // the first is the 0 (or reference bond - we will ignoire that double angle1 = (*angleIt); ++angleIt; double angle2 = (*angleIt); if (angle2 - angle1 >= (M_PI - 1e-4)) { // we have the above situation nSwaps++; } } #ifdef VERBOSE_STEREOCHEM BOOST_LOG(rdDebugLog) << "--------- " << nSwaps << std::endl; std::copy(neighborBondIndices.begin(), neighborBondIndices.end(), std::ostream_iterator<int>(BOOST_LOG(rdDebugLog), " ")); BOOST_LOG(rdDebugLog) << std::endl; std::copy(neighborBondAngles.begin(), neighborBondAngles.end(), std::ostream_iterator<double>(BOOST_LOG(rdDebugLog), " ")); BOOST_LOG(rdDebugLog) << std::endl; #endif if (chiralType == Atom::CHI_TETRAHEDRAL_CCW) { if (nSwaps % 2 == 1) { // ^ reverse) { res = Bond::BEGINDASH; } else { res = Bond::BEGINWEDGE; } } else { if (nSwaps % 2 == 1) { // ^ reverse) { res = Bond::BEGINWEDGE; } else { res = Bond::BEGINDASH; } } return res; }
bool kekulizeWorker(RWMol &mol, const INT_VECT &allAtms, boost::dynamic_bitset<> dBndCands, boost::dynamic_bitset<> dBndAdds, INT_VECT done, unsigned int maxBackTracks) { INT_DEQUE astack; INT_INT_DEQ_MAP options; int lastOpt = -1; boost::dynamic_bitset<> localBondsAdded(mol.getNumBonds()); // ok the algorithm goes something like this // - start with an atom that has been marked aromatic before // - check if it can have a double bond // - add its neighbors to the stack // - check if one of its neighbors can also have a double bond // - if yes add a double bond. // - if multiple neighbors can have double bonds - add them to a // options stack we may have to retrace out path if we chose the // wrong neighbor to add the double bond // - if double bond added update the candidates for double bond // - move to the next atom on the stack and repeat the process // - if an atom that can have multiple a double bond has no // neighbors that can take double bond - we made a mistake // earlier by picking a wrong candidate for double bond // - in this case back track to where we made the mistake int curr; INT_DEQUE btmoves; unsigned int numBT = 0; // number of back tracks so far while ((done.size() < allAtms.size()) || (astack.size() > 0)) { // pick a curr atom to work with if (astack.size() > 0) { curr = astack.front(); astack.pop_front(); } else { for (int allAtm : allAtms) { if (std::find(done.begin(), done.end(), allAtm) == done.end()) { curr = allAtm; break; } } } done.push_back(curr); // loop over the neighbors if we can add double bonds or // simply push them onto the stack INT_DEQUE opts; bool cCand = false; if (dBndCands[curr]) { cCand = true; } int ncnd; // if we are here because of backtracking if (options.find(curr) != options.end()) { opts = options[curr]; CHECK_INVARIANT(opts.size() > 0, ""); } else { RWMol::ADJ_ITER nbrIdx, endNbrs; boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(mol.getAtomWithIdx(curr)); while (nbrIdx != endNbrs) { // ignore if the neighbor 
has already been dealt with before if (std::find(done.begin(), done.end(), static_cast<int>(*nbrIdx)) != done.end()) { ++nbrIdx; continue; } // ignore if the neighbor is not part of the fused system if (std::find(allAtms.begin(), allAtms.end(), static_cast<int>(*nbrIdx)) == allAtms.end()) { ++nbrIdx; continue; } // if the neighbor is not on the stack add it if (std::find(astack.begin(), astack.end(), static_cast<int>(*nbrIdx)) == astack.end()) { astack.push_back(rdcast<int>(*nbrIdx)); } // check if the neighbor is also a candidate for a double bond // the refinement that we'll make to the candidate check we've already // done is to make sure that the bond is either flagged as aromatic // or involves a dummy atom. This was Issue 3525076. // This fix is not really 100% of the way there: a situation like // that for Issue 3525076 but involving a dummy atom in the cage // could lead to the same failure. The full fix would require // a fairly detailed analysis of all bonds in the molecule to determine // which of them is eligible to be converted. 
if (cCand && dBndCands[*nbrIdx] && (mol.getBondBetweenAtoms(curr, *nbrIdx)->getIsAromatic() || mol.getAtomWithIdx(curr)->getAtomicNum() == 0 || mol.getAtomWithIdx(*nbrIdx)->getAtomicNum() == 0)) { opts.push_back(rdcast<int>(*nbrIdx)); } // end of curr atoms can have a double bond ++nbrIdx; } // end of looping over neighbors } // now add a double bond from current to one of the neighbors if we can if (cCand) { if (opts.size() > 0) { ncnd = opts.front(); opts.pop_front(); Bond *bnd = mol.getBondBetweenAtoms(curr, ncnd); bnd->setBondType(Bond::DOUBLE); // remove current and the neighbor from the dBndCands list dBndCands[curr] = 0; dBndCands[ncnd] = 0; // add them to the list of bonds to which have been made double dBndAdds[bnd->getIdx()] = 1; localBondsAdded[bnd->getIdx()] = 1; // if this is an atom we previously visted and picked we // simply tried a different option now, overwrite the options // stored for this atoms if (options.find(curr) != options.end()) { if (opts.size() == 0) { options.erase(curr); btmoves.pop_back(); if (btmoves.size() > 0) { lastOpt = btmoves.back(); } else { lastOpt = -1; } } else { options[curr] = opts; } } else { // this is new atoms we are trying and have other // neighbors as options to add double bond store this to // the options stack, we may have made a mistake in // which one we chose and have to return here if (opts.size() > 0) { lastOpt = curr; btmoves.push_back(lastOpt); options[curr] = opts; } } } // end of adding a double bond else { // we have an atom that should be getting a double bond // but none of the neighbors can take one. 
Most likely // because of a wrong choice earlier so back track if ((lastOpt >= 0) && (numBT < maxBackTracks)) { // std::cerr << "PRE BACKTRACK" << std::endl; // mol.debugMol(std::cerr); backTrack(mol, options, lastOpt, done, astack, dBndCands, dBndAdds); // std::cerr << "POST BACKTRACK" << std::endl; // mol.debugMol(std::cerr); numBT++; } else { // undo any remaining changes we made while here // this was github #962 for (unsigned int bidx = 0; bidx < mol.getNumBonds(); ++bidx) { if (localBondsAdded[bidx]) { mol.getBondWithIdx(bidx)->setBondType(Bond::SINGLE); } } return false; } } // end of else try to backtrack } // end of curr atom atom being a cand for double bond } // end of while we are not done with all atoms return true; }
// Looks up the bond carrying bookmark `mark` in the owning molecule and
// appends its index to this group's bond list. Requires an owning molecule.
void SubstanceGroup::addBondWithBookmark(int mark) {
  PRECONDITION(dp_mol, "bad mol");
  d_bonds.push_back(dp_mol->getUniqueBondWithBookmark(mark)->getIdx());
}
void canonicalDFSTraversal(ROMol &mol,int atomIdx,int inBondIdx, std::vector<AtomColors> &colors, VECT_INT_VECT &cycles, INT_VECT &ranks, INT_VECT &cyclesAvailable, MolStack &molStack, INT_VECT &atomOrders, INT_VECT &bondVisitOrders, VECT_INT_VECT &atomRingClosures, std::vector<INT_LIST> &atomTraversalBondOrder, const boost::dynamic_bitset<> *bondsInPlay, const std::vector<std::string> *bondSymbols ){ PRECONDITION(colors.size()>=mol.getNumAtoms(),"vector too small"); PRECONDITION(ranks.size()>=mol.getNumAtoms(),"vector too small"); PRECONDITION(atomOrders.size()>=mol.getNumAtoms(),"vector too small"); PRECONDITION(bondVisitOrders.size()>=mol.getNumBonds(),"vector too small"); PRECONDITION(atomRingClosures.size()>=mol.getNumAtoms(),"vector too small"); PRECONDITION(atomTraversalBondOrder.size()>=mol.getNumAtoms(),"vector too small"); PRECONDITION(!bondsInPlay || bondsInPlay->size()>=mol.getNumBonds(),"bondsInPlay too small"); PRECONDITION(!bondSymbols || bondSymbols->size()>=mol.getNumBonds(),"bondSymbols too small"); int nAttached=0; Atom *atom = mol.getAtomWithIdx(atomIdx); INT_LIST directTravList,cycleEndList; molStack.push_back(MolStackElem(atom)); atomOrders[atom->getIdx()] = molStack.size(); colors[atomIdx] = GREY_NODE; // --------------------- // // Build the list of possible destinations from here // // --------------------- std::vector< PossibleType > possibles; possibles.resize(0); ROMol::OBOND_ITER_PAIR bondsPair = mol.getAtomBonds(atom); possibles.reserve(bondsPair.second-bondsPair.first); while(bondsPair.first != bondsPair.second){ BOND_SPTR theBond = mol[*(bondsPair.first)]; bondsPair.first++; if(bondsInPlay && !(*bondsInPlay)[theBond->getIdx()]) continue; if(inBondIdx<0 || theBond->getIdx() != static_cast<unsigned int>(inBondIdx)){ int otherIdx = theBond->getOtherAtomIdx(atomIdx); long rank=ranks[otherIdx]; // --------------------- // // things are a bit more complicated if we are sitting on a // ring atom we would like to traverse first to the // 
ring-closure atoms, then to atoms outside the ring first, // then to atoms in the ring that haven't already been visited // (non-ring-closure atoms). // // Here's how the black magic works: // - non-ring atom neighbors have their original ranks // - ring atom neighbors have this added to their ranks: // (Bond::OTHER - bondOrder)*MAX_NATOMS*MAX_NATOMS // - ring-closure neighbors lose a factor of: // (Bond::OTHER+1)*MAX_NATOMS*MAX_NATOMS // // This tactic biases us to traverse to non-ring neighbors first, // original ordering if bond orders are all equal... crafty, neh? // // --------------------- if( colors[otherIdx] == GREY_NODE ) { rank -= static_cast<int>(Bond::OTHER+1) * MAX_NATOMS*MAX_NATOMS; if(!bondSymbols){ rank += static_cast<int>(Bond::OTHER - theBond->getBondType()) * MAX_NATOMS; } else { const std::string &symb=(*bondSymbols)[theBond->getIdx()]; boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end()); rank += (hsh%MAX_NATOMS) * MAX_NATOMS; } } else if( theBond->getOwningMol().getRingInfo()->numBondRings(theBond->getIdx()) ){ if(!bondSymbols){ rank += static_cast<int>(Bond::OTHER - theBond->getBondType()) * MAX_NATOMS*MAX_NATOMS; } else { const std::string &symb=(*bondSymbols)[theBond->getIdx()]; boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end()); rank += (hsh%MAX_NATOMS)*MAX_NATOMS*MAX_NATOMS; } } possibles.push_back(PossibleType(rank,otherIdx,theBond.get())); } } // --------------------- // // Sort on ranks // // --------------------- std::sort(possibles.begin(),possibles.end(),_possibleComp); // --------------------- // // Now work the children // // --------------------- std::vector<MolStack> subStacks; for(std::vector<PossibleType>::iterator possiblesIt=possibles.begin(); possiblesIt!=possibles.end(); possiblesIt++){ MolStack subStack; #if 0 int possibleIdx = possiblesIt->second.first; Bond *bond = possiblesIt->second.second; #endif int possibleIdx = possiblesIt->get<1>(); Bond *bond = possiblesIt->get<2>(); Atom 
*otherAtom=mol.getAtomWithIdx(possibleIdx); unsigned int lowestRingIdx; INT_VECT::const_iterator cAIt; switch(colors[possibleIdx]){ case WHITE_NODE: // ----- // we haven't seen this node at all before // ----- // it might have some residual data from earlier calls, clean that up: if(otherAtom->hasProp("_TraversalBondIndexOrder")){ otherAtom->clearProp("_TraversalBondIndexOrder"); } directTravList.push_back(bond->getIdx()); subStack.push_back(MolStackElem(bond,atomIdx)); canonicalDFSTraversal(mol,possibleIdx,bond->getIdx(),colors, cycles,ranks,cyclesAvailable,subStack, atomOrders,bondVisitOrders,atomRingClosures,atomTraversalBondOrder, bondsInPlay,bondSymbols); subStacks.push_back(subStack); nAttached += 1; break; case GREY_NODE: // ----- // we've seen this, but haven't finished it (we're finishing a ring) // ----- cycleEndList.push_back(bond->getIdx()); cAIt=std::find(cyclesAvailable.begin(), cyclesAvailable.end(),1); if(cAIt==cyclesAvailable.end()){ throw ValueErrorException("Too many rings open at once. SMILES cannot be generated."); } lowestRingIdx = cAIt-cyclesAvailable.begin(); cyclesAvailable[lowestRingIdx] = 0; cycles[possibleIdx].push_back(lowestRingIdx); ++lowestRingIdx; bond->setProp("_TraversalRingClosureBond",lowestRingIdx); molStack.push_back(MolStackElem(bond, atom->getIdx())); molStack.push_back(MolStackElem(lowestRingIdx)); // we need to add this bond (which closes the ring) to the traversal list for the // other atom as well: atomTraversalBondOrder[otherAtom->getIdx()].push_back(bond->getIdx()); atomRingClosures[otherAtom->getIdx()].push_back(bond->getIdx()); break; default: // ----- // this node has been finished. don't do anything. 
// ----- break; } } INT_VECT &ringClosures=atomRingClosures[atom->getIdx()]; CHECK_INVARIANT(ringClosures.size()==cycles[atomIdx].size(), "ring closure mismatch"); for(unsigned int i=0;i<ringClosures.size();i++){ int ringIdx=cycles[atomIdx][i]; ringIdx += 1; molStack.push_back(MolStackElem(ringIdx)); } cycles[atomIdx].resize(0); MolStack::const_iterator ciMS; for(int i=0;i<nAttached;i++){ if(i<nAttached-1){ int branchIdx=0; if(subStacks[i].begin()->type==MOL_STACK_ATOM){ branchIdx=subStacks[i].begin()->obj.atom->getIdx(); } else if(subStacks[i].begin()->type==MOL_STACK_BOND){ branchIdx=-1*subStacks[i].begin()->obj.bond->getIdx(); } else { ASSERT_INVARIANT(0,"branch started with something other than an atom or bond"); } molStack.push_back(MolStackElem("(",branchIdx)); for(ciMS=subStacks[i].begin();ciMS!=subStacks[i].end();ciMS++){ molStack.push_back(*ciMS); switch(ciMS->type){ case MOL_STACK_ATOM: atomOrders[ciMS->obj.atom->getIdx()] = molStack.size(); break; case MOL_STACK_BOND: bondVisitOrders[ciMS->obj.bond->getIdx()] = molStack.size(); break; default: break; } } molStack.push_back(MolStackElem(")",branchIdx)); } else { for(ciMS=subStacks[i].begin();ciMS!=subStacks[i].end();ciMS++){ molStack.push_back(*ciMS); switch(ciMS->type){ case MOL_STACK_ATOM: atomOrders[ciMS->obj.atom->getIdx()] = molStack.size(); break; case MOL_STACK_BOND: bondVisitOrders[ciMS->obj.bond->getIdx()] = molStack.size(); break; default: break; } } } } //std::cerr<<"*****>>>>>> Traversal results for atom: "<<atom->getIdx()<<"> "; INT_LIST travList; // first push on the incoming bond: if(inBondIdx >= 0){ //std::cerr<<" "<<inBondIdx; travList.push_back(inBondIdx); } // ... ring closures that end here: for(INT_LIST_CI ilci=cycleEndList.begin();ilci!=cycleEndList.end();++ilci){ //std::cerr<<" ["<<*ilci<<"]"; travList.push_back(*ilci); } // ... 
ring closures that start here: // if(atom->hasProp("_TraversalBondIndexOrder")){ // INT_LIST indirectTravList; // atom->getProp("_TraversalBondIndexOrder",indirectTravList); // for(INT_LIST_CI ilci=indirectTravList.begin();ilci!=indirectTravList.end();++ilci){ // //std::cerr<<" ("<<*ilci<<")"; // travList.push_back(*ilci); // } // } BOOST_FOREACH(int ili,atomTraversalBondOrder[atom->getIdx()]){ travList.push_back(ili); }