// find the neighbors for an atoms that are not connected by single bond that is // not refBond // if checkDir is true only neighbor atoms with bonds marked with a direction // will be returned void findAtomNeighborsHelper(const ROMol &mol, const Atom *atom, const Bond *refBond, UINT_VECT &neighbors, bool checkDir = false) { PRECONDITION(atom, "bad atom"); PRECONDITION(refBond, "bad bond"); neighbors.clear(); ROMol::OEDGE_ITER beg, end; boost::tie(beg, end) = mol.getAtomBonds(atom); while (beg != end) { const BOND_SPTR bond = mol[*beg]; Bond::BondDir dir = bond->getBondDir(); if (bond->getBondType() == Bond::SINGLE && bond->getIdx() != refBond->getIdx()) { if (checkDir) { if ((dir != Bond::ENDDOWNRIGHT) && (dir != Bond::ENDUPRIGHT)) { ++beg; continue; } } Atom *nbrAtom = bond->getOtherAtom(atom); neighbors.push_back(nbrAtom->getIdx()); } ++beg; } }
bool atomIsCandidateForRingStereochem(const ROMol &mol, const Atom *atom) { PRECONDITION(atom, "bad atom"); bool res = false; if (!atom->getPropIfPresent(common_properties::_ringStereochemCand, res)) { const RingInfo *ringInfo = mol.getRingInfo(); if (ringInfo->isInitialized() && ringInfo->numAtomRings(atom->getIdx())) { ROMol::OEDGE_ITER beg, end; boost::tie(beg, end) = mol.getAtomBonds(atom); std::vector<const Atom *> nonRingNbrs; std::vector<const Atom *> ringNbrs; while (beg != end) { const BOND_SPTR bond = mol[*beg]; if (!ringInfo->numBondRings(bond->getIdx())) { nonRingNbrs.push_back(bond->getOtherAtom(atom)); } else { ringNbrs.push_back(bond->getOtherAtom(atom)); } ++beg; } unsigned int rank1 = 0, rank2 = 0; switch (nonRingNbrs.size()) { case 0: // don't do spiro: res = false; break; case 1: if (ringNbrs.size() == 2) res = true; break; case 2: if (nonRingNbrs[0]->getPropIfPresent(common_properties::_CIPRank, rank1) && nonRingNbrs[1]->getPropIfPresent(common_properties::_CIPRank, rank2)) { if (rank1 == rank2) { res = false; } else { res = true; } } break; default: res = false; } } atom->setProp(common_properties::_ringStereochemCand, res, 1); } return res; }
// finds cycles void dfsFindCycles(ROMol &mol,int atomIdx,int inBondIdx, std::vector<AtomColors> &colors, const UINT_VECT &ranks, INT_VECT &atomOrders, VECT_INT_VECT &atomRingClosures, const boost::dynamic_bitset<> *bondsInPlay, const std::vector<std::string> *bondSymbols ){ Atom *atom = mol.getAtomWithIdx(atomIdx); atomOrders.push_back(atomIdx); colors[atomIdx] = GREY_NODE; // --------------------- // // Build the list of possible destinations from here // // --------------------- std::vector< PossibleType > possibles; possibles.resize(0); ROMol::OBOND_ITER_PAIR bondsPair = mol.getAtomBonds(atom); possibles.reserve(bondsPair.second-bondsPair.first); while(bondsPair.first != bondsPair.second){ BOND_SPTR theBond = mol[*(bondsPair.first)]; bondsPair.first++; if(bondsInPlay && !(*bondsInPlay)[theBond->getIdx()]) continue; if(inBondIdx<0 || theBond->getIdx() != static_cast<unsigned int>(inBondIdx)){ int otherIdx = theBond->getOtherAtomIdx(atomIdx); long rank=ranks[otherIdx]; // --------------------- // // things are a bit more complicated if we are sitting on a // ring atom. we would like to traverse first to the // ring-closure atoms, then to atoms outside the ring first, // then to atoms in the ring that haven't already been visited // (non-ring-closure atoms). // // Here's how the black magic works: // - non-ring atom neighbors have their original ranks // - ring atom neighbors have this added to their ranks: // (MAX_BONDTYPE - bondOrder)*MAX_NATOMS*MAX_NATOMS // - ring-closure neighbors lose a factor of: // (MAX_BONDTYPE+1)*MAX_NATOMS*MAX_NATOMS // // This tactic biases us to traverse to non-ring neighbors first, // original ordering if bond orders are all equal... crafty, neh? // // --------------------- if( colors[otherIdx] == GREY_NODE ) { rank -= static_cast<int>(MAX_BONDTYPE+1) * MAX_NATOMS*MAX_NATOMS; if(!bondSymbols){ rank += static_cast<int>(MAX_BONDTYPE - theBond->getBondType()) * MAX_NATOMS; } else { const std::string &symb=(*bondSymbols)[theBond->getIdx()]; boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end()); rank += (hsh%MAX_NATOMS) * MAX_NATOMS; } } else if( theBond->getOwningMol().getRingInfo()->numBondRings(theBond->getIdx()) ){ if(!bondSymbols){ rank += static_cast<int>(MAX_BONDTYPE - theBond->getBondType()) * MAX_NATOMS*MAX_NATOMS; } else { const std::string &symb=(*bondSymbols)[theBond->getIdx()]; boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end()); rank += (hsh%MAX_NATOMS)*MAX_NATOMS*MAX_NATOMS; } } //std::cerr<<"aIdx: "<< atomIdx <<" p: "<<otherIdx<<" Rank: "<<ranks[otherIdx] <<" "<<colors[otherIdx]<<" "<<theBond->getBondType()<<" "<<rank<<std::endl; possibles.push_back(PossibleType(rank,otherIdx,theBond.get())); } } // --------------------- // // Sort on ranks // // --------------------- std::sort(possibles.begin(),possibles.end(),_possibleCompare()); // --------------------- // // Now work the children // // --------------------- for(std::vector<PossibleType>::iterator possiblesIt=possibles.begin(); possiblesIt!=possibles.end(); possiblesIt++){ int possibleIdx = possiblesIt->get<1>(); Bond *bond = possiblesIt->get<2>(); Atom *otherAtom=mol.getAtomWithIdx(possibleIdx); switch(colors[possibleIdx]){ case WHITE_NODE: // ----- // we haven't seen this node at all before, traverse // ----- dfsFindCycles(mol,possibleIdx,bond->getIdx(),colors, ranks,atomOrders, atomRingClosures, bondsInPlay,bondSymbols); break; case GREY_NODE: // ----- // we've seen this, but haven't finished it (we're finishing a ring) // ----- atomRingClosures[possibleIdx].push_back(bond->getIdx()); atomRingClosures[atomIdx].push_back(bond->getIdx()); break; default: // ----- // this node has been finished. don't do anything. // ----- break; } } colors[atomIdx] = BLACK_NODE; }
void MolDraw2D::doContinuousHighlighting( const ROMol &mol , const vector<int> *highlight_atoms , const vector<int> *highlight_bonds , const map<int,DrawColour> *highlight_atom_map, const map<int,DrawColour> *highlight_bond_map, const std::map<int,double> *highlight_radii ) { int orig_lw=lineWidth(); int tgt_lw=lineWidth()*8; // try to scale lw to reflect the overall scaling: tgt_lw = max(orig_lw*2,min(tgt_lw,(int)(scale_/25.*tgt_lw))); // the 25 here is extremely empirical bool orig_fp=fillPolys(); ROMol::VERTEX_ITER this_at , end_at; if(highlight_bonds){ boost::tie( this_at , end_at ) = mol.getVertices(); while( this_at != end_at ) { int this_idx = mol[*this_at]->getIdx(); ROMol::OEDGE_ITER nbr , end_nbr; boost::tie( nbr , end_nbr ) = mol.getAtomBonds( mol[*this_at].get() ); while( nbr != end_nbr ) { const BOND_SPTR bond = mol[*nbr]; ++nbr; int nbr_idx = bond->getOtherAtomIdx( this_idx ); if( nbr_idx < static_cast<int>( at_cds_.size() ) && nbr_idx > this_idx ) { if(std::find(highlight_bonds->begin(),highlight_bonds->end(),bond->getIdx()) != highlight_bonds->end()){ DrawColour col=drawOptions().highlightColour; if(highlight_bond_map && highlight_bond_map->find(bond->getIdx())!=highlight_bond_map->end()){ col = highlight_bond_map->find(bond->getIdx())->second; } setLineWidth(tgt_lw); Point2D at1_cds = at_cds_[this_idx]; Point2D at2_cds = at_cds_[nbr_idx]; drawLine( at1_cds , at2_cds , col , col); } } } ++this_at; } } if(highlight_atoms){ boost::tie( this_at , end_at ) = mol.getVertices(); while( this_at != end_at ) { int this_idx = mol[*this_at]->getIdx(); if(std::find(highlight_atoms->begin(),highlight_atoms->end(),this_idx) != highlight_atoms->end()){ DrawColour col=drawOptions().highlightColour; if(highlight_atom_map && highlight_atom_map->find(this_idx)!=highlight_atom_map->end()){ col = highlight_atom_map->find(this_idx)->second; } Point2D p1=at_cds_[this_idx]; Point2D p2=at_cds_[this_idx]; double radius=0.4; if(highlight_radii && highlight_radii->find(this_idx)!=highlight_radii->end()){ radius = highlight_radii->find(this_idx)->second; } Point2D offset(radius,radius); p1 -= offset; p2 += offset; setColour(col); setFillPolys(true); setLineWidth(1); drawEllipse(p1,p2); } ++this_at; } } setLineWidth(orig_lw); setFillPolys(orig_fp); }
// construct a vector with <atomIdx,direction> pairs for // neighbors of a given atom. This list will only be // non-empty if at least one of the bonds has its direction // set. void findAtomNeighborDirHelper(const ROMol &mol, const Atom *atom, const Bond *refBond, UINT_VECT &ranks, INT_PAIR_VECT &neighbors, bool &hasExplicitUnknownStereo) { PRECONDITION(atom, "bad atom"); PRECONDITION(refBond, "bad bond"); bool seenDir = false; ROMol::OEDGE_ITER beg, end; boost::tie(beg, end) = mol.getAtomBonds(atom); while (beg != end) { const BOND_SPTR bond = mol[*beg]; // check whether this bond is explictly set to have unknown stereo if (!hasExplicitUnknownStereo) { int explicit_unknown_stereo; if (bond->getBondDir() == Bond::UNKNOWN // there's a squiggle bond || (bond->getPropIfPresent<int>(common_properties::_UnknownStereo, explicit_unknown_stereo) && explicit_unknown_stereo)) hasExplicitUnknownStereo = true; } Bond::BondDir dir = bond->getBondDir(); if (bond->getIdx() != refBond->getIdx()) { if (dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT) { seenDir = true; // If we're considering the bond "backwards", (i.e. from end // to beginning, reverse the effective direction: if (atom != bond->getBeginAtom()) { if (dir == Bond::ENDDOWNRIGHT) dir = Bond::ENDUPRIGHT; else dir = Bond::ENDDOWNRIGHT; } } Atom *nbrAtom = bond->getOtherAtom(atom); neighbors.push_back(std::make_pair(nbrAtom->getIdx(), dir)); } ++beg; } if (!seenDir) { neighbors.clear(); } else { if (neighbors.size() == 2 && ranks[neighbors[0].first] == ranks[neighbors[1].first]) { // the two substituents are identical, no stereochemistry here: neighbors.clear(); } else { // it's possible that direction was set only one of the bonds, set the // other // bond's direction to be reversed: if (neighbors[0].second != Bond::ENDDOWNRIGHT && neighbors[0].second != Bond::ENDUPRIGHT) { CHECK_INVARIANT(neighbors.size() > 1, "too few neighbors"); neighbors[0].second = neighbors[1].second == Bond::ENDDOWNRIGHT ? Bond::ENDUPRIGHT : Bond::ENDDOWNRIGHT; } else if (neighbors.size() > 1 && neighbors[1].second != Bond::ENDDOWNRIGHT && neighbors[1].second != Bond::ENDUPRIGHT) { neighbors[1].second = neighbors[0].second == Bond::ENDDOWNRIGHT ? Bond::ENDUPRIGHT : Bond::ENDDOWNRIGHT; } } } }
static void addResult(std::vector<std::pair<ROMOL_SPTR, ROMOL_SPTR> >& res, // const SignatureVector& resSignature, const ROMol& mol, const BondVector_t& bonds_selected, size_t maxCuts) { #ifdef _DEBUG std::cout << res.size() + 1 << ": "; #endif RWMol em(mol); // loop through the bonds to delete. == deleteBonds() unsigned isotope = 0; std::map<unsigned, unsigned> isotope_track; for (size_t i = 0; i < bonds_selected.size(); i++) { #ifdef _DEBUG { std::string symbol = em.getAtomWithIdx(bonds_selected[i].first)->getSymbol(); int label = 0; em.getAtomWithIdx(bonds_selected[i].first) ->getPropIfPresent(common_properties::molAtomMapNumber, label); char a1[32]; if (0 == label) sprintf(a1, "\'%s\'", symbol.c_str(), label); else sprintf(a1, "\'%s:%u\'", symbol.c_str(), label); symbol = em.getAtomWithIdx(bonds_selected[i].second)->getSymbol(); label = 0; em.getAtomWithIdx(bonds_selected[i].second) ->getPropIfPresent(common_properties::molAtomMapNumber, label); char a2[32]; if (0 == label) sprintf(a2, "\'%s\'", symbol.c_str(), label); else sprintf(a2, "\'%s:%u\'", symbol.c_str(), label); std::cout << "(" << bonds_selected[i].first << a1 << "," << bonds_selected[i].second << a2 << ") "; } #endif isotope += 1; // remove the bond em.removeBond(bonds_selected[i].first, bonds_selected[i].second); // now add attachement points and set attachment point lables Atom* a = new Atom(0); a->setProp(common_properties::molAtomMapNumber, (int)isotope); unsigned newAtomA = em.addAtom(a, true, true); em.addBond(bonds_selected[i].first, newAtomA, Bond::SINGLE); a = new Atom(0); a->setProp(common_properties::molAtomMapNumber, (int)isotope); unsigned newAtomB = em.addAtom(a, true, true); em.addBond(bonds_selected[i].second, newAtomB, Bond::SINGLE); // keep track of where to put isotopes isotope_track[newAtomA] = isotope; isotope_track[newAtomB] = isotope; } #ifdef _DEBUG std::cout << "\n"; #endif RWMOL_SPTR core, side_chains; // core & side_chains output molecules if (isotope == 1) { side_chains = RWMOL_SPTR(new RWMol(em)); // output = '%s,%s,,%s.%s' // DEBUG PRINT #ifdef _DEBUG // OK: std::cout<<res.size()+1<<" isotope="<< isotope <<","<< // MolToSmiles(*side_chains, true) <<"\n"; #endif } else if (isotope >= 2) { std::vector<std::vector<int> > frags; unsigned int nFrags = MolOps::getMolFrags(em, frags); //#check if its a valid triple or bigger cut. matchObj = re.search( //'\*.*\*.*\*', f) // check if exists a fragment with maxCut connection points (*.. *.. *) if (isotope >= 3) { bool valid = false; for (size_t i = 0; i < nFrags; i++) { unsigned nLabels = 0; for (size_t ai = 0; ai < frags[i].size(); ai++) { if (isotope_track.end() != isotope_track.find(frags[i][ai])) // new added atom ++nLabels; // found connection point } if (nLabels >= maxCuts) { // looks like it should be selected as core ! ?????? valid = true; break; } } if (!valid) { #ifdef _DEBUG std::cout << "isotope>=3: invalid fragments. fragment with maxCut " "connection points not found" << "\n"; #endif return; } } size_t iCore = std::numeric_limits<size_t>::max(); side_chains = RWMOL_SPTR(new RWMol); std::map<unsigned, unsigned> visitedBonds; // key is bond index in source molecule unsigned maxAttachments = 0; for (size_t i = 0; i < frags.size(); i++) { unsigned nAttachments = 0; for (size_t ai = 0; ai < frags[i].size(); ai++) { if (isotope_track.end() != isotope_track.find( frags[i][ai])) // == if(a->hasProp("molAtomMapNumber")) ++nAttachments; } if (maxAttachments < nAttachments) maxAttachments = nAttachments; if (1 == nAttachments) { // build side-chain set of molecules from // selected fragment std::map<unsigned, unsigned> newAtomMap; // key is atom index in source molecule for (size_t ai = 0; ai < frags[i].size(); ai++) { Atom* a = em.getAtomWithIdx(frags[i][ai]); newAtomMap[frags[i][ai]] = side_chains->addAtom(a->copy(), true, true); } // add all bonds from this fragment for (size_t ai = 0; ai < frags[i].size(); ai++) { Atom* a = em.getAtomWithIdx(frags[i][ai]); ROMol::OEDGE_ITER beg, end; for (boost::tie(beg, end) = em.getAtomBonds(a); beg != end; ++beg) { const BOND_SPTR bond = em[*beg]; if (newAtomMap.end() == newAtomMap.find(bond->getBeginAtomIdx()) || newAtomMap.end() == newAtomMap.find(bond->getEndAtomIdx()) || visitedBonds.end() != visitedBonds.find(bond->getIdx())) continue; unsigned ai1 = newAtomMap[bond->getBeginAtomIdx()]; unsigned ai2 = newAtomMap[bond->getEndAtomIdx()]; unsigned bi = side_chains->addBond(ai1, ai2, bond->getBondType()); visitedBonds[bond->getIdx()] = bi; } } } else { // select the core fragment // DEBUG PRINT #ifdef _DEBUG if (iCore != -1) std::cout << "Next CORE found. iCore=" << iCore << " New i=" << i << " nAttachments=" << nAttachments << "\n"; #endif if (nAttachments >= maxAttachments) // Choose a fragment with maximal // number of connection points as a // core iCore = i; } } // build core molecule from selected fragment if (iCore != std::numeric_limits<size_t>::max()) { core = RWMOL_SPTR(new RWMol); visitedBonds.clear(); std::map<unsigned, unsigned> newAtomMap; // key is atom index in source molecule for (size_t i = 0; i < frags[iCore].size(); i++) { unsigned ai = frags[iCore][i]; Atom* a = em.getAtomWithIdx(ai); newAtomMap[ai] = core->addAtom(a->copy(), true, true); } // add all bonds from this fragment for (size_t ai = 0; ai < frags[iCore].size(); ai++) { Atom* a = em.getAtomWithIdx(frags[iCore][ai]); ROMol::OEDGE_ITER beg, end; for (boost::tie(beg, end) = em.getAtomBonds(a); beg != end; ++beg) { const BOND_SPTR bond = em[*beg]; if (newAtomMap.end() == newAtomMap.find(bond->getBeginAtomIdx()) || newAtomMap.end() == newAtomMap.find(bond->getEndAtomIdx()) || visitedBonds.end() != visitedBonds.find(bond->getIdx())) continue; unsigned ai1 = newAtomMap[bond->getBeginAtomIdx()]; unsigned ai2 = newAtomMap[bond->getEndAtomIdx()]; unsigned bi = core->addBond(ai1, ai2, bond->getBondType()); visitedBonds[bond->getIdx()] = bi; } } // DEBUG PRINT #ifdef _DEBUG // std::cout<<res.size()+1<<" isotope="<< isotope <<" "<< MolToSmiles(*core, // true)<<", "<<MolToSmiles(*side_chains, true)<<"\n"; #endif } // iCore != -1 } // check for duplicates: bool resFound = false; size_t ri = 0; for (ri = 0; ri < res.size(); ri++) { const std::pair<ROMOL_SPTR, ROMOL_SPTR>& r = res[ri]; if (side_chains->getNumAtoms() == r.second->getNumAtoms() && side_chains->getNumBonds() == r.second->getNumBonds() && ((NULL == core.get() && NULL == r.first.get()) || (NULL != core.get() && NULL != r.first.get() && core->getNumAtoms() == r.first->getNumAtoms() && core->getNumBonds() == r.first->getNumBonds()))) { // ToDo accurate check: // 1. compare hash code if (computeMorganCodeHash(*side_chains) == computeMorganCodeHash(*r.second) && (NULL == core || computeMorganCodeHash(*core) == computeMorganCodeHash(*r.first))) { // 2. final check to exclude hash collisions // We decided that it does not neccessary to implement resFound = true; break; } } } if (!resFound) { //std::cerr << "**********************" << std::endl; // From rfrag.py // now change the labels on sidechains and core // to get the new labels, cansmi the dot-disconnected side chains // the first fragment in the side chains has attachment label 1, 2nd: 2, 3rd: 3 // then change the labels accordingly in the core std::map<unsigned int, int> canonicalAtomMaps; if( side_chains.get() ) { RWMol tmp_side_chain(*(side_chains.get())); std::vector<int> oldMaps(tmp_side_chain.getNumAtoms(), 0); // clear atom labels (they are used in canonicalization) // and move them to dummy storage for (ROMol::AtomIterator at = tmp_side_chain.beginAtoms(); at != tmp_side_chain.endAtoms(); ++at) { int label = 0; if ((*at)->getPropIfPresent(common_properties::molAtomMapNumber, label) ) { (*at)->clearProp(common_properties::molAtomMapNumber); oldMaps[(*at)->getIdx()] = label; } } const bool doIsomericSmiles = true; // should this be false??? std::string smiles = MolToSmiles(tmp_side_chain, doIsomericSmiles); //std::cerr << "smiles: " << smiles << std::endl; // Get the canonical output order and use it to remap // the atom maps int the side chains // these will get reapplied to the core (if there is a core) const std::vector<unsigned int> &ranks = tmp_side_chain.getProp< std::vector<unsigned int> >( common_properties::_smilesAtomOutputOrder); std::vector<std::pair<unsigned int, int> > rankedAtoms; for(size_t idx=0;idx<ranks.size();++idx) { unsigned int atom_idx = ranks[idx]; if(oldMaps[atom_idx] >0) { const int label = oldMaps[atom_idx]; //std::cerr << "atom_idx: " << atom_idx << " rank: " << ranks[atom_idx] << // " molAtomMapNumber: " << label << std::endl; rankedAtoms.push_back(std::make_pair(idx, label)); } } std::sort(rankedAtoms.begin(), rankedAtoms.end()); int nextMap = 0; for(size_t i=0;i<rankedAtoms.size();++i) { if(canonicalAtomMaps.find(rankedAtoms[i].second) == canonicalAtomMaps.end()) { //std::cerr << "Remapping: " << rankedAtoms[i].second << " " << " to " << (i+1) << // std::endl; canonicalAtomMaps[rankedAtoms[i].second] = ++nextMap; } } } //std::cerr << "======== Remap core " << std::endl; if( core.get() ) { // remap core if it exists for (ROMol::AtomIterator at = core->beginAtoms(); at != core->endAtoms(); ++at) { int label = 0; if ((*at)->getPropIfPresent(common_properties::molAtomMapNumber, label) ) { //std::cerr << "remapping core: " << label << " :" << canonicalAtomMaps[label] << // std::endl; (*at)->setProp(common_properties::molAtomMapNumber, canonicalAtomMaps[label]); } } } //std::cerr << "======== Remap side-chain " << std::endl; for (ROMol::AtomIterator at = side_chains->beginAtoms(); at != side_chains->endAtoms(); ++at) { int label = 0; if ((*at)->getPropIfPresent(common_properties::molAtomMapNumber, label) ) { //std::cerr << "remapping side chain: " << label << " :" << // canonicalAtomMaps[label] << std::endl; (*at)->setProp(common_properties::molAtomMapNumber, canonicalAtomMaps[label]); } } res.push_back(std::pair<ROMOL_SPTR, ROMOL_SPTR>(core, side_chains)); // } #ifdef _DEBUG else std::cout << res.size() + 1 << " --- DUPLICATE Result FOUND --- ri=" << ri << "\n"; #endif }
static void addResult(std::vector<std::pair<ROMOL_SPTR, ROMOL_SPTR> >& res, // const SignatureVector& resSignature, const ROMol& mol, const BondVector_t& bonds_selected, size_t maxCuts) { #ifdef _DEBUG std::cout << res.size() + 1 << ": "; #endif RWMol em(mol); // loop through the bonds to delete. == deleteBonds() unsigned isotope = 0; std::map<unsigned, unsigned> isotope_track; for (size_t i = 0; i < bonds_selected.size(); i++) { #ifdef _DEBUG { std::string symbol = em.getAtomWithIdx(bonds_selected[i].first)->getSymbol(); int label = 0; em.getAtomWithIdx(bonds_selected[i].first) ->getPropIfPresent(common_properties::molAtomMapNumber, label); char a1[32]; if (0 == label) sprintf(a1, "\'%s\'", symbol.c_str(), label); else sprintf(a1, "\'%s:%u\'", symbol.c_str(), label); symbol = em.getAtomWithIdx(bonds_selected[i].second)->getSymbol(); label = 0; em.getAtomWithIdx(bonds_selected[i].second) ->getPropIfPresent(common_properties::molAtomMapNumber, label); char a2[32]; if (0 == label) sprintf(a2, "\'%s\'", symbol.c_str(), label); else sprintf(a2, "\'%s:%u\'", symbol.c_str(), label); std::cout << "(" << bonds_selected[i].first << a1 << "," << bonds_selected[i].second << a2 << ") "; } #endif isotope += 1; // remove the bond em.removeBond(bonds_selected[i].first, bonds_selected[i].second); // now add attachement points and set attachment point lables Atom* a = new Atom(0); a->setProp(common_properties::molAtomMapNumber, (int)isotope); unsigned newAtomA = em.addAtom(a, true, true); em.addBond(bonds_selected[i].first, newAtomA, Bond::SINGLE); a = new Atom(0); a->setProp(common_properties::molAtomMapNumber, (int)isotope); unsigned newAtomB = em.addAtom(a, true, true); em.addBond(bonds_selected[i].second, newAtomB, Bond::SINGLE); // keep track of where to put isotopes isotope_track[newAtomA] = isotope; isotope_track[newAtomB] = isotope; } #ifdef _DEBUG std::cout << "\n"; #endif RWMOL_SPTR core, side_chains; // core & side_chains output molecules if (isotope == 1) { side_chains = RWMOL_SPTR(new RWMol(em)); // output = '%s,%s,,%s.%s' // DEBUG PRINT #ifdef _DEBUG // OK: std::cout<<res.size()+1<<" isotope="<< isotope <<","<< // MolToSmiles(*side_chains, true) <<"\n"; #endif } else if (isotope >= 2) { std::vector<std::vector<int> > frags; unsigned int nFrags = MolOps::getMolFrags(em, frags); //#check if its a valid triple or bigger cut. matchObj = re.search( //'\*.*\*.*\*', f) // check if exists a fragment with maxCut connection points (*.. *.. *) if (isotope >= 3) { bool valid = false; for (size_t i = 0; i < nFrags; i++) { unsigned nLabels = 0; for (size_t ai = 0; ai < frags[i].size(); ai++) { if (isotope_track.end() != isotope_track.find(frags[i][ai])) // new added atom ++nLabels; // found connection point } if (nLabels >= maxCuts) { // looks like it should be selected as core ! ?????? valid = true; break; } } if (!valid) { #ifdef _DEBUG std::cout << "isotope>=3: invalid fragments. fragment with maxCut " "connection points not found" << "\n"; #endif return; } } size_t iCore = std::numeric_limits<size_t>::max(); side_chains = RWMOL_SPTR(new RWMol); std::map<unsigned, unsigned> visitedBonds; // key is bond index in source molecule unsigned maxAttachments = 0; for (size_t i = 0; i < frags.size(); i++) { unsigned nAttachments = 0; for (size_t ai = 0; ai < frags[i].size(); ai++) { if (isotope_track.end() != isotope_track.find( frags[i][ai])) // == if(a->hasProp("molAtomMapNumber")) ++nAttachments; } if (maxAttachments < nAttachments) maxAttachments = nAttachments; if (1 == nAttachments) { // build side-chain set of molecules from // selected fragment std::map<unsigned, unsigned> newAtomMap; // key is atom index in source molecule for (size_t ai = 0; ai < frags[i].size(); ai++) { Atom* a = em.getAtomWithIdx(frags[i][ai]); newAtomMap[frags[i][ai]] = side_chains->addAtom(a->copy(), true, true); } // add all bonds from this fragment for (size_t ai = 0; ai < frags[i].size(); ai++) { Atom* a = em.getAtomWithIdx(frags[i][ai]); ROMol::OEDGE_ITER beg, end; for (boost::tie(beg, end) = em.getAtomBonds(a); beg != end; ++beg) { const BOND_SPTR bond = em[*beg]; if (newAtomMap.end() == newAtomMap.find(bond->getBeginAtomIdx()) || newAtomMap.end() == newAtomMap.find(bond->getEndAtomIdx()) || visitedBonds.end() != visitedBonds.find(bond->getIdx())) continue; unsigned ai1 = newAtomMap[bond->getBeginAtomIdx()]; unsigned ai2 = newAtomMap[bond->getEndAtomIdx()]; unsigned bi = side_chains->addBond(ai1, ai2, bond->getBondType()); visitedBonds[bond->getIdx()] = bi; } } } else { // select the core fragment // DEBUG PRINT #ifdef _DEBUG if (iCore != -1) std::cout << "Next CORE found. iCore=" << iCore << " New i=" << i << " nAttachments=" << nAttachments << "\n"; #endif if (nAttachments >= maxAttachments) // Choose a fragment with maximal // number of connection points as a // core iCore = i; } } // build core molecule from selected fragment if (iCore != std::numeric_limits<size_t>::max()) { core = RWMOL_SPTR(new RWMol); visitedBonds.clear(); std::map<unsigned, unsigned> newAtomMap; // key is atom index in source molecule for (size_t i = 0; i < frags[iCore].size(); i++) { unsigned ai = frags[iCore][i]; Atom* a = em.getAtomWithIdx(ai); newAtomMap[ai] = core->addAtom(a->copy(), true, true); } // add all bonds from this fragment for (size_t ai = 0; ai < frags[iCore].size(); ai++) { Atom* a = em.getAtomWithIdx(frags[iCore][ai]); ROMol::OEDGE_ITER beg, end; for (boost::tie(beg, end) = em.getAtomBonds(a); beg != end; ++beg) { const BOND_SPTR bond = em[*beg]; if (newAtomMap.end() == newAtomMap.find(bond->getBeginAtomIdx()) || newAtomMap.end() == newAtomMap.find(bond->getEndAtomIdx()) || visitedBonds.end() != visitedBonds.find(bond->getIdx())) continue; unsigned ai1 = newAtomMap[bond->getBeginAtomIdx()]; unsigned ai2 = newAtomMap[bond->getEndAtomIdx()]; unsigned bi = core->addBond(ai1, ai2, bond->getBondType()); visitedBonds[bond->getIdx()] = bi; } } // DEBUG PRINT #ifdef _DEBUG // std::cout<<res.size()+1<<" isotope="<< isotope <<" "<< MolToSmiles(*core, // true)<<", "<<MolToSmiles(*side_chains, true)<<"\n"; #endif } // iCore != -1 } // check for dublicates: bool resFound = false; size_t ri = 0; for (ri = 0; ri < res.size(); ri++) { const std::pair<ROMOL_SPTR, ROMOL_SPTR>& r = res[ri]; if (side_chains->getNumAtoms() == r.second->getNumAtoms() && side_chains->getNumBonds() == r.second->getNumBonds() && ((NULL == core.get() && NULL == r.first.get()) || (NULL != core.get() && NULL != r.first.get() && core->getNumAtoms() == r.first->getNumAtoms() && core->getNumBonds() == r.first->getNumBonds()))) { // ToDo accurate check: // 1. compare hash code if (computeMorganCodeHash(*side_chains) == computeMorganCodeHash(*r.second) && (NULL == core || computeMorganCodeHash(*core) == computeMorganCodeHash(*r.first))) { // 2. final check to exclude hash collisions // We decided that it does not neccessary to implement resFound = true; break; } } } if (!resFound) res.push_back(std::pair<ROMOL_SPTR, ROMOL_SPTR>(core, side_chains)); // #ifdef _DEBUG else std::cout << res.size() + 1 << " --- DUPLICATE Result FOUND --- ri=" << ri << "\n"; #endif }
void canonicalDFSTraversal(ROMol &mol,int atomIdx,int inBondIdx, std::vector<AtomColors> &colors, VECT_INT_VECT &cycles, INT_VECT &ranks, INT_VECT &cyclesAvailable, MolStack &molStack, INT_VECT &atomOrders, INT_VECT &bondVisitOrders, VECT_INT_VECT &atomRingClosures, std::vector<INT_LIST> &atomTraversalBondOrder, const boost::dynamic_bitset<> *bondsInPlay, const std::vector<std::string> *bondSymbols ){ PRECONDITION(colors.size()>=mol.getNumAtoms(),"vector too small"); PRECONDITION(ranks.size()>=mol.getNumAtoms(),"vector too small"); PRECONDITION(atomOrders.size()>=mol.getNumAtoms(),"vector too small"); PRECONDITION(bondVisitOrders.size()>=mol.getNumBonds(),"vector too small"); PRECONDITION(atomRingClosures.size()>=mol.getNumAtoms(),"vector too small"); PRECONDITION(atomTraversalBondOrder.size()>=mol.getNumAtoms(),"vector too small"); PRECONDITION(!bondsInPlay || bondsInPlay->size()>=mol.getNumBonds(),"bondsInPlay too small"); PRECONDITION(!bondSymbols || bondSymbols->size()>=mol.getNumBonds(),"bondSymbols too small"); int nAttached=0; Atom *atom = mol.getAtomWithIdx(atomIdx); INT_LIST directTravList,cycleEndList; molStack.push_back(MolStackElem(atom)); atomOrders[atom->getIdx()] = molStack.size(); colors[atomIdx] = GREY_NODE; // --------------------- // // Build the list of possible destinations from here // // --------------------- std::vector< PossibleType > possibles; possibles.resize(0); ROMol::OBOND_ITER_PAIR bondsPair = mol.getAtomBonds(atom); possibles.reserve(bondsPair.second-bondsPair.first); while(bondsPair.first != bondsPair.second){ BOND_SPTR theBond = mol[*(bondsPair.first)]; bondsPair.first++; if(bondsInPlay && !(*bondsInPlay)[theBond->getIdx()]) continue; if(inBondIdx<0 || theBond->getIdx() != static_cast<unsigned int>(inBondIdx)){ int otherIdx = theBond->getOtherAtomIdx(atomIdx); long rank=ranks[otherIdx]; // --------------------- // // things are a bit more complicated if we are sitting on a // ring atom we would like to traverse first to the // ring-closure atoms, then to atoms outside the ring first, // then to atoms in the ring that haven't already been visited // (non-ring-closure atoms). // // Here's how the black magic works: // - non-ring atom neighbors have their original ranks // - ring atom neighbors have this added to their ranks: // (Bond::OTHER - bondOrder)*MAX_NATOMS*MAX_NATOMS // - ring-closure neighbors lose a factor of: // (Bond::OTHER+1)*MAX_NATOMS*MAX_NATOMS // // This tactic biases us to traverse to non-ring neighbors first, // original ordering if bond orders are all equal... crafty, neh? // // --------------------- if( colors[otherIdx] == GREY_NODE ) { rank -= static_cast<int>(Bond::OTHER+1) * MAX_NATOMS*MAX_NATOMS; if(!bondSymbols){ rank += static_cast<int>(Bond::OTHER - theBond->getBondType()) * MAX_NATOMS; } else { const std::string &symb=(*bondSymbols)[theBond->getIdx()]; boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end()); rank += (hsh%MAX_NATOMS) * MAX_NATOMS; } } else if( theBond->getOwningMol().getRingInfo()->numBondRings(theBond->getIdx()) ){ if(!bondSymbols){ rank += static_cast<int>(Bond::OTHER - theBond->getBondType()) * MAX_NATOMS*MAX_NATOMS; } else { const std::string &symb=(*bondSymbols)[theBond->getIdx()]; boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end()); rank += (hsh%MAX_NATOMS)*MAX_NATOMS*MAX_NATOMS; } } possibles.push_back(PossibleType(rank,otherIdx,theBond.get())); } } // --------------------- // // Sort on ranks // // --------------------- std::sort(possibles.begin(),possibles.end(),_possibleComp); // --------------------- // // Now work the children // // --------------------- std::vector<MolStack> subStacks; for(std::vector<PossibleType>::iterator possiblesIt=possibles.begin(); possiblesIt!=possibles.end(); possiblesIt++){ MolStack subStack; #if 0 int possibleIdx = possiblesIt->second.first; Bond *bond = possiblesIt->second.second; #endif int possibleIdx = possiblesIt->get<1>(); Bond *bond = possiblesIt->get<2>(); Atom *otherAtom=mol.getAtomWithIdx(possibleIdx); unsigned int lowestRingIdx; INT_VECT::const_iterator cAIt; switch(colors[possibleIdx]){ case WHITE_NODE: // ----- // we haven't seen this node at all before // ----- // it might have some residual data from earlier calls, clean that up: if(otherAtom->hasProp("_TraversalBondIndexOrder")){ otherAtom->clearProp("_TraversalBondIndexOrder"); } directTravList.push_back(bond->getIdx()); subStack.push_back(MolStackElem(bond,atomIdx)); canonicalDFSTraversal(mol,possibleIdx,bond->getIdx(),colors, cycles,ranks,cyclesAvailable,subStack, atomOrders,bondVisitOrders,atomRingClosures,atomTraversalBondOrder, bondsInPlay,bondSymbols); subStacks.push_back(subStack); nAttached += 1; break; case GREY_NODE: // ----- // we've seen this, but haven't finished it (we're finishing a ring) // ----- cycleEndList.push_back(bond->getIdx()); cAIt=std::find(cyclesAvailable.begin(), cyclesAvailable.end(),1); if(cAIt==cyclesAvailable.end()){ throw ValueErrorException("Too many rings open at once. SMILES cannot be generated."); } lowestRingIdx = cAIt-cyclesAvailable.begin(); cyclesAvailable[lowestRingIdx] = 0; cycles[possibleIdx].push_back(lowestRingIdx); ++lowestRingIdx; bond->setProp("_TraversalRingClosureBond",lowestRingIdx); molStack.push_back(MolStackElem(bond, atom->getIdx())); molStack.push_back(MolStackElem(lowestRingIdx)); // we need to add this bond (which closes the ring) to the traversal list for the // other atom as well: atomTraversalBondOrder[otherAtom->getIdx()].push_back(bond->getIdx()); atomRingClosures[otherAtom->getIdx()].push_back(bond->getIdx()); break; default: // ----- // this node has been finished. don't do anything. // ----- break; } } INT_VECT &ringClosures=atomRingClosures[atom->getIdx()]; CHECK_INVARIANT(ringClosures.size()==cycles[atomIdx].size(), "ring closure mismatch"); for(unsigned int i=0;i<ringClosures.size();i++){ int ringIdx=cycles[atomIdx][i]; ringIdx += 1; molStack.push_back(MolStackElem(ringIdx)); } cycles[atomIdx].resize(0); MolStack::const_iterator ciMS; for(int i=0;i<nAttached;i++){ if(i<nAttached-1){ int branchIdx=0; if(subStacks[i].begin()->type==MOL_STACK_ATOM){ branchIdx=subStacks[i].begin()->obj.atom->getIdx(); } else if(subStacks[i].begin()->type==MOL_STACK_BOND){ branchIdx=-1*subStacks[i].begin()->obj.bond->getIdx(); } else { ASSERT_INVARIANT(0,"branch started with something other than an atom or bond"); } molStack.push_back(MolStackElem("(",branchIdx)); for(ciMS=subStacks[i].begin();ciMS!=subStacks[i].end();ciMS++){ molStack.push_back(*ciMS); switch(ciMS->type){ case MOL_STACK_ATOM: atomOrders[ciMS->obj.atom->getIdx()] = molStack.size(); break; case MOL_STACK_BOND: bondVisitOrders[ciMS->obj.bond->getIdx()] = molStack.size(); break; default: break; } } molStack.push_back(MolStackElem(")",branchIdx)); } else { for(ciMS=subStacks[i].begin();ciMS!=subStacks[i].end();ciMS++){ molStack.push_back(*ciMS); switch(ciMS->type){ case MOL_STACK_ATOM: atomOrders[ciMS->obj.atom->getIdx()] = molStack.size(); break; case MOL_STACK_BOND: bondVisitOrders[ciMS->obj.bond->getIdx()] = molStack.size(); break; default: break; } } } } //std::cerr<<"*****>>>>>> Traversal results for atom: "<<atom->getIdx()<<"> "; INT_LIST travList; // first push on the incoming bond: if(inBondIdx >= 0){ //std::cerr<<" "<<inBondIdx; travList.push_back(inBondIdx); } // ... ring closures that end here: for(INT_LIST_CI ilci=cycleEndList.begin();ilci!=cycleEndList.end();++ilci){ //std::cerr<<" ["<<*ilci<<"]"; travList.push_back(*ilci); } // ... ring closures that start here: // if(atom->hasProp("_TraversalBondIndexOrder")){ // INT_LIST indirectTravList; // atom->getProp("_TraversalBondIndexOrder",indirectTravList); // for(INT_LIST_CI ilci=indirectTravList.begin();ilci!=indirectTravList.end();++ilci){ // //std::cerr<<" ("<<*ilci<<")"; // travList.push_back(*ilci); // } // } BOOST_FOREACH(int ili,atomTraversalBondOrder[atom->getIdx()]){ travList.push_back(ili); }