Example #1
0
// find the neighbors for an atoms that are not connected by single bond that is
// not refBond
// if checkDir is true only neighbor atoms with bonds marked with a direction
// will be returned
void findAtomNeighborsHelper(const ROMol &mol, const Atom *atom,
                             const Bond *refBond, UINT_VECT &neighbors,
                             bool checkDir = false) {
  PRECONDITION(atom, "bad atom");
  PRECONDITION(refBond, "bad bond");
  neighbors.clear();
  ROMol::OEDGE_ITER beg, end;
  boost::tie(beg, end) = mol.getAtomBonds(atom);
  while (beg != end) {
    const BOND_SPTR bond = mol[*beg];
    Bond::BondDir dir = bond->getBondDir();
    if (bond->getBondType() == Bond::SINGLE &&
        bond->getIdx() != refBond->getIdx()) {
      if (checkDir) {
        if ((dir != Bond::ENDDOWNRIGHT) && (dir != Bond::ENDUPRIGHT)) {
          ++beg;
          continue;
        }
      }
      Atom *nbrAtom = bond->getOtherAtom(atom);
      neighbors.push_back(nbrAtom->getIdx());
    }
    ++beg;
  }
}
Example #2
0
bool atomIsCandidateForRingStereochem(const ROMol &mol, const Atom *atom) {
  PRECONDITION(atom, "bad atom");
  bool res = false;
  if (!atom->getPropIfPresent(common_properties::_ringStereochemCand, res)) {
    const RingInfo *ringInfo = mol.getRingInfo();
    if (ringInfo->isInitialized() && ringInfo->numAtomRings(atom->getIdx())) {
      ROMol::OEDGE_ITER beg, end;
      boost::tie(beg, end) = mol.getAtomBonds(atom);
      std::vector<const Atom *> nonRingNbrs;
      std::vector<const Atom *> ringNbrs;
      while (beg != end) {
        const BOND_SPTR bond = mol[*beg];
        if (!ringInfo->numBondRings(bond->getIdx())) {
          nonRingNbrs.push_back(bond->getOtherAtom(atom));
        } else {
          ringNbrs.push_back(bond->getOtherAtom(atom));
        }
        ++beg;
      }

      unsigned int rank1 = 0, rank2 = 0;
      switch (nonRingNbrs.size()) {
        case 0:
          // don't do spiro:
          res = false;
          break;
        case 1:
          if (ringNbrs.size() == 2) res = true;
          break;
        case 2:
          if (nonRingNbrs[0]->getPropIfPresent(common_properties::_CIPRank,
                                               rank1) &&
              nonRingNbrs[1]->getPropIfPresent(common_properties::_CIPRank,
                                               rank2)) {
            if (rank1 == rank2) {
              res = false;
            } else {
              res = true;
            }
          }
          break;
        default:
          res = false;
      }
    }
    atom->setProp(common_properties::_ringStereochemCand, res, 1);
  }
  return res;
}
Example #3
0
  // finds cycles
  void dfsFindCycles(ROMol &mol,int atomIdx,int inBondIdx,
                     std::vector<AtomColors> &colors,
                     const UINT_VECT &ranks,
                     INT_VECT &atomOrders,
                     VECT_INT_VECT &atomRingClosures,
                     const boost::dynamic_bitset<> *bondsInPlay,
                     const std::vector<std::string> *bondSymbols
                     ){
    Atom *atom = mol.getAtomWithIdx(atomIdx);
    atomOrders.push_back(atomIdx);

    colors[atomIdx] = GREY_NODE;

    // ---------------------
    //
    //  Build the list of possible destinations from here
    //
    // ---------------------
    std::vector< PossibleType > possibles;
    possibles.resize(0);
    ROMol::OBOND_ITER_PAIR bondsPair = mol.getAtomBonds(atom);
    possibles.reserve(bondsPair.second-bondsPair.first);

    while(bondsPair.first != bondsPair.second){
      BOND_SPTR theBond = mol[*(bondsPair.first)];
      bondsPair.first++;
      if(bondsInPlay && !(*bondsInPlay)[theBond->getIdx()]) continue;
      if(inBondIdx<0 || theBond->getIdx() != static_cast<unsigned int>(inBondIdx)){
        int otherIdx = theBond->getOtherAtomIdx(atomIdx);
        long rank=ranks[otherIdx];
        // ---------------------
        //
        // things are a bit more complicated if we are sitting on a
        // ring atom. we would like to traverse first to the
        // ring-closure atoms, then to atoms outside the ring first,
        // then to atoms in the ring that haven't already been visited
        // (non-ring-closure atoms).
        // 
        //  Here's how the black magic works:
        //   - non-ring atom neighbors have their original ranks
        //   - ring atom neighbors have this added to their ranks:
        //       (MAX_BONDTYPE - bondOrder)*MAX_NATOMS*MAX_NATOMS
        //   - ring-closure neighbors lose a factor of:
        //       (MAX_BONDTYPE+1)*MAX_NATOMS*MAX_NATOMS
        //
        //  This tactic biases us to traverse to non-ring neighbors first,
        //  original ordering if bond orders are all equal... crafty, neh?
        //  
        // ---------------------
        if( colors[otherIdx] == GREY_NODE ) {
          rank -= static_cast<int>(MAX_BONDTYPE+1) *
            MAX_NATOMS*MAX_NATOMS;
          if(!bondSymbols){
            rank += static_cast<int>(MAX_BONDTYPE - theBond->getBondType()) *
              MAX_NATOMS;
          } else {
            const std::string &symb=(*bondSymbols)[theBond->getIdx()];
            boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end());
            rank += (hsh%MAX_NATOMS) *  MAX_NATOMS;
          }
        } else if( theBond->getOwningMol().getRingInfo()->numBondRings(theBond->getIdx()) ){
          if(!bondSymbols){
            rank += static_cast<int>(MAX_BONDTYPE - theBond->getBondType()) *
              MAX_NATOMS*MAX_NATOMS;
          } else {
            const std::string &symb=(*bondSymbols)[theBond->getIdx()];
            boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end());
            rank += (hsh%MAX_NATOMS)*MAX_NATOMS*MAX_NATOMS;
          }
        }
       //std::cerr<<"aIdx: "<< atomIdx <<"   p: "<<otherIdx<<" Rank: "<<ranks[otherIdx] <<" "<<colors[otherIdx]<<" "<<theBond->getBondType()<<" "<<rank<<std::endl;
        possibles.push_back(PossibleType(rank,otherIdx,theBond.get()));
      }
    }

    // ---------------------
    //
    //  Sort on ranks
    //
    // ---------------------
    std::sort(possibles.begin(),possibles.end(),_possibleCompare());


    // ---------------------
    //
    //  Now work the children
    //
    // ---------------------
    for(std::vector<PossibleType>::iterator possiblesIt=possibles.begin();
        possiblesIt!=possibles.end();
        possiblesIt++){
      int possibleIdx = possiblesIt->get<1>();
      Bond *bond = possiblesIt->get<2>();
      Atom *otherAtom=mol.getAtomWithIdx(possibleIdx);
      switch(colors[possibleIdx]){
      case WHITE_NODE:
        // -----
        // we haven't seen this node at all before, traverse
        // -----
        dfsFindCycles(mol,possibleIdx,bond->getIdx(),colors,
                    ranks,atomOrders,
                    atomRingClosures,
                    bondsInPlay,bondSymbols);
        break;
      case GREY_NODE:
        // -----
        // we've seen this, but haven't finished it (we're finishing a ring)
        // -----
        atomRingClosures[possibleIdx].push_back(bond->getIdx());
        atomRingClosures[atomIdx].push_back(bond->getIdx());
        break;
      default:
        // -----
        // this node has been finished. don't do anything.
        // -----
        break;
      }
    }
    colors[atomIdx] = BLACK_NODE;
  }
Example #4
0
 void MolDraw2D::doContinuousHighlighting( const ROMol &mol ,
                                           const vector<int> *highlight_atoms ,
                                           const vector<int> *highlight_bonds ,
                                           const map<int,DrawColour> *highlight_atom_map,
                                           const map<int,DrawColour> *highlight_bond_map,
                                           const std::map<int,double> *highlight_radii
                                           ) {
   int orig_lw=lineWidth();
   int tgt_lw=lineWidth()*8;
   // try to scale lw to reflect the overall scaling:
   tgt_lw = max(orig_lw*2,min(tgt_lw,(int)(scale_/25.*tgt_lw))); // the 25 here is extremely empirical
   bool orig_fp=fillPolys();
   ROMol::VERTEX_ITER this_at , end_at;
   if(highlight_bonds){
     boost::tie( this_at , end_at ) = mol.getVertices();
     while( this_at != end_at ) {
       int this_idx = mol[*this_at]->getIdx();
       ROMol::OEDGE_ITER nbr , end_nbr;
       boost::tie( nbr , end_nbr ) = mol.getAtomBonds( mol[*this_at].get() );
       while( nbr != end_nbr ) {
         const BOND_SPTR bond = mol[*nbr];
         ++nbr;
         int nbr_idx = bond->getOtherAtomIdx( this_idx );
         if( nbr_idx < static_cast<int>( at_cds_.size() ) && nbr_idx > this_idx ) {
           if(std::find(highlight_bonds->begin(),highlight_bonds->end(),bond->getIdx()) != highlight_bonds->end()){
             DrawColour col=drawOptions().highlightColour;
             if(highlight_bond_map &&
                highlight_bond_map->find(bond->getIdx())!=highlight_bond_map->end()){
               col = highlight_bond_map->find(bond->getIdx())->second;
             }
             setLineWidth(tgt_lw);
             Point2D at1_cds = at_cds_[this_idx];
             Point2D at2_cds = at_cds_[nbr_idx];
             drawLine( at1_cds , at2_cds , col , col);
           }
         }
       }
       ++this_at;
     }
   }
   if(highlight_atoms){
     boost::tie( this_at , end_at ) = mol.getVertices();
     while( this_at != end_at ) {
       int this_idx = mol[*this_at]->getIdx();
       if(std::find(highlight_atoms->begin(),highlight_atoms->end(),this_idx) != highlight_atoms->end()){
         DrawColour col=drawOptions().highlightColour;
         if(highlight_atom_map &&
            highlight_atom_map->find(this_idx)!=highlight_atom_map->end()){
           col = highlight_atom_map->find(this_idx)->second;
         }
         Point2D p1=at_cds_[this_idx];
         Point2D p2=at_cds_[this_idx];
         double radius=0.4;
         if(highlight_radii && highlight_radii->find(this_idx)!=highlight_radii->end()){
           radius = highlight_radii->find(this_idx)->second;
         }
         Point2D offset(radius,radius);
         p1 -= offset;
         p2 += offset;
         setColour(col);
         setFillPolys(true);
         setLineWidth(1);
         drawEllipse(p1,p2);
       }
       ++this_at;
     }
   }
   setLineWidth(orig_lw);
   setFillPolys(orig_fp);
 }
Example #5
0
// construct a vector with <atomIdx,direction> pairs for
// neighbors of a given atom.  This list will only be
// non-empty if at least one of the bonds has its direction
// set.
void findAtomNeighborDirHelper(const ROMol &mol, const Atom *atom,
                               const Bond *refBond, UINT_VECT &ranks,
                               INT_PAIR_VECT &neighbors,
                               bool &hasExplicitUnknownStereo) {
  PRECONDITION(atom, "bad atom");
  PRECONDITION(refBond, "bad bond");

  bool seenDir = false;
  ROMol::OEDGE_ITER beg, end;
  boost::tie(beg, end) = mol.getAtomBonds(atom);
  while (beg != end) {
    const BOND_SPTR bond = mol[*beg];
    // check whether this bond is explictly set to have unknown stereo
    if (!hasExplicitUnknownStereo) {
      int explicit_unknown_stereo;
      if (bond->getBondDir() == Bond::UNKNOWN  // there's a squiggle bond
          || (bond->getPropIfPresent<int>(common_properties::_UnknownStereo,
                                          explicit_unknown_stereo) &&
              explicit_unknown_stereo))
        hasExplicitUnknownStereo = true;
    }

    Bond::BondDir dir = bond->getBondDir();
    if (bond->getIdx() != refBond->getIdx()) {
      if (dir == Bond::ENDDOWNRIGHT || dir == Bond::ENDUPRIGHT) {
        seenDir = true;
        // If we're considering the bond "backwards", (i.e. from end
        // to beginning, reverse the effective direction:
        if (atom != bond->getBeginAtom()) {
          if (dir == Bond::ENDDOWNRIGHT)
            dir = Bond::ENDUPRIGHT;
          else
            dir = Bond::ENDDOWNRIGHT;
        }
      }
      Atom *nbrAtom = bond->getOtherAtom(atom);
      neighbors.push_back(std::make_pair(nbrAtom->getIdx(), dir));
    }
    ++beg;
  }
  if (!seenDir) {
    neighbors.clear();
  } else {
    if (neighbors.size() == 2 &&
        ranks[neighbors[0].first] == ranks[neighbors[1].first]) {
      // the two substituents are identical, no stereochemistry here:
      neighbors.clear();
    } else {
      // it's possible that direction was set only one of the bonds, set the
      // other
      // bond's direction to be reversed:
      if (neighbors[0].second != Bond::ENDDOWNRIGHT &&
          neighbors[0].second != Bond::ENDUPRIGHT) {
        CHECK_INVARIANT(neighbors.size() > 1, "too few neighbors");
        neighbors[0].second = neighbors[1].second == Bond::ENDDOWNRIGHT
                                  ? Bond::ENDUPRIGHT
                                  : Bond::ENDDOWNRIGHT;
      } else if (neighbors.size() > 1 &&
                 neighbors[1].second != Bond::ENDDOWNRIGHT &&
                 neighbors[1].second != Bond::ENDUPRIGHT) {
        neighbors[1].second = neighbors[0].second == Bond::ENDDOWNRIGHT
                                  ? Bond::ENDUPRIGHT
                                  : Bond::ENDDOWNRIGHT;
      }
    }
  }
}
Example #6
0
static void addResult(std::vector<std::pair<ROMOL_SPTR, ROMOL_SPTR> >&
                          res,  // const SignatureVector& resSignature,
                      const ROMol& mol,
                      const BondVector_t& bonds_selected, size_t maxCuts) {
#ifdef _DEBUG
  std::cout << res.size() + 1 << ": ";
#endif
  RWMol em(mol);
  // loop through the bonds to delete. == deleteBonds()
  unsigned isotope = 0;
  std::map<unsigned, unsigned> isotope_track;
  for (size_t i = 0; i < bonds_selected.size(); i++) {
#ifdef _DEBUG
    {
      std::string symbol =
          em.getAtomWithIdx(bonds_selected[i].first)->getSymbol();
      int label = 0;
      em.getAtomWithIdx(bonds_selected[i].first)
          ->getPropIfPresent(common_properties::molAtomMapNumber, label);
      char a1[32];
      if (0 == label)
        sprintf(a1, "\'%s\'", symbol.c_str(), label);
      else
        sprintf(a1, "\'%s:%u\'", symbol.c_str(), label);
      symbol = em.getAtomWithIdx(bonds_selected[i].second)->getSymbol();
      label = 0;
      em.getAtomWithIdx(bonds_selected[i].second)
          ->getPropIfPresent(common_properties::molAtomMapNumber, label);
      char a2[32];
      if (0 == label)
        sprintf(a2, "\'%s\'", symbol.c_str(), label);
      else
        sprintf(a2, "\'%s:%u\'", symbol.c_str(), label);

      std::cout << "(" << bonds_selected[i].first << a1 << ","
                << bonds_selected[i].second << a2 << ") ";
    }
#endif
    isotope += 1;
    // remove the bond
    em.removeBond(bonds_selected[i].first, bonds_selected[i].second);

    // now add attachement points and set attachment point lables
    Atom* a = new Atom(0);
    a->setProp(common_properties::molAtomMapNumber, (int)isotope);
    unsigned newAtomA = em.addAtom(a, true, true);
    em.addBond(bonds_selected[i].first, newAtomA, Bond::SINGLE);
    a = new Atom(0);
    a->setProp(common_properties::molAtomMapNumber, (int)isotope);
    unsigned newAtomB = em.addAtom(a, true, true);
    em.addBond(bonds_selected[i].second, newAtomB, Bond::SINGLE);

    // keep track of where to put isotopes
    isotope_track[newAtomA] = isotope;
    isotope_track[newAtomB] = isotope;
  }
#ifdef _DEBUG
  std::cout << "\n";
#endif
  RWMOL_SPTR core, side_chains;  // core & side_chains output molecules

  if (isotope == 1) {
    side_chains = RWMOL_SPTR(new RWMol(em));  // output = '%s,%s,,%s.%s'
// DEBUG PRINT
#ifdef _DEBUG
// OK: std::cout<<res.size()+1<<" isotope="<< isotope <<","<<
// MolToSmiles(*side_chains, true) <<"\n";
#endif
  } else if (isotope >= 2) {
    std::vector<std::vector<int> > frags;
    unsigned int nFrags = MolOps::getMolFrags(em, frags);

    //#check if its a valid triple or bigger cut.  matchObj = re.search(
    //'\*.*\*.*\*', f)
    // check if exists a fragment with maxCut connection points (*.. *.. *)
    if (isotope >= 3) {
      bool valid = false;
      for (size_t i = 0; i < nFrags; i++) {
        unsigned nLabels = 0;
        for (size_t ai = 0; ai < frags[i].size(); ai++) {
          if (isotope_track.end() !=
              isotope_track.find(frags[i][ai]))  // new added atom
            ++nLabels;                           // found connection point
        }
        if (nLabels >=
            maxCuts) {  // looks like it should be selected as core !  ??????
          valid = true;
          break;
        }
      }
      if (!valid) {
#ifdef _DEBUG
        std::cout << "isotope>=3: invalid fragments. fragment with maxCut "
                     "connection points not found"
                  << "\n";
#endif
        return;
      }
    }

    size_t iCore = std::numeric_limits<size_t>::max();
    side_chains = RWMOL_SPTR(new RWMol);
    std::map<unsigned, unsigned>
        visitedBonds;  // key is bond index in source molecule
    unsigned maxAttachments = 0;
    for (size_t i = 0; i < frags.size(); i++) {
      unsigned nAttachments = 0;
      for (size_t ai = 0; ai < frags[i].size(); ai++) {
        if (isotope_track.end() !=
            isotope_track.find(
                frags[i][ai]))  // == if(a->hasProp("molAtomMapNumber"))
          ++nAttachments;
      }
      if (maxAttachments < nAttachments) maxAttachments = nAttachments;
      if (1 == nAttachments) {  // build side-chain set of molecules from
                                // selected fragment
        std::map<unsigned, unsigned>
            newAtomMap;  // key is atom index in source molecule
        for (size_t ai = 0; ai < frags[i].size(); ai++) {
          Atom* a = em.getAtomWithIdx(frags[i][ai]);
          newAtomMap[frags[i][ai]] =
              side_chains->addAtom(a->copy(), true, true);
        }
        // add all bonds from this fragment
        for (size_t ai = 0; ai < frags[i].size(); ai++) {
          Atom* a = em.getAtomWithIdx(frags[i][ai]);
          ROMol::OEDGE_ITER beg, end;
          for (boost::tie(beg, end) = em.getAtomBonds(a); beg != end; ++beg) {
            const BOND_SPTR bond = em[*beg];
            if (newAtomMap.end() == newAtomMap.find(bond->getBeginAtomIdx()) ||
                newAtomMap.end() == newAtomMap.find(bond->getEndAtomIdx()) ||
                visitedBonds.end() != visitedBonds.find(bond->getIdx()))
              continue;
            unsigned ai1 = newAtomMap[bond->getBeginAtomIdx()];
            unsigned ai2 = newAtomMap[bond->getEndAtomIdx()];
            unsigned bi = side_chains->addBond(ai1, ai2, bond->getBondType());
            visitedBonds[bond->getIdx()] = bi;
          }
        }
      } else {  // select the core fragment
// DEBUG PRINT
#ifdef _DEBUG
        if (iCore != -1)
          std::cout << "Next CORE found. iCore=" << iCore << " New i=" << i
                    << " nAttachments=" << nAttachments << "\n";
#endif
        if (nAttachments >= maxAttachments)  // Choose a fragment with maximal
                                             // number of connection points as a
                                             // core
          iCore = i;
      }
    }
    // build core molecule from selected fragment
    if (iCore != std::numeric_limits<size_t>::max()) {
      core = RWMOL_SPTR(new RWMol);
      visitedBonds.clear();
      std::map<unsigned, unsigned>
          newAtomMap;  // key is atom index in source molecule
      for (size_t i = 0; i < frags[iCore].size(); i++) {
        unsigned ai = frags[iCore][i];
        Atom* a = em.getAtomWithIdx(ai);
        newAtomMap[ai] = core->addAtom(a->copy(), true, true);
      }
      // add all bonds from this fragment
      for (size_t ai = 0; ai < frags[iCore].size(); ai++) {
        Atom* a = em.getAtomWithIdx(frags[iCore][ai]);
        ROMol::OEDGE_ITER beg, end;
        for (boost::tie(beg, end) = em.getAtomBonds(a); beg != end; ++beg) {
          const BOND_SPTR bond = em[*beg];
          if (newAtomMap.end() == newAtomMap.find(bond->getBeginAtomIdx()) ||
              newAtomMap.end() == newAtomMap.find(bond->getEndAtomIdx()) ||
              visitedBonds.end() != visitedBonds.find(bond->getIdx()))
            continue;
          unsigned ai1 = newAtomMap[bond->getBeginAtomIdx()];
          unsigned ai2 = newAtomMap[bond->getEndAtomIdx()];
          unsigned bi = core->addBond(ai1, ai2, bond->getBondType());
          visitedBonds[bond->getIdx()] = bi;
        }
      }
// DEBUG PRINT
#ifdef _DEBUG
// std::cout<<res.size()+1<<" isotope="<< isotope <<" "<< MolToSmiles(*core,
// true)<<", "<<MolToSmiles(*side_chains, true)<<"\n";
#endif
    }  // iCore != -1
  }
  // check for duplicates:
  bool resFound = false;
  size_t ri = 0;
  for (ri = 0; ri < res.size(); ri++) {
    const std::pair<ROMOL_SPTR, ROMOL_SPTR>& r = res[ri];
    if (side_chains->getNumAtoms() == r.second->getNumAtoms() &&
        side_chains->getNumBonds() == r.second->getNumBonds() &&
        ((NULL == core.get() && NULL == r.first.get()) ||
         (NULL != core.get() && NULL != r.first.get() &&
          core->getNumAtoms() == r.first->getNumAtoms() &&
          core->getNumBonds() == r.first->getNumBonds()))) {
      // ToDo accurate check:
      // 1. compare hash code
      if (computeMorganCodeHash(*side_chains) ==
              computeMorganCodeHash(*r.second) &&
          (NULL == core ||
           computeMorganCodeHash(*core) == computeMorganCodeHash(*r.first))) {
        // 2. final check to exclude hash collisions
        // We decided that it does not neccessary to implement
        resFound = true;
        break;
      }
    }
  }
  if (!resFound) {
    //std::cerr << "**********************" << std::endl;
    // From rfrag.py
    // now change the labels on sidechains and core
    // to get the new labels, cansmi the dot-disconnected side chains
    // the first fragment in the side chains has attachment label 1, 2nd: 2, 3rd: 3
    // then change the labels accordingly in the core
    std::map<unsigned int, int> canonicalAtomMaps;
    if( side_chains.get() ) {
      RWMol tmp_side_chain(*(side_chains.get()));
      std::vector<int> oldMaps(tmp_side_chain.getNumAtoms(), 0);
      
      // clear atom labels (they are used in canonicalization)
      //  and move them to dummy storage
      for (ROMol::AtomIterator at = tmp_side_chain.beginAtoms(); at != tmp_side_chain.endAtoms();
           ++at) {
        int label = 0;
        if ((*at)->getPropIfPresent(common_properties::molAtomMapNumber, label) ) {
          (*at)->clearProp(common_properties::molAtomMapNumber);
          oldMaps[(*at)->getIdx()] = label;
        }
      }

      const bool doIsomericSmiles = true; // should this be false???
      std::string smiles = MolToSmiles(tmp_side_chain, doIsomericSmiles);
      //std::cerr << "smiles: " << smiles << std::endl;

      // Get the canonical output order and use it to remap
      //  the atom maps int the side chains
      //  these will get reapplied to the core (if there is a core)
      const std::vector<unsigned int> &ranks = tmp_side_chain.getProp<
        std::vector<unsigned int> >(
          common_properties::_smilesAtomOutputOrder);
      
      std::vector<std::pair<unsigned int, int> > rankedAtoms;

      for(size_t idx=0;idx<ranks.size();++idx) {
        unsigned int atom_idx = ranks[idx];
        if(oldMaps[atom_idx] >0) {
          const int label = oldMaps[atom_idx];
          //std::cerr << "atom_idx: " << atom_idx << " rank: " << ranks[atom_idx] <<
          //    " molAtomMapNumber: " << label << std::endl;
          rankedAtoms.push_back(std::make_pair(idx, label));
        }
      }
      std::sort(rankedAtoms.begin(), rankedAtoms.end());
      int nextMap = 0;
      for(size_t i=0;i<rankedAtoms.size();++i) {
        if(canonicalAtomMaps.find(rankedAtoms[i].second) == canonicalAtomMaps.end()) {
          //std::cerr << "Remapping: " << rankedAtoms[i].second << " " << " to " << (i+1) <<
          //    std::endl;
          canonicalAtomMaps[rankedAtoms[i].second] = ++nextMap;
        }
      }
    }
    
    //std::cerr << "======== Remap core " << std::endl;
    if( core.get() ) { // remap core if it exists
      for (ROMol::AtomIterator at = core->beginAtoms(); at != core->endAtoms();
           ++at) {
        int label = 0;
        if ((*at)->getPropIfPresent(common_properties::molAtomMapNumber, label) ) {
          //std::cerr << "remapping core: " << label << " :" << canonicalAtomMaps[label] <<
          //    std::endl;
          (*at)->setProp(common_properties::molAtomMapNumber, canonicalAtomMaps[label]);
        }
      }
    }
    
    //std::cerr << "======== Remap side-chain " << std::endl;
    for (ROMol::AtomIterator at = side_chains->beginAtoms(); at != side_chains->endAtoms();
         ++at) {
      int label = 0;
      if ((*at)->getPropIfPresent(common_properties::molAtomMapNumber, label) ) {
        //std::cerr << "remapping side chain: " << label << " :" << 
        // canonicalAtomMaps[label] << std::endl;
        (*at)->setProp(common_properties::molAtomMapNumber, canonicalAtomMaps[label]);
      }
    }

    
    res.push_back(std::pair<ROMOL_SPTR, ROMOL_SPTR>(core, side_chains));  //
  }
#ifdef _DEBUG
  else
    std::cout << res.size() + 1 << " --- DUPLICATE Result FOUND --- ri=" << ri
              << "\n";
#endif
}
Example #7
0
static void addResult(std::vector<std::pair<ROMOL_SPTR, ROMOL_SPTR> >&
                          res,  // const SignatureVector& resSignature,
                      const ROMol& mol,
                      const BondVector_t& bonds_selected, size_t maxCuts) {
#ifdef _DEBUG
  std::cout << res.size() + 1 << ": ";
#endif
  RWMol em(mol);
  // loop through the bonds to delete. == deleteBonds()
  unsigned isotope = 0;
  std::map<unsigned, unsigned> isotope_track;
  for (size_t i = 0; i < bonds_selected.size(); i++) {
#ifdef _DEBUG
    {
      std::string symbol =
          em.getAtomWithIdx(bonds_selected[i].first)->getSymbol();
      int label = 0;
      em.getAtomWithIdx(bonds_selected[i].first)
          ->getPropIfPresent(common_properties::molAtomMapNumber, label);
      char a1[32];
      if (0 == label)
        sprintf(a1, "\'%s\'", symbol.c_str(), label);
      else
        sprintf(a1, "\'%s:%u\'", symbol.c_str(), label);
      symbol = em.getAtomWithIdx(bonds_selected[i].second)->getSymbol();
      label = 0;
      em.getAtomWithIdx(bonds_selected[i].second)
          ->getPropIfPresent(common_properties::molAtomMapNumber, label);
      char a2[32];
      if (0 == label)
        sprintf(a2, "\'%s\'", symbol.c_str(), label);
      else
        sprintf(a2, "\'%s:%u\'", symbol.c_str(), label);

      std::cout << "(" << bonds_selected[i].first << a1 << ","
                << bonds_selected[i].second << a2 << ") ";
    }
#endif
    isotope += 1;
    // remove the bond
    em.removeBond(bonds_selected[i].first, bonds_selected[i].second);

    // now add attachement points and set attachment point lables
    Atom* a = new Atom(0);
    a->setProp(common_properties::molAtomMapNumber, (int)isotope);
    unsigned newAtomA = em.addAtom(a, true, true);
    em.addBond(bonds_selected[i].first, newAtomA, Bond::SINGLE);
    a = new Atom(0);
    a->setProp(common_properties::molAtomMapNumber, (int)isotope);
    unsigned newAtomB = em.addAtom(a, true, true);
    em.addBond(bonds_selected[i].second, newAtomB, Bond::SINGLE);

    // keep track of where to put isotopes
    isotope_track[newAtomA] = isotope;
    isotope_track[newAtomB] = isotope;
  }
#ifdef _DEBUG
  std::cout << "\n";
#endif
  RWMOL_SPTR core, side_chains;  // core & side_chains output molecules

  if (isotope == 1) {
    side_chains = RWMOL_SPTR(new RWMol(em));  // output = '%s,%s,,%s.%s'
// DEBUG PRINT
#ifdef _DEBUG
// OK: std::cout<<res.size()+1<<" isotope="<< isotope <<","<<
// MolToSmiles(*side_chains, true) <<"\n";
#endif
  } else if (isotope >= 2) {
    std::vector<std::vector<int> > frags;
    unsigned int nFrags = MolOps::getMolFrags(em, frags);

    //#check if its a valid triple or bigger cut.  matchObj = re.search(
    //'\*.*\*.*\*', f)
    // check if exists a fragment with maxCut connection points (*.. *.. *)
    if (isotope >= 3) {
      bool valid = false;
      for (size_t i = 0; i < nFrags; i++) {
        unsigned nLabels = 0;
        for (size_t ai = 0; ai < frags[i].size(); ai++) {
          if (isotope_track.end() !=
              isotope_track.find(frags[i][ai]))  // new added atom
            ++nLabels;                           // found connection point
        }
        if (nLabels >=
            maxCuts) {  // looks like it should be selected as core !  ??????
          valid = true;
          break;
        }
      }
      if (!valid) {
#ifdef _DEBUG
        std::cout << "isotope>=3: invalid fragments. fragment with maxCut "
                     "connection points not found"
                  << "\n";
#endif
        return;
      }
    }

    size_t iCore = std::numeric_limits<size_t>::max();
    side_chains = RWMOL_SPTR(new RWMol);
    std::map<unsigned, unsigned>
        visitedBonds;  // key is bond index in source molecule
    unsigned maxAttachments = 0;
    for (size_t i = 0; i < frags.size(); i++) {
      unsigned nAttachments = 0;
      for (size_t ai = 0; ai < frags[i].size(); ai++) {
        if (isotope_track.end() !=
            isotope_track.find(
                frags[i][ai]))  // == if(a->hasProp("molAtomMapNumber"))
          ++nAttachments;
      }
      if (maxAttachments < nAttachments) maxAttachments = nAttachments;
      if (1 == nAttachments) {  // build side-chain set of molecules from
                                // selected fragment
        std::map<unsigned, unsigned>
            newAtomMap;  // key is atom index in source molecule
        for (size_t ai = 0; ai < frags[i].size(); ai++) {
          Atom* a = em.getAtomWithIdx(frags[i][ai]);
          newAtomMap[frags[i][ai]] =
              side_chains->addAtom(a->copy(), true, true);
        }
        // add all bonds from this fragment
        for (size_t ai = 0; ai < frags[i].size(); ai++) {
          Atom* a = em.getAtomWithIdx(frags[i][ai]);
          ROMol::OEDGE_ITER beg, end;
          for (boost::tie(beg, end) = em.getAtomBonds(a); beg != end; ++beg) {
            const BOND_SPTR bond = em[*beg];
            if (newAtomMap.end() == newAtomMap.find(bond->getBeginAtomIdx()) ||
                newAtomMap.end() == newAtomMap.find(bond->getEndAtomIdx()) ||
                visitedBonds.end() != visitedBonds.find(bond->getIdx()))
              continue;
            unsigned ai1 = newAtomMap[bond->getBeginAtomIdx()];
            unsigned ai2 = newAtomMap[bond->getEndAtomIdx()];
            unsigned bi = side_chains->addBond(ai1, ai2, bond->getBondType());
            visitedBonds[bond->getIdx()] = bi;
          }
        }
      } else {  // select the core fragment
// DEBUG PRINT
#ifdef _DEBUG
        if (iCore != -1)
          std::cout << "Next CORE found. iCore=" << iCore << " New i=" << i
                    << " nAttachments=" << nAttachments << "\n";
#endif
        if (nAttachments >= maxAttachments)  // Choose a fragment with maximal
                                             // number of connection points as a
                                             // core
          iCore = i;
      }
    }
    // build core molecule from selected fragment
    if (iCore != std::numeric_limits<size_t>::max()) {
      core = RWMOL_SPTR(new RWMol);
      visitedBonds.clear();
      std::map<unsigned, unsigned>
          newAtomMap;  // key is atom index in source molecule
      for (size_t i = 0; i < frags[iCore].size(); i++) {
        unsigned ai = frags[iCore][i];
        Atom* a = em.getAtomWithIdx(ai);
        newAtomMap[ai] = core->addAtom(a->copy(), true, true);
      }
      // add all bonds from this fragment
      for (size_t ai = 0; ai < frags[iCore].size(); ai++) {
        Atom* a = em.getAtomWithIdx(frags[iCore][ai]);
        ROMol::OEDGE_ITER beg, end;
        for (boost::tie(beg, end) = em.getAtomBonds(a); beg != end; ++beg) {
          const BOND_SPTR bond = em[*beg];
          if (newAtomMap.end() == newAtomMap.find(bond->getBeginAtomIdx()) ||
              newAtomMap.end() == newAtomMap.find(bond->getEndAtomIdx()) ||
              visitedBonds.end() != visitedBonds.find(bond->getIdx()))
            continue;
          unsigned ai1 = newAtomMap[bond->getBeginAtomIdx()];
          unsigned ai2 = newAtomMap[bond->getEndAtomIdx()];
          unsigned bi = core->addBond(ai1, ai2, bond->getBondType());
          visitedBonds[bond->getIdx()] = bi;
        }
      }
// DEBUG PRINT
#ifdef _DEBUG
// std::cout<<res.size()+1<<" isotope="<< isotope <<" "<< MolToSmiles(*core,
// true)<<", "<<MolToSmiles(*side_chains, true)<<"\n";
#endif
    }  // iCore != -1
  }
  // check for dublicates:
  bool resFound = false;
  size_t ri = 0;
  for (ri = 0; ri < res.size(); ri++) {
    const std::pair<ROMOL_SPTR, ROMOL_SPTR>& r = res[ri];
    if (side_chains->getNumAtoms() == r.second->getNumAtoms() &&
        side_chains->getNumBonds() == r.second->getNumBonds() &&
        ((NULL == core.get() && NULL == r.first.get()) ||
         (NULL != core.get() && NULL != r.first.get() &&
          core->getNumAtoms() == r.first->getNumAtoms() &&
          core->getNumBonds() == r.first->getNumBonds()))) {
      // ToDo accurate check:
      // 1. compare hash code
      if (computeMorganCodeHash(*side_chains) ==
              computeMorganCodeHash(*r.second) &&
          (NULL == core ||
           computeMorganCodeHash(*core) == computeMorganCodeHash(*r.first))) {
        // 2. final check to exclude hash collisions
        // We decided that it does not neccessary to implement
        resFound = true;
        break;
      }
    }
  }
  if (!resFound)
    res.push_back(std::pair<ROMOL_SPTR, ROMOL_SPTR>(core, side_chains));  //
#ifdef _DEBUG
  else
    std::cout << res.size() + 1 << " --- DUPLICATE Result FOUND --- ri=" << ri
              << "\n";
#endif
}
Example #8
0
  void canonicalDFSTraversal(ROMol &mol,int atomIdx,int inBondIdx,
                             std::vector<AtomColors> &colors,
                             VECT_INT_VECT &cycles,
                             INT_VECT &ranks,
                             INT_VECT &cyclesAvailable,
                             MolStack &molStack,
                             INT_VECT &atomOrders,
                             INT_VECT &bondVisitOrders,
                             VECT_INT_VECT &atomRingClosures,
                             std::vector<INT_LIST> &atomTraversalBondOrder,
                             const boost::dynamic_bitset<> *bondsInPlay,
                             const std::vector<std::string> *bondSymbols
                             ){
    PRECONDITION(colors.size()>=mol.getNumAtoms(),"vector too small");
    PRECONDITION(ranks.size()>=mol.getNumAtoms(),"vector too small");
    PRECONDITION(atomOrders.size()>=mol.getNumAtoms(),"vector too small");
    PRECONDITION(bondVisitOrders.size()>=mol.getNumBonds(),"vector too small");
    PRECONDITION(atomRingClosures.size()>=mol.getNumAtoms(),"vector too small");
    PRECONDITION(atomTraversalBondOrder.size()>=mol.getNumAtoms(),"vector too small");
    PRECONDITION(!bondsInPlay || bondsInPlay->size()>=mol.getNumBonds(),"bondsInPlay too small");
    PRECONDITION(!bondSymbols || bondSymbols->size()>=mol.getNumBonds(),"bondSymbols too small");

    int nAttached=0;

    Atom *atom = mol.getAtomWithIdx(atomIdx);
    INT_LIST directTravList,cycleEndList;

    molStack.push_back(MolStackElem(atom));
    atomOrders[atom->getIdx()] = molStack.size();
    colors[atomIdx] = GREY_NODE;

    // ---------------------
    //
    //  Build the list of possible destinations from here
    //
    // ---------------------
    std::vector< PossibleType > possibles;
    possibles.resize(0);
    ROMol::OBOND_ITER_PAIR bondsPair = mol.getAtomBonds(atom);
    possibles.reserve(bondsPair.second-bondsPair.first);

    while(bondsPair.first != bondsPair.second){
      BOND_SPTR theBond = mol[*(bondsPair.first)];
      bondsPair.first++;
      if(bondsInPlay && !(*bondsInPlay)[theBond->getIdx()]) continue;
      if(inBondIdx<0 || theBond->getIdx() != static_cast<unsigned int>(inBondIdx)){
        int otherIdx = theBond->getOtherAtomIdx(atomIdx);
        long rank=ranks[otherIdx];
        // ---------------------
        //
        // things are a bit more complicated if we are sitting on a
        // ring atom we would like to traverse first to the
        // ring-closure atoms, then to atoms outside the ring first,
        // then to atoms in the ring that haven't already been visited
        // (non-ring-closure atoms).
        // 
        //  Here's how the black magic works:
        //   - non-ring atom neighbors have their original ranks
        //   - ring atom neighbors have this added to their ranks:
        //       (Bond::OTHER - bondOrder)*MAX_NATOMS*MAX_NATOMS
        //   - ring-closure neighbors lose a factor of:
        //       (Bond::OTHER+1)*MAX_NATOMS*MAX_NATOMS
        //
        //  This tactic biases us to traverse to non-ring neighbors first,
        //  original ordering if bond orders are all equal... crafty, neh?
        //  
        // ---------------------
        if( colors[otherIdx] == GREY_NODE ) {
          rank -= static_cast<int>(Bond::OTHER+1) *
            MAX_NATOMS*MAX_NATOMS;
          if(!bondSymbols){
            rank += static_cast<int>(Bond::OTHER - theBond->getBondType()) *
              MAX_NATOMS;
          } else {
            const std::string &symb=(*bondSymbols)[theBond->getIdx()];
            boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end());
            rank += (hsh%MAX_NATOMS) *  MAX_NATOMS;
          }
        } else if( theBond->getOwningMol().getRingInfo()->numBondRings(theBond->getIdx()) ){
          if(!bondSymbols){
            rank += static_cast<int>(Bond::OTHER - theBond->getBondType()) *
              MAX_NATOMS*MAX_NATOMS;
          } else {
            const std::string &symb=(*bondSymbols)[theBond->getIdx()];
            boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end());
            rank += (hsh%MAX_NATOMS)*MAX_NATOMS*MAX_NATOMS;
          }
        }
        possibles.push_back(PossibleType(rank,otherIdx,theBond.get()));
      }
    }

    // ---------------------
    //
    //  Sort on ranks
    //
    // ---------------------
    std::sort(possibles.begin(),possibles.end(),_possibleComp);


    // ---------------------
    //
    //  Now work the children
    //
    // ---------------------
    std::vector<MolStack> subStacks;
    for(std::vector<PossibleType>::iterator possiblesIt=possibles.begin();
        possiblesIt!=possibles.end();
        possiblesIt++){
      MolStack subStack;
#if 0
      int possibleIdx = possiblesIt->second.first;
      Bond *bond = possiblesIt->second.second;
#endif
      int possibleIdx = possiblesIt->get<1>();
      Bond *bond = possiblesIt->get<2>();
      Atom *otherAtom=mol.getAtomWithIdx(possibleIdx);
      unsigned int lowestRingIdx;
      INT_VECT::const_iterator cAIt;
      switch(colors[possibleIdx]){
      case WHITE_NODE:
        // -----
        // we haven't seen this node at all before
        // -----

        // it might have some residual data from earlier calls, clean that up:
        if(otherAtom->hasProp("_TraversalBondIndexOrder")){
          otherAtom->clearProp("_TraversalBondIndexOrder");
        }

        directTravList.push_back(bond->getIdx());
        subStack.push_back(MolStackElem(bond,atomIdx));
        canonicalDFSTraversal(mol,possibleIdx,bond->getIdx(),colors,
                              cycles,ranks,cyclesAvailable,subStack,
                              atomOrders,bondVisitOrders,atomRingClosures,atomTraversalBondOrder,
                              bondsInPlay,bondSymbols);
        subStacks.push_back(subStack);
        nAttached += 1;
        break;
      case GREY_NODE:
        // -----
        // we've seen this, but haven't finished it (we're finishing a ring)
        // -----
        cycleEndList.push_back(bond->getIdx());
        cAIt=std::find(cyclesAvailable.begin(),
                       cyclesAvailable.end(),1);
        if(cAIt==cyclesAvailable.end()){
          throw ValueErrorException("Too many rings open at once. SMILES cannot be generated.");
        }
        lowestRingIdx =  cAIt-cyclesAvailable.begin();
        cyclesAvailable[lowestRingIdx] = 0;
        cycles[possibleIdx].push_back(lowestRingIdx);
        ++lowestRingIdx;

        bond->setProp("_TraversalRingClosureBond",lowestRingIdx);
        molStack.push_back(MolStackElem(bond,
                                        atom->getIdx()));
        molStack.push_back(MolStackElem(lowestRingIdx));

        // we need to add this bond (which closes the ring) to the traversal list for the
        // other atom as well:
        atomTraversalBondOrder[otherAtom->getIdx()].push_back(bond->getIdx());
        atomRingClosures[otherAtom->getIdx()].push_back(bond->getIdx());

        break;
      default:
        // -----
        // this node has been finished. don't do anything.
        // -----
        break;
      }
    }
    

    INT_VECT &ringClosures=atomRingClosures[atom->getIdx()];
    
    CHECK_INVARIANT(ringClosures.size()==cycles[atomIdx].size(),
                    "ring closure mismatch");
    for(unsigned int i=0;i<ringClosures.size();i++){
      int ringIdx=cycles[atomIdx][i];
      ringIdx += 1;
      molStack.push_back(MolStackElem(ringIdx));
    }
    cycles[atomIdx].resize(0);
  
    MolStack::const_iterator ciMS;
    for(int i=0;i<nAttached;i++){
      if(i<nAttached-1){
        int branchIdx=0;
        if(subStacks[i].begin()->type==MOL_STACK_ATOM){
          branchIdx=subStacks[i].begin()->obj.atom->getIdx();
        } else if(subStacks[i].begin()->type==MOL_STACK_BOND){
          branchIdx=-1*subStacks[i].begin()->obj.bond->getIdx();
        } else {
          ASSERT_INVARIANT(0,"branch started with something other than an atom or bond");
        }
        molStack.push_back(MolStackElem("(",branchIdx));
        for(ciMS=subStacks[i].begin();ciMS!=subStacks[i].end();ciMS++){
          molStack.push_back(*ciMS);
          switch(ciMS->type){
          case MOL_STACK_ATOM:
            atomOrders[ciMS->obj.atom->getIdx()] = molStack.size();
            break;
          case MOL_STACK_BOND:
            bondVisitOrders[ciMS->obj.bond->getIdx()] = molStack.size();
            break;
          default:
            break;
          }
        }
        molStack.push_back(MolStackElem(")",branchIdx));
      } else {
        for(ciMS=subStacks[i].begin();ciMS!=subStacks[i].end();ciMS++){
          molStack.push_back(*ciMS);
          switch(ciMS->type){
          case MOL_STACK_ATOM:
            atomOrders[ciMS->obj.atom->getIdx()] = molStack.size();
            break;
          case MOL_STACK_BOND:
            bondVisitOrders[ciMS->obj.bond->getIdx()] = molStack.size();
            break;
          default:
            break;
          }
        }
      }
    }

    //std::cerr<<"*****>>>>>> Traversal results for atom: "<<atom->getIdx()<<"> ";
    INT_LIST travList;
    // first push on the incoming bond:
    if(inBondIdx >= 0){
      //std::cerr<<" "<<inBondIdx;
      travList.push_back(inBondIdx);
    }

    // ... ring closures that end here:
    for(INT_LIST_CI ilci=cycleEndList.begin();ilci!=cycleEndList.end();++ilci){
      //std::cerr<<" ["<<*ilci<<"]";
      travList.push_back(*ilci);
    }


    // ... ring closures that start here:
    // if(atom->hasProp("_TraversalBondIndexOrder")){
    //   INT_LIST indirectTravList;
    //   atom->getProp("_TraversalBondIndexOrder",indirectTravList);
    //   for(INT_LIST_CI ilci=indirectTravList.begin();ilci!=indirectTravList.end();++ilci){
    //     //std::cerr<<" ("<<*ilci<<")";
    //     travList.push_back(*ilci);
    //   }
    // }
    BOOST_FOREACH(int ili,atomTraversalBondOrder[atom->getIdx()]){
      travList.push_back(ili);
    }