Example #1
0
RWMOL_SPTR convertTemplateToMol(const ROMOL_SPTR prodTemplateSptr) {
  const ROMol *prodTemplate = prodTemplateSptr.get();
  auto *res = new RWMol();

  // --------- --------- --------- --------- --------- ---------
  // Initialize by making a copy of the product template as a normal molecule.
  // NOTE that we can't just use a normal copy because we do not want to end up
  // with query atoms or bonds in the product.

  // copy in the atoms:
  ROMol::ATOM_ITER_PAIR atItP = prodTemplate->getVertices();
  while (atItP.first != atItP.second) {
    const Atom *oAtom = (*prodTemplate)[*(atItP.first++)];
    auto *newAtom = new Atom(*oAtom);
    res->addAtom(newAtom, false, true);
    int mapNum;
    if (newAtom->getPropIfPresent(common_properties::molAtomMapNumber,
                                  mapNum)) {
      // set bookmarks for the mapped atoms:
      res->setAtomBookmark(newAtom, mapNum);
      // now clear the molAtomMapNumber property so that it doesn't
      // end up in the products (this was bug 3140490):
      newAtom->clearProp(common_properties::molAtomMapNumber);
      newAtom->setProp<int>(common_properties::reactionMapNum, mapNum);
    }

    newAtom->setChiralTag(Atom::CHI_UNSPECIFIED);
    // if the product-template atom has the inversion flag set
    // to 4 (=SET), then bring its stereochem over, otherwise we'll
    // ignore it:
    int iFlag;
    if (oAtom->getPropIfPresent(common_properties::molInversionFlag, iFlag)) {
      if (iFlag == 4) newAtom->setChiralTag(oAtom->getChiralTag());
    }

    // check for properties we need to set:
    int val;
    if (newAtom->getPropIfPresent(common_properties::_QueryFormalCharge, val)) {
      newAtom->setFormalCharge(val);
    }
    if (newAtom->getPropIfPresent(common_properties::_QueryHCount, val)) {
      newAtom->setNumExplicitHs(val);
      newAtom->setNoImplicit(true);  // this was github #1544
    }
    if (newAtom->getPropIfPresent(common_properties::_QueryMass, val)) {
      // FIX: technically should do something with this
      // newAtom->setMass(val);
    }
    if (newAtom->getPropIfPresent(common_properties::_QueryIsotope, val)) {
      newAtom->setIsotope(val);
    }
  }
  // and the bonds:
  ROMol::BOND_ITER_PAIR bondItP = prodTemplate->getEdges();
  while (bondItP.first != bondItP.second) {
    const Bond *oldB = (*prodTemplate)[*(bondItP.first++)];
    unsigned int bondIdx;
    bondIdx = res->addBond(oldB->getBeginAtomIdx(), oldB->getEndAtomIdx(),
                           oldB->getBondType()) -
              1;
    // make sure we don't lose the bond dir information:
    Bond *newB = res->getBondWithIdx(bondIdx);
    newB->setBondDir(oldB->getBondDir());
    // Special case/hack:
    //  The product has been processed by the SMARTS parser.
    //  The SMARTS parser tags unspecified bonds as single, but then adds
    //  a query so that they match single or double
    //  This caused Issue 1748846
    //   http://sourceforge.net/tracker/index.php?func=detail&aid=1748846&group_id=160139&atid=814650
    //  We need to fix that little problem now:
    if (oldB->hasQuery()) {
      //  remember that the product has been processed by the SMARTS parser.
      std::string queryDescription = oldB->getQuery()->getDescription();
      if (queryDescription == "BondOr" && oldB->getBondType() == Bond::SINGLE) {
        //  We need to fix that little problem now:
        if (newB->getBeginAtom()->getIsAromatic() &&
            newB->getEndAtom()->getIsAromatic()) {
          newB->setBondType(Bond::AROMATIC);
          newB->setIsAromatic(true);
        } else {
          newB->setBondType(Bond::SINGLE);
          newB->setIsAromatic(false);
        }
      } else if (queryDescription == "BondNull") {
        newB->setProp(common_properties::NullBond, 1);
      }
    }
    // copy properties over:
    bool preserveExisting = true;
    newB->updateProps(*static_cast<const RDProps *>(oldB), preserveExisting);
  }
  return RWMOL_SPTR(res);
}  // end of convertTemplateToMol()
Example #2
0
  void dfsBuildStack(ROMol &mol,int atomIdx,int inBondIdx,
                     std::vector<AtomColors> &colors,
                     VECT_INT_VECT &cycles,
                     const UINT_VECT &ranks,
                     INT_VECT &cyclesAvailable,
                     MolStack &molStack,
                     INT_VECT &atomOrders,
                     INT_VECT &bondVisitOrders,
                     VECT_INT_VECT &atomRingClosures,
                     std::vector<INT_LIST> &atomTraversalBondOrder,
                     const boost::dynamic_bitset<> *bondsInPlay,
                     const std::vector<std::string> *bondSymbols
                     ){
#if 0
    std::cerr<<"traverse from atom: "<<atomIdx<<" via bond "<<inBondIdx<<" num cycles available: "
             <<std::count(cyclesAvailable.begin(),cyclesAvailable.end(),1)<<std::endl;
#endif
    Atom *atom = mol.getAtomWithIdx(atomIdx);
    INT_LIST directTravList,cycleEndList;
    boost::dynamic_bitset<> seenFromHere(mol.getNumAtoms());
    
    seenFromHere.set(atomIdx);
    molStack.push_back(MolStackElem(atom));
    atomOrders[atom->getIdx()] = molStack.size();
    colors[atomIdx] = GREY_NODE;

    INT_LIST travList;
    if(inBondIdx>=0) travList.push_back(inBondIdx);

    
    // ---------------------
    //
    //  Add any ring closures
    //
    // ---------------------
    if(atomRingClosures[atomIdx].size()){
      std::vector<unsigned int> ringsClosed;
      BOOST_FOREACH(int bIdx,atomRingClosures[atomIdx]){
        travList.push_back(bIdx);
        Bond *bond = mol.getBondWithIdx(bIdx);
        seenFromHere.set(bond->getOtherAtomIdx(atomIdx));
        unsigned int ringIdx;
        if(bond->getPropIfPresent(common_properties::_TraversalRingClosureBond, ringIdx)){
          // this is end of the ring closure
          // we can just pull the ring index from the bond itself:
          molStack.push_back(MolStackElem(bond,atomIdx));
          bondVisitOrders[bIdx]=molStack.size();
          molStack.push_back(MolStackElem(ringIdx));
          // don't make the ring digit immediately available again: we don't want to have the same
          // ring digit opening and closing rings on an atom.
          ringsClosed.push_back(ringIdx-1);
        } else {
          // this is the beginning of the ring closure, we need to come up with a ring index:
          INT_VECT::const_iterator cAIt=std::find(cyclesAvailable.begin(),
                                                  cyclesAvailable.end(),1);
          if(cAIt==cyclesAvailable.end()){
            throw ValueErrorException("Too many rings open at once. SMILES cannot be generated.");
          }
          unsigned int lowestRingIdx =  cAIt-cyclesAvailable.begin();
          cyclesAvailable[lowestRingIdx] = 0;
          ++lowestRingIdx;
          bond->setProp(common_properties::_TraversalRingClosureBond,lowestRingIdx);
          molStack.push_back(MolStackElem(lowestRingIdx));
        }
      }
Example #3
0
  void canonicalDFSTraversal(ROMol &mol,int atomIdx,int inBondIdx,
                             std::vector<AtomColors> &colors,
                             VECT_INT_VECT &cycles,
                             INT_VECT &ranks,
                             INT_VECT &cyclesAvailable,
                             MolStack &molStack,
                             INT_VECT &atomOrders,
                             INT_VECT &bondVisitOrders,
                             VECT_INT_VECT &atomRingClosures,
                             std::vector<INT_LIST> &atomTraversalBondOrder,
                             const boost::dynamic_bitset<> *bondsInPlay,
                             const std::vector<std::string> *bondSymbols
                             ){
    PRECONDITION(colors.size()>=mol.getNumAtoms(),"vector too small");
    PRECONDITION(ranks.size()>=mol.getNumAtoms(),"vector too small");
    PRECONDITION(atomOrders.size()>=mol.getNumAtoms(),"vector too small");
    PRECONDITION(bondVisitOrders.size()>=mol.getNumBonds(),"vector too small");
    PRECONDITION(atomRingClosures.size()>=mol.getNumAtoms(),"vector too small");
    PRECONDITION(atomTraversalBondOrder.size()>=mol.getNumAtoms(),"vector too small");
    PRECONDITION(!bondsInPlay || bondsInPlay->size()>=mol.getNumBonds(),"bondsInPlay too small");
    PRECONDITION(!bondSymbols || bondSymbols->size()>=mol.getNumBonds(),"bondSymbols too small");

    int nAttached=0;

    Atom *atom = mol.getAtomWithIdx(atomIdx);
    INT_LIST directTravList,cycleEndList;

    molStack.push_back(MolStackElem(atom));
    atomOrders[atom->getIdx()] = molStack.size();
    colors[atomIdx] = GREY_NODE;

    // ---------------------
    //
    //  Build the list of possible destinations from here
    //
    // ---------------------
    std::vector< PossibleType > possibles;
    possibles.resize(0);
    ROMol::OBOND_ITER_PAIR bondsPair = mol.getAtomBonds(atom);
    possibles.reserve(bondsPair.second-bondsPair.first);

    while(bondsPair.first != bondsPair.second){
      BOND_SPTR theBond = mol[*(bondsPair.first)];
      bondsPair.first++;
      if(bondsInPlay && !(*bondsInPlay)[theBond->getIdx()]) continue;
      if(inBondIdx<0 || theBond->getIdx() != static_cast<unsigned int>(inBondIdx)){
        int otherIdx = theBond->getOtherAtomIdx(atomIdx);
        long rank=ranks[otherIdx];
        // ---------------------
        //
        // things are a bit more complicated if we are sitting on a
        // ring atom we would like to traverse first to the
        // ring-closure atoms, then to atoms outside the ring first,
        // then to atoms in the ring that haven't already been visited
        // (non-ring-closure atoms).
        // 
        //  Here's how the black magic works:
        //   - non-ring atom neighbors have their original ranks
        //   - ring atom neighbors have this added to their ranks:
        //       (Bond::OTHER - bondOrder)*MAX_NATOMS*MAX_NATOMS
        //   - ring-closure neighbors lose a factor of:
        //       (Bond::OTHER+1)*MAX_NATOMS*MAX_NATOMS
        //
        //  This tactic biases us to traverse to non-ring neighbors first,
        //  original ordering if bond orders are all equal... crafty, neh?
        //  
        // ---------------------
        if( colors[otherIdx] == GREY_NODE ) {
          rank -= static_cast<int>(Bond::OTHER+1) *
            MAX_NATOMS*MAX_NATOMS;
          if(!bondSymbols){
            rank += static_cast<int>(Bond::OTHER - theBond->getBondType()) *
              MAX_NATOMS;
          } else {
            const std::string &symb=(*bondSymbols)[theBond->getIdx()];
            boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end());
            rank += (hsh%MAX_NATOMS) *  MAX_NATOMS;
          }
        } else if( theBond->getOwningMol().getRingInfo()->numBondRings(theBond->getIdx()) ){
          if(!bondSymbols){
            rank += static_cast<int>(Bond::OTHER - theBond->getBondType()) *
              MAX_NATOMS*MAX_NATOMS;
          } else {
            const std::string &symb=(*bondSymbols)[theBond->getIdx()];
            boost::uint32_t hsh=gboost::hash_range(symb.begin(),symb.end());
            rank += (hsh%MAX_NATOMS)*MAX_NATOMS*MAX_NATOMS;
          }
        }
        possibles.push_back(PossibleType(rank,otherIdx,theBond.get()));
      }
    }

    // ---------------------
    //
    //  Sort on ranks
    //
    // ---------------------
    std::sort(possibles.begin(),possibles.end(),_possibleComp);


    // ---------------------
    //
    //  Now work the children
    //
    // ---------------------
    std::vector<MolStack> subStacks;
    for(std::vector<PossibleType>::iterator possiblesIt=possibles.begin();
        possiblesIt!=possibles.end();
        possiblesIt++){
      MolStack subStack;
#if 0
      int possibleIdx = possiblesIt->second.first;
      Bond *bond = possiblesIt->second.second;
#endif
      int possibleIdx = possiblesIt->get<1>();
      Bond *bond = possiblesIt->get<2>();
      Atom *otherAtom=mol.getAtomWithIdx(possibleIdx);
      unsigned int lowestRingIdx;
      INT_VECT::const_iterator cAIt;
      switch(colors[possibleIdx]){
      case WHITE_NODE:
        // -----
        // we haven't seen this node at all before
        // -----

        // it might have some residual data from earlier calls, clean that up:
        if(otherAtom->hasProp("_TraversalBondIndexOrder")){
          otherAtom->clearProp("_TraversalBondIndexOrder");
        }

        directTravList.push_back(bond->getIdx());
        subStack.push_back(MolStackElem(bond,atomIdx));
        canonicalDFSTraversal(mol,possibleIdx,bond->getIdx(),colors,
                              cycles,ranks,cyclesAvailable,subStack,
                              atomOrders,bondVisitOrders,atomRingClosures,atomTraversalBondOrder,
                              bondsInPlay,bondSymbols);
        subStacks.push_back(subStack);
        nAttached += 1;
        break;
      case GREY_NODE:
        // -----
        // we've seen this, but haven't finished it (we're finishing a ring)
        // -----
        cycleEndList.push_back(bond->getIdx());
        cAIt=std::find(cyclesAvailable.begin(),
                       cyclesAvailable.end(),1);
        if(cAIt==cyclesAvailable.end()){
          throw ValueErrorException("Too many rings open at once. SMILES cannot be generated.");
        }
        lowestRingIdx =  cAIt-cyclesAvailable.begin();
        cyclesAvailable[lowestRingIdx] = 0;
        cycles[possibleIdx].push_back(lowestRingIdx);
        ++lowestRingIdx;

        bond->setProp("_TraversalRingClosureBond",lowestRingIdx);
        molStack.push_back(MolStackElem(bond,
                                        atom->getIdx()));
        molStack.push_back(MolStackElem(lowestRingIdx));

        // we need to add this bond (which closes the ring) to the traversal list for the
        // other atom as well:
        atomTraversalBondOrder[otherAtom->getIdx()].push_back(bond->getIdx());
        atomRingClosures[otherAtom->getIdx()].push_back(bond->getIdx());

        break;
      default:
        // -----
        // this node has been finished. don't do anything.
        // -----
        break;
      }
    }
    

    INT_VECT &ringClosures=atomRingClosures[atom->getIdx()];
    
    CHECK_INVARIANT(ringClosures.size()==cycles[atomIdx].size(),
                    "ring closure mismatch");
    for(unsigned int i=0;i<ringClosures.size();i++){
      int ringIdx=cycles[atomIdx][i];
      ringIdx += 1;
      molStack.push_back(MolStackElem(ringIdx));
    }
    cycles[atomIdx].resize(0);
  
    MolStack::const_iterator ciMS;
    for(int i=0;i<nAttached;i++){
      if(i<nAttached-1){
        int branchIdx=0;
        if(subStacks[i].begin()->type==MOL_STACK_ATOM){
          branchIdx=subStacks[i].begin()->obj.atom->getIdx();
        } else if(subStacks[i].begin()->type==MOL_STACK_BOND){
          branchIdx=-1*subStacks[i].begin()->obj.bond->getIdx();
        } else {
          ASSERT_INVARIANT(0,"branch started with something other than an atom or bond");
        }
        molStack.push_back(MolStackElem("(",branchIdx));
        for(ciMS=subStacks[i].begin();ciMS!=subStacks[i].end();ciMS++){
          molStack.push_back(*ciMS);
          switch(ciMS->type){
          case MOL_STACK_ATOM:
            atomOrders[ciMS->obj.atom->getIdx()] = molStack.size();
            break;
          case MOL_STACK_BOND:
            bondVisitOrders[ciMS->obj.bond->getIdx()] = molStack.size();
            break;
          default:
            break;
          }
        }
        molStack.push_back(MolStackElem(")",branchIdx));
      } else {
        for(ciMS=subStacks[i].begin();ciMS!=subStacks[i].end();ciMS++){
          molStack.push_back(*ciMS);
          switch(ciMS->type){
          case MOL_STACK_ATOM:
            atomOrders[ciMS->obj.atom->getIdx()] = molStack.size();
            break;
          case MOL_STACK_BOND:
            bondVisitOrders[ciMS->obj.bond->getIdx()] = molStack.size();
            break;
          default:
            break;
          }
        }
      }
    }

    //std::cerr<<"*****>>>>>> Traversal results for atom: "<<atom->getIdx()<<"> ";
    INT_LIST travList;
    // first push on the incoming bond:
    if(inBondIdx >= 0){
      //std::cerr<<" "<<inBondIdx;
      travList.push_back(inBondIdx);
    }

    // ... ring closures that end here:
    for(INT_LIST_CI ilci=cycleEndList.begin();ilci!=cycleEndList.end();++ilci){
      //std::cerr<<" ["<<*ilci<<"]";
      travList.push_back(*ilci);
    }


    // ... ring closures that start here:
    // if(atom->hasProp("_TraversalBondIndexOrder")){
    //   INT_LIST indirectTravList;
    //   atom->getProp("_TraversalBondIndexOrder",indirectTravList);
    //   for(INT_LIST_CI ilci=indirectTravList.begin();ilci!=indirectTravList.end();++ilci){
    //     //std::cerr<<" ("<<*ilci<<")";
    //     travList.push_back(*ilci);
    //   }
    // }
    BOOST_FOREACH(int ili,atomTraversalBondOrder[atom->getIdx()]){
      travList.push_back(ili);
    }