Пример #1
0
// Recursively enumerates combinations of candidate cuts.
//
// Starting at candidate index `i`, each entry of `matching_bonds` is appended
// to `bonds_selected`, the current selection is handed to addResult(), and,
// while the selection holds fewer than `maxCuts` entries, the search descends
// into the candidates that follow. The last entry of `bonds_selected` is
// removed before moving on to the next candidate.
//
// NOTE(review): the undo step removes a single entry, so this presumably
// relies on appendBonds() adding exactly one bond per call -- confirm.
static inline void processCuts(
    size_t i, size_t maxCuts, BondVector_t& bonds_selected,
    const std::vector<BondVector_t>& matching_bonds, const ROMol& mol,
    std::vector<std::pair<ROMOL_SPTR, ROMOL_SPTR> >& res) {
  const size_t nCandidates = matching_bonds.size();
  size_t pos = i;
  while (pos < nCandidates) {
    // extend the current selection with this candidate and report it
    appendBonds(bonds_selected, matching_bonds[pos]);
    addResult(res, mol, bonds_selected, maxCuts);

    // descend only while more cuts are still allowed
    const bool canCutMore = bonds_selected.size() < maxCuts;
    if (canCutMore) {
      processCuts(pos + 1, maxCuts, bonds_selected, matching_bonds, mol, res);
    }

    // backtrack before trying the next candidate
    bonds_selected.pop_back();
    ++pos;
  }
}
Пример #2
0
// Recursively enumerates combinations of candidate cuts, reporting only
// selections that hold at least `minCuts` entries.
//
// Throws ValueErrorException when `maxCuts` is smaller than `minCuts` or when
// `minCuts` is zero (the checks run on every call, including recursive ones).
//
// Starting at candidate index `i`, each entry of `matching_bonds` is appended
// to `bonds_selected`; addResult() is called once the selection has reached
// `minCuts` entries, and the search descends into the following candidates
// while the selection holds fewer than `maxCuts` entries. The last entry of
// `bonds_selected` is removed before moving on to the next candidate.
//
// NOTE(review): the undo step removes a single entry, so this presumably
// relies on appendBonds() adding exactly one bond per call -- confirm.
static inline void processCuts(
    size_t i, size_t minCuts, size_t maxCuts, BondVector_t& bonds_selected,
    const std::vector<BondVector_t>& matching_bonds, const ROMol& mol,
    std::vector<std::pair<ROMOL_SPTR, ROMOL_SPTR> >& res) {
  if (minCuts > maxCuts) {
    throw ValueErrorException("supplied maxCuts is less than minCuts");
  }
  if (0 == minCuts) {
    throw ValueErrorException("minCuts must be greater than 0");
  }

  const size_t nCandidates = matching_bonds.size();
  size_t pos = i;
  while (pos < nCandidates) {
    appendBonds(bonds_selected, matching_bonds[pos]);

    // report the selection only once it is large enough
    const bool largeEnough = !(bonds_selected.size() < minCuts);
    if (largeEnough) {
      addResult(res, mol, bonds_selected, maxCuts);
    }

    // descend only while more cuts are still allowed
    const bool canCutMore = bonds_selected.size() < maxCuts;
    if (canCutMore) {
      processCuts(pos + 1, minCuts, maxCuts, bonds_selected, matching_bonds,
                  mol, res);
    }

    // backtrack before trying the next candidate
    bonds_selected.pop_back();
    ++pos;
  }
}
Пример #3
0
// Applies one combination of bond cuts to a copy of `mol` and appends the
// resulting (core, side_chains) pair to `res`, unless a pair with the same
// atom/bond counts and the same Morgan-code hashes is already stored.
//
//   res            - output list of (core, side_chains) pairs; core may be null
//   mol            - source molecule (never modified; a working copy is cut)
//   bonds_selected - bonds to cut, given as (atom index, atom index) pairs
//   maxCuts        - used to validate selections of three or more cuts
//
// For every selected bond the bond is removed and a dummy atom (atomic number
// 0) is bonded to each of its former end atoms; both dummies get the same
// molAtomMapNumber (1 for the first cut, 2 for the second, ...).
//   * 0 cuts : nothing is produced (the function returns immediately).
//   * 1 cut  : the whole edited molecule becomes side_chains, core stays null.
//   * 2+ cuts: every fragment with exactly one attachment point is copied into
//              side_chains; among the other fragments, the last one whose
//              attachment count matches the running maximum becomes core.
//   * 3+ cuts: the selection is discarded unless some fragment carries at
//              least `maxCuts` attachment points.
// Before a new pair is stored, its atom-map labels are renumbered following
// the SMILES output order of the (label-free) side chains.
static void addResult(std::vector<std::pair<ROMOL_SPTR, ROMOL_SPTR> >&
                          res,  // const SignatureVector& resSignature,
                      const ROMol& mol,
                      const BondVector_t& bonds_selected, size_t maxCuts) {
  // Without any cut neither output molecule is created; bail out instead of
  // dereferencing the null side_chains pointer further down.
  if (0 == bonds_selected.size()) return;

#ifdef _DEBUG
  std::cout << res.size() + 1 << ": ";
#endif
  RWMol em(mol);
  // loop through the bonds to delete. == deleteBonds()
  unsigned isotope = 0;  // running cut number, used as attachment label
  std::map<unsigned, unsigned> isotope_track;  // dummy atom index -> label
  for (size_t i = 0; i < bonds_selected.size(); i++) {
#ifdef _DEBUG
    {
      std::string symbol =
          em.getAtomWithIdx(bonds_selected[i].first)->getSymbol();
      int label = 0;
      em.getAtomWithIdx(bonds_selected[i].first)
          ->getPropIfPresent(common_properties::molAtomMapNumber, label);
      char a1[32];
      if (0 == label)
        sprintf(a1, "\'%s\'", symbol.c_str());
      else
        sprintf(a1, "\'%s:%d\'", symbol.c_str(), label);
      symbol = em.getAtomWithIdx(bonds_selected[i].second)->getSymbol();
      label = 0;
      em.getAtomWithIdx(bonds_selected[i].second)
          ->getPropIfPresent(common_properties::molAtomMapNumber, label);
      char a2[32];
      if (0 == label)
        sprintf(a2, "\'%s\'", symbol.c_str());
      else
        sprintf(a2, "\'%s:%d\'", symbol.c_str(), label);

      std::cout << "(" << bonds_selected[i].first << a1 << ","
                << bonds_selected[i].second << a2 << ") ";
    }
#endif
    isotope += 1;
    // remove the bond
    em.removeBond(bonds_selected[i].first, bonds_selected[i].second);

    // now add attachment points and set attachment point labels
    Atom* a = new Atom(0);
    a->setProp(common_properties::molAtomMapNumber, static_cast<int>(isotope));
    unsigned newAtomA = em.addAtom(a, true, true);
    em.addBond(bonds_selected[i].first, newAtomA, Bond::SINGLE);
    a = new Atom(0);
    a->setProp(common_properties::molAtomMapNumber, static_cast<int>(isotope));
    unsigned newAtomB = em.addAtom(a, true, true);
    em.addBond(bonds_selected[i].second, newAtomB, Bond::SINGLE);

    // keep track of where to put isotopes
    isotope_track[newAtomA] = isotope;
    isotope_track[newAtomB] = isotope;
  }
#ifdef _DEBUG
  std::cout << "\n";
#endif
  RWMOL_SPTR core, side_chains;  // core & side_chains output molecules

  if (isotope == 1) {
    // single cut: keep both halves together, there is no core
    side_chains = RWMOL_SPTR(new RWMol(em));  // output = '%s,%s,,%s.%s'
  } else if (isotope >= 2) {
    std::vector<std::vector<int> > frags;
    unsigned int nFrags = MolOps::getMolFrags(em, frags);

    //#check if its a valid triple or bigger cut.  matchObj = re.search(
    //'\*.*\*.*\*', f)
    // check if exists a fragment with maxCut connection points (*.. *.. *)
    if (isotope >= 3) {
      bool valid = false;
      for (size_t i = 0; i < nFrags; i++) {
        unsigned nLabels = 0;
        for (size_t ai = 0; ai < frags[i].size(); ai++) {
          if (isotope_track.end() !=
              isotope_track.find(frags[i][ai]))  // new added atom
            ++nLabels;                           // found connection point
        }
        if (nLabels >=
            maxCuts) {  // looks like it should be selected as core !  ??????
          valid = true;
          break;
        }
      }
      if (!valid) {
#ifdef _DEBUG
        std::cout << "isotope>=3: invalid fragments. fragment with maxCut "
                     "connection points not found"
                  << "\n";
#endif
        return;
      }
    }

    // sentinel meaning "no core fragment selected"
    size_t iCore = std::numeric_limits<size_t>::max();
    side_chains = RWMOL_SPTR(new RWMol);
    std::map<unsigned, unsigned>
        visitedBonds;  // key is bond index in source molecule
    unsigned maxAttachments = 0;
    for (size_t i = 0; i < frags.size(); i++) {
      // count the dummy atoms (attachment points) inside this fragment
      unsigned nAttachments = 0;
      for (size_t ai = 0; ai < frags[i].size(); ai++) {
        if (isotope_track.end() !=
            isotope_track.find(
                frags[i][ai]))  // == if(a->hasProp("molAtomMapNumber"))
          ++nAttachments;
      }
      if (maxAttachments < nAttachments) maxAttachments = nAttachments;
      if (1 == nAttachments) {  // build side-chain set of molecules from
                                // selected fragment
        std::map<unsigned, unsigned>
            newAtomMap;  // key is atom index in source molecule
        for (size_t ai = 0; ai < frags[i].size(); ai++) {
          Atom* a = em.getAtomWithIdx(frags[i][ai]);
          newAtomMap[frags[i][ai]] =
              side_chains->addAtom(a->copy(), true, true);
        }
        // add all bonds from this fragment (only the bond type is carried
        // over); visitedBonds keeps each bond from being added twice
        for (size_t ai = 0; ai < frags[i].size(); ai++) {
          Atom* a = em.getAtomWithIdx(frags[i][ai]);
          ROMol::OEDGE_ITER beg, end;
          for (boost::tie(beg, end) = em.getAtomBonds(a); beg != end; ++beg) {
            const BOND_SPTR bond = em[*beg];
            if (newAtomMap.end() == newAtomMap.find(bond->getBeginAtomIdx()) ||
                newAtomMap.end() == newAtomMap.find(bond->getEndAtomIdx()) ||
                visitedBonds.end() != visitedBonds.find(bond->getIdx()))
              continue;
            unsigned ai1 = newAtomMap[bond->getBeginAtomIdx()];
            unsigned ai2 = newAtomMap[bond->getEndAtomIdx()];
            unsigned bi = side_chains->addBond(ai1, ai2, bond->getBondType());
            visitedBonds[bond->getIdx()] = bi;
          }
        }
      } else {  // select the core fragment
#ifdef _DEBUG
        if (iCore != std::numeric_limits<size_t>::max())
          std::cout << "Next CORE found. iCore=" << iCore << " New i=" << i
                    << " nAttachments=" << nAttachments << "\n";
#endif
        if (nAttachments >= maxAttachments)  // Choose a fragment with maximal
                                             // number of connection points as a
                                             // core
          iCore = i;
      }
    }
    // build core molecule from selected fragment
    if (iCore != std::numeric_limits<size_t>::max()) {
      core = RWMOL_SPTR(new RWMol);
      visitedBonds.clear();
      std::map<unsigned, unsigned>
          newAtomMap;  // key is atom index in source molecule
      for (size_t i = 0; i < frags[iCore].size(); i++) {
        unsigned ai = frags[iCore][i];
        Atom* a = em.getAtomWithIdx(ai);
        newAtomMap[ai] = core->addAtom(a->copy(), true, true);
      }
      // add all bonds from this fragment
      for (size_t ai = 0; ai < frags[iCore].size(); ai++) {
        Atom* a = em.getAtomWithIdx(frags[iCore][ai]);
        ROMol::OEDGE_ITER beg, end;
        for (boost::tie(beg, end) = em.getAtomBonds(a); beg != end; ++beg) {
          const BOND_SPTR bond = em[*beg];
          if (newAtomMap.end() == newAtomMap.find(bond->getBeginAtomIdx()) ||
              newAtomMap.end() == newAtomMap.find(bond->getEndAtomIdx()) ||
              visitedBonds.end() != visitedBonds.find(bond->getIdx()))
            continue;
          unsigned ai1 = newAtomMap[bond->getBeginAtomIdx()];
          unsigned ai2 = newAtomMap[bond->getEndAtomIdx()];
          unsigned bi = core->addBond(ai1, ai2, bond->getBondType());
          visitedBonds[bond->getIdx()] = bi;
        }
      }
    }  // iCore != "not found" sentinel
  }
  // check for duplicates:
  bool resFound = false;
  size_t ri = 0;
  for (ri = 0; ri < res.size(); ri++) {
    const std::pair<ROMOL_SPTR, ROMOL_SPTR>& r = res[ri];
    // cheap pre-filter: atom and bond counts of both parts must agree
    if (side_chains->getNumAtoms() == r.second->getNumAtoms() &&
        side_chains->getNumBonds() == r.second->getNumBonds() &&
        ((NULL == core.get() && NULL == r.first.get()) ||
         (NULL != core.get() && NULL != r.first.get() &&
          core->getNumAtoms() == r.first->getNumAtoms() &&
          core->getNumBonds() == r.first->getNumBonds()))) {
      // ToDo accurate check:
      // 1. compare hash code
      if (computeMorganCodeHash(*side_chains) ==
              computeMorganCodeHash(*r.second) &&
          (NULL == core.get() ||
           computeMorganCodeHash(*core) == computeMorganCodeHash(*r.first))) {
        // 2. final check to exclude hash collisions
        // It was decided that this is not necessary to implement
        resFound = true;
        break;
      }
    }
  }
  if (!resFound) {
    // From rfrag.py
    // now change the labels on sidechains and core
    // to get the new labels, cansmi the dot-disconnected side chains
    // the first fragment in the side chains has attachment label 1, 2nd: 2,
    // 3rd: 3, then change the labels accordingly in the core
    std::map<unsigned int, int> canonicalAtomMaps;  // old label -> new label
    if (side_chains.get()) {
      RWMol tmp_side_chain(*(side_chains.get()));
      std::vector<int> oldMaps(tmp_side_chain.getNumAtoms(), 0);

      // clear atom labels (they are used in canonicalization)
      //  and move them to dummy storage
      for (ROMol::AtomIterator at = tmp_side_chain.beginAtoms();
           at != tmp_side_chain.endAtoms(); ++at) {
        int label = 0;
        if ((*at)->getPropIfPresent(common_properties::molAtomMapNumber,
                                    label)) {
          (*at)->clearProp(common_properties::molAtomMapNumber);
          oldMaps[(*at)->getIdx()] = label;
        }
      }

      // The SMILES text itself is unused; the call is kept because the atom
      // output order is read from tmp_side_chain right afterwards.
      const bool doIsomericSmiles = true;  // should this be false???
      std::string smiles = MolToSmiles(tmp_side_chain, doIsomericSmiles);

      // Get the canonical output order and use it to remap
      //  the atom maps in the side chains
      //  these will get reapplied to the core (if there is a core)
      const std::vector<unsigned int>& ranks =
          tmp_side_chain.getProp<std::vector<unsigned int> >(
              common_properties::_smilesAtomOutputOrder);

      // (position in output order, old label) for every labelled atom
      std::vector<std::pair<unsigned int, int> > rankedAtoms;

      for (size_t idx = 0; idx < ranks.size(); ++idx) {
        unsigned int atom_idx = ranks[idx];
        if (oldMaps[atom_idx] > 0) {
          const int label = oldMaps[atom_idx];
          rankedAtoms.push_back(std::make_pair(idx, label));
        }
      }
      std::sort(rankedAtoms.begin(), rankedAtoms.end());
      // hand out new labels 1, 2, 3, ... in order of first appearance
      int nextMap = 0;
      for (size_t i = 0; i < rankedAtoms.size(); ++i) {
        if (canonicalAtomMaps.find(rankedAtoms[i].second) ==
            canonicalAtomMaps.end()) {
          canonicalAtomMaps[rankedAtoms[i].second] = ++nextMap;
        }
      }
    }

    if (core.get()) {  // remap core if it exists
      for (ROMol::AtomIterator at = core->beginAtoms(); at != core->endAtoms();
           ++at) {
        int label = 0;
        if ((*at)->getPropIfPresent(common_properties::molAtomMapNumber,
                                    label)) {
          (*at)->setProp(common_properties::molAtomMapNumber,
                         canonicalAtomMaps[label]);
        }
      }
    }

    // remap the side chains themselves
    for (ROMol::AtomIterator at = side_chains->beginAtoms();
         at != side_chains->endAtoms(); ++at) {
      int label = 0;
      if ((*at)->getPropIfPresent(common_properties::molAtomMapNumber,
                                  label)) {
        (*at)->setProp(common_properties::molAtomMapNumber,
                       canonicalAtomMaps[label]);
      }
    }

    res.push_back(std::pair<ROMOL_SPTR, ROMOL_SPTR>(core, side_chains));
  }
#ifdef _DEBUG
  else
    std::cout << res.size() + 1 << " --- DUPLICATE Result FOUND --- ri=" << ri
              << "\n";
#endif
}
Пример #4
0
// Applies one combination of bond cuts to a copy of `mol` and appends the
// resulting (core, side_chains) pair to `res`, unless a pair with the same
// atom/bond counts and the same Morgan-code hashes is already stored.
//
//   res            - output list of (core, side_chains) pairs; core may be null
//   mol            - source molecule (never modified; a working copy is cut)
//   bonds_selected - bonds to cut, given as (atom index, atom index) pairs
//   maxCuts        - used to validate selections of three or more cuts
//
// For every selected bond the bond is removed and a dummy atom (atomic number
// 0) is bonded to each of its former end atoms; both dummies get the same
// molAtomMapNumber (1 for the first cut, 2 for the second, ...).
//   * 0 cuts : nothing is produced (the function returns immediately).
//   * 1 cut  : the whole edited molecule becomes side_chains, core stays null.
//   * 2+ cuts: every fragment with exactly one attachment point is copied into
//              side_chains; among the other fragments, the last one whose
//              attachment count matches the running maximum becomes core.
//   * 3+ cuts: the selection is discarded unless some fragment carries at
//              least `maxCuts` attachment points.
static void addResult(std::vector<std::pair<ROMOL_SPTR, ROMOL_SPTR> >&
                          res,  // const SignatureVector& resSignature,
                      const ROMol& mol,
                      const BondVector_t& bonds_selected, size_t maxCuts) {
  // Without any cut neither output molecule is created; bail out instead of
  // storing a (null, null) pair or dereferencing the null side_chains pointer
  // in the duplicate check below.
  if (0 == bonds_selected.size()) return;

#ifdef _DEBUG
  std::cout << res.size() + 1 << ": ";
#endif
  RWMol em(mol);
  // loop through the bonds to delete. == deleteBonds()
  unsigned isotope = 0;  // running cut number, used as attachment label
  std::map<unsigned, unsigned> isotope_track;  // dummy atom index -> label
  for (size_t i = 0; i < bonds_selected.size(); i++) {
#ifdef _DEBUG
    {
      std::string symbol =
          em.getAtomWithIdx(bonds_selected[i].first)->getSymbol();
      int label = 0;
      em.getAtomWithIdx(bonds_selected[i].first)
          ->getPropIfPresent(common_properties::molAtomMapNumber, label);
      char a1[32];
      if (0 == label)
        sprintf(a1, "\'%s\'", symbol.c_str());
      else
        sprintf(a1, "\'%s:%d\'", symbol.c_str(), label);
      symbol = em.getAtomWithIdx(bonds_selected[i].second)->getSymbol();
      label = 0;
      em.getAtomWithIdx(bonds_selected[i].second)
          ->getPropIfPresent(common_properties::molAtomMapNumber, label);
      char a2[32];
      if (0 == label)
        sprintf(a2, "\'%s\'", symbol.c_str());
      else
        sprintf(a2, "\'%s:%d\'", symbol.c_str(), label);

      std::cout << "(" << bonds_selected[i].first << a1 << ","
                << bonds_selected[i].second << a2 << ") ";
    }
#endif
    isotope += 1;
    // remove the bond
    em.removeBond(bonds_selected[i].first, bonds_selected[i].second);

    // now add attachment points and set attachment point labels
    Atom* a = new Atom(0);
    a->setProp(common_properties::molAtomMapNumber, static_cast<int>(isotope));
    unsigned newAtomA = em.addAtom(a, true, true);
    em.addBond(bonds_selected[i].first, newAtomA, Bond::SINGLE);
    a = new Atom(0);
    a->setProp(common_properties::molAtomMapNumber, static_cast<int>(isotope));
    unsigned newAtomB = em.addAtom(a, true, true);
    em.addBond(bonds_selected[i].second, newAtomB, Bond::SINGLE);

    // keep track of where to put isotopes
    isotope_track[newAtomA] = isotope;
    isotope_track[newAtomB] = isotope;
  }
#ifdef _DEBUG
  std::cout << "\n";
#endif
  RWMOL_SPTR core, side_chains;  // core & side_chains output molecules

  if (isotope == 1) {
    // single cut: keep both halves together, there is no core
    side_chains = RWMOL_SPTR(new RWMol(em));  // output = '%s,%s,,%s.%s'
  } else if (isotope >= 2) {
    std::vector<std::vector<int> > frags;
    unsigned int nFrags = MolOps::getMolFrags(em, frags);

    //#check if its a valid triple or bigger cut.  matchObj = re.search(
    //'\*.*\*.*\*', f)
    // check if exists a fragment with maxCut connection points (*.. *.. *)
    if (isotope >= 3) {
      bool valid = false;
      for (size_t i = 0; i < nFrags; i++) {
        unsigned nLabels = 0;
        for (size_t ai = 0; ai < frags[i].size(); ai++) {
          if (isotope_track.end() !=
              isotope_track.find(frags[i][ai]))  // new added atom
            ++nLabels;                           // found connection point
        }
        if (nLabels >=
            maxCuts) {  // looks like it should be selected as core !  ??????
          valid = true;
          break;
        }
      }
      if (!valid) {
#ifdef _DEBUG
        std::cout << "isotope>=3: invalid fragments. fragment with maxCut "
                     "connection points not found"
                  << "\n";
#endif
        return;
      }
    }

    // sentinel meaning "no core fragment selected"
    size_t iCore = std::numeric_limits<size_t>::max();
    side_chains = RWMOL_SPTR(new RWMol);
    std::map<unsigned, unsigned>
        visitedBonds;  // key is bond index in source molecule
    unsigned maxAttachments = 0;
    for (size_t i = 0; i < frags.size(); i++) {
      // count the dummy atoms (attachment points) inside this fragment
      unsigned nAttachments = 0;
      for (size_t ai = 0; ai < frags[i].size(); ai++) {
        if (isotope_track.end() !=
            isotope_track.find(
                frags[i][ai]))  // == if(a->hasProp("molAtomMapNumber"))
          ++nAttachments;
      }
      if (maxAttachments < nAttachments) maxAttachments = nAttachments;
      if (1 == nAttachments) {  // build side-chain set of molecules from
                                // selected fragment
        std::map<unsigned, unsigned>
            newAtomMap;  // key is atom index in source molecule
        for (size_t ai = 0; ai < frags[i].size(); ai++) {
          Atom* a = em.getAtomWithIdx(frags[i][ai]);
          newAtomMap[frags[i][ai]] =
              side_chains->addAtom(a->copy(), true, true);
        }
        // add all bonds from this fragment (only the bond type is carried
        // over); visitedBonds keeps each bond from being added twice
        for (size_t ai = 0; ai < frags[i].size(); ai++) {
          Atom* a = em.getAtomWithIdx(frags[i][ai]);
          ROMol::OEDGE_ITER beg, end;
          for (boost::tie(beg, end) = em.getAtomBonds(a); beg != end; ++beg) {
            const BOND_SPTR bond = em[*beg];
            if (newAtomMap.end() == newAtomMap.find(bond->getBeginAtomIdx()) ||
                newAtomMap.end() == newAtomMap.find(bond->getEndAtomIdx()) ||
                visitedBonds.end() != visitedBonds.find(bond->getIdx()))
              continue;
            unsigned ai1 = newAtomMap[bond->getBeginAtomIdx()];
            unsigned ai2 = newAtomMap[bond->getEndAtomIdx()];
            unsigned bi = side_chains->addBond(ai1, ai2, bond->getBondType());
            visitedBonds[bond->getIdx()] = bi;
          }
        }
      } else {  // select the core fragment
#ifdef _DEBUG
        if (iCore != std::numeric_limits<size_t>::max())
          std::cout << "Next CORE found. iCore=" << iCore << " New i=" << i
                    << " nAttachments=" << nAttachments << "\n";
#endif
        if (nAttachments >= maxAttachments)  // Choose a fragment with maximal
                                             // number of connection points as a
                                             // core
          iCore = i;
      }
    }
    // build core molecule from selected fragment
    if (iCore != std::numeric_limits<size_t>::max()) {
      core = RWMOL_SPTR(new RWMol);
      visitedBonds.clear();
      std::map<unsigned, unsigned>
          newAtomMap;  // key is atom index in source molecule
      for (size_t i = 0; i < frags[iCore].size(); i++) {
        unsigned ai = frags[iCore][i];
        Atom* a = em.getAtomWithIdx(ai);
        newAtomMap[ai] = core->addAtom(a->copy(), true, true);
      }
      // add all bonds from this fragment
      for (size_t ai = 0; ai < frags[iCore].size(); ai++) {
        Atom* a = em.getAtomWithIdx(frags[iCore][ai]);
        ROMol::OEDGE_ITER beg, end;
        for (boost::tie(beg, end) = em.getAtomBonds(a); beg != end; ++beg) {
          const BOND_SPTR bond = em[*beg];
          if (newAtomMap.end() == newAtomMap.find(bond->getBeginAtomIdx()) ||
              newAtomMap.end() == newAtomMap.find(bond->getEndAtomIdx()) ||
              visitedBonds.end() != visitedBonds.find(bond->getIdx()))
            continue;
          unsigned ai1 = newAtomMap[bond->getBeginAtomIdx()];
          unsigned ai2 = newAtomMap[bond->getEndAtomIdx()];
          unsigned bi = core->addBond(ai1, ai2, bond->getBondType());
          visitedBonds[bond->getIdx()] = bi;
        }
      }
    }  // iCore != "not found" sentinel
  }
  // check for duplicates:
  bool resFound = false;
  size_t ri = 0;
  for (ri = 0; ri < res.size(); ri++) {
    const std::pair<ROMOL_SPTR, ROMOL_SPTR>& r = res[ri];
    // cheap pre-filter: atom and bond counts of both parts must agree
    if (side_chains->getNumAtoms() == r.second->getNumAtoms() &&
        side_chains->getNumBonds() == r.second->getNumBonds() &&
        ((NULL == core.get() && NULL == r.first.get()) ||
         (NULL != core.get() && NULL != r.first.get() &&
          core->getNumAtoms() == r.first->getNumAtoms() &&
          core->getNumBonds() == r.first->getNumBonds()))) {
      // ToDo accurate check:
      // 1. compare hash code
      if (computeMorganCodeHash(*side_chains) ==
              computeMorganCodeHash(*r.second) &&
          (NULL == core.get() ||
           computeMorganCodeHash(*core) == computeMorganCodeHash(*r.first))) {
        // 2. final check to exclude hash collisions
        // It was decided that this is not necessary to implement
        resFound = true;
        break;
      }
    }
  }
  if (!resFound)
    res.push_back(std::pair<ROMOL_SPTR, ROMOL_SPTR>(core, side_chains));
#ifdef _DEBUG
  else
    std::cout << res.size() + 1 << " --- DUPLICATE Result FOUND --- ri=" << ri
              << "\n";
#endif
}