Ejemplo n.º 1
0
// find the neighbors for an atoms that are not connected by single bond that is
// not refBond
// if checkDir is true only neighbor atoms with bonds marked with a direction
// will be returned
void findAtomNeighborsHelper(const ROMol &mol, const Atom *atom,
                             const Bond *refBond, UINT_VECT &neighbors,
                             bool checkDir = false) {
  PRECONDITION(atom, "bad atom");
  PRECONDITION(refBond, "bad bond");
  neighbors.clear();
  ROMol::OEDGE_ITER beg, end;
  boost::tie(beg, end) = mol.getAtomBonds(atom);
  while (beg != end) {
    const BOND_SPTR bond = mol[*beg];
    Bond::BondDir dir = bond->getBondDir();
    if (bond->getBondType() == Bond::SINGLE &&
        bond->getIdx() != refBond->getIdx()) {
      if (checkDir) {
        if ((dir != Bond::ENDDOWNRIGHT) && (dir != Bond::ENDUPRIGHT)) {
          ++beg;
          continue;
        }
      }
      Atom *nbrAtom = bond->getOtherAtom(atom);
      neighbors.push_back(nbrAtom->getIdx());
    }
    ++beg;
  }
}
Ejemplo n.º 2
0
// Builds a SMARTS string for inmol.
//
//   inmol            - molecule to write; it is copied and the copy is what
//                      gets handed to FragmentSmartsConstruct
//   doIsomericSmiles - currently unused
//
// Returns an empty string when the molecule has no atoms. Otherwise returns
// the strings produced by FragmentSmartsConstruct, one per pass of the loop
// below, joined with ".".
std::string MolToSmarts(ROMol &inmol, bool doIsomericSmiles) {
  RDUNUSED_PARAM(doIsomericSmiles);  // does this parameter even make sense?
  std::string res;
  unsigned int nAtoms = inmol.getNumAtoms();
  if (!nAtoms) return "";

  ROMol mol(inmol);
  // For smiles writing we would be canonicalizing but we will not do that here.
  // We will simply use the atom indices as the rank.
  // Note: reserve(), not resize() - the values are appended with push_back,
  // so resizing first would leave nAtoms leading zeros and push the real
  // ranks out of the [0, nAtoms) range that is read below.
  UINT_VECT ranks;
  ranks.reserve(nAtoms);
  for (ROMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
       atIt++) {
    ranks.push_back((*atIt)->getIdx());
  }

  // every atom starts out unvisited
  std::vector<AtomColors> colors(nAtoms, Canon::WHITE_NODE);

  std::vector<AtomColors>::iterator colorIt =
      std::find(colors.begin(), colors.end(), Canon::WHITE_NODE);
  while (colorIt != colors.end()) {
    // pick the unvisited atom with the lowest rank as the next starting point
    unsigned int nextAtomIdx = 0;
    unsigned int nextRank = nAtoms + 1;
    for (unsigned int i = 0; i < nAtoms; i++) {
      if (colors[i] == Canon::WHITE_NODE && ranks[i] < nextRank) {
        nextRank = ranks[i];
        nextAtomIdx = i;
      }
    }

    // NOTE(review): presumably FragmentSmartsConstruct recolors the atoms it
    // writes out, which is what lets this loop terminate - confirm.
    res += FragmentSmartsConstruct(mol, nextAtomIdx, colors, ranks);

    // separate this fragment from the next one, if there is a next one
    colorIt = std::find(colors.begin(), colors.end(), Canon::WHITE_NODE);
    if (colorIt != colors.end()) {
      res += ".";
    }
  }
  return res;
}
Ejemplo n.º 3
0
// Figure out the CIP ranks for the atoms of a molecule
void assignAtomCIPRanks(const ROMol &mol, UINT_VECT &ranks) {
  PRECONDITION((!ranks.size() || ranks.size() >= mol.getNumAtoms()),
               "bad ranks size");
  if (!ranks.size()) ranks.resize(mol.getNumAtoms());
  unsigned int numAtoms = mol.getNumAtoms();
#ifndef USE_NEW_STEREOCHEMISTRY
  // get the initial invariants:
  DOUBLE_VECT invars(numAtoms, 0);
  buildCIPInvariants(mol, invars);
  iterateCIPRanks(mol, invars, ranks, false);
#else
  Canon::chiralRankMolAtoms(mol, ranks);
#endif

  // copy the ranks onto the atoms:
  for (unsigned int i = 0; i < numAtoms; ++i) {
    mol[i]->setProp(common_properties::_CIPRank, ranks[i], 1);
  }
}
Ejemplo n.º 4
0
// Iteratively refines per-atom ranks.
//
// Each atom carries an entry vector (cipEntries[i]). On every pass the
// ranks of an atom's bonded neighbors (plus zeros for its hydrogens) are
// appended to its entry, all entries are re-ranked with Rankers::rankVect,
// and the entries are then truncated so that only the history of ranks is
// kept for the next pass.
//
//   mol            - molecule whose atoms are being ranked
//   invars         - one value per atom (size must equal the atom count);
//                    used to compute the initial ranks and, when
//                    seedWithInvars is true, to seed the entries
//   ranks          - output; must hold at least one slot per atom
//   seedWithInvars - false: each entry starts as {atomic number, initial rank}
//                    true:  each entry starts as {invars[i] cast to int}
void iterateCIPRanks(const ROMol &mol, DOUBLE_VECT &invars, UINT_VECT &ranks,
                     bool seedWithInvars) {
  PRECONDITION(invars.size() == mol.getNumAtoms(), "bad invars size");
  PRECONDITION(ranks.size() >= mol.getNumAtoms(), "bad ranks size");

  unsigned int numAtoms = mol.getNumAtoms();
  CIP_ENTRY_VECT cipEntries(numAtoms);
  INT_LIST allIndices;
  for (unsigned int i = 0; i < numAtoms; ++i) {
    allIndices.push_back(i);
  }
#ifdef VERBOSE_CANON
  BOOST_LOG(rdDebugLog) << "invariants:" << std::endl;
  for (unsigned int i = 0; i < numAtoms; i++) {
    BOOST_LOG(rdDebugLog) << i << ": " << invars[i] << std::endl;
  }
#endif

  // rank those:
  Rankers::rankVect(invars, ranks);
#ifdef VERBOSE_CANON
  BOOST_LOG(rdDebugLog) << "initial ranks:" << std::endl;
  for (unsigned int i = 0; i < numAtoms; ++i) {
    BOOST_LOG(rdDebugLog) << i << ": " << ranks[i] << std::endl;
  }
#endif
  // Start each atom's rank vector with its atomic number:
  //  Note: in general one should avoid the temptation to
  //  use invariants here, those lead to incorrect answers
  for (unsigned int i = 0; i < numAtoms; i++) {
    if (!seedWithInvars) {
      cipEntries[i].push_back(mol[i]->getAtomicNum());
      cipEntries[i].push_back(static_cast<int>(ranks[i]));
    } else {
      cipEntries[i].push_back(static_cast<int>(invars[i]));
    }
  }

  // Nothing to refine for an empty molecule. Bailing out here also keeps us
  // from dereferencing the end iterator that std::max_element returns when
  // ranks is empty.
  if (numAtoms == 0) {
    return;
  }

  // Loop until either:
  //   1) all classes are uniquified
  //   2) the number of ranks doesn't change from one iteration to
  //      the next
  //   3) we've gone through maxIts times
  //      maxIts is calculated by dividing the number of atoms
  //      by 2. That's a pessimal version of the
  //      maximum number of steps required for two atoms to
  //      "feel" each other (each influences one additional
  //      neighbor shell per iteration).
  unsigned int maxIts = numAtoms / 2 + 1;
  unsigned int numIts = 0;
  int lastNumRanks = -1;
  unsigned int numRanks = *std::max_element(ranks.begin(), ranks.end()) + 1;
  while (numRanks < numAtoms && numIts < maxIts &&
         (lastNumRanks < 0 ||
          static_cast<unsigned int>(lastNumRanks) < numRanks)) {
    unsigned int longestEntry = 0;
    // ----------------------------------------------------
    //
    // for each atom, get a sorted list of its neighbors' ranks:
    //
    for (INT_LIST_I it = allIndices.begin(); it != allIndices.end(); ++it) {
      CIP_ENTRY localEntry;
      localEntry.reserve(16);

      // start by pushing on our neighbors' ranks:
      ROMol::OEDGE_ITER beg, end;
      boost::tie(beg, end) = mol.getAtomBonds(mol[*it].get());
      while (beg != end) {
        const Bond *bond = mol[*beg].get();
        ++beg;
        unsigned int nbrIdx = bond->getOtherAtomIdx(*it);
        const Atom *nbr = mol[nbrIdx].get();

        // shifted by one so that 0 stays free for the hydrogens added below
        int rank = ranks[nbrIdx] + 1;
        // put the neighbor in 2N times where N is the bond order as a double.
        // this is to treat aromatic linkages on fair footing. i.e. at least in
        // the
        // first iteration --c(:c):c and --C(=C)-C should look the same.
        // this was part of issue 3009911

        unsigned int count;
        if (bond->getBondType() == Bond::DOUBLE && nbr->getAtomicNum() == 15 &&
            (nbr->getDegree() == 4 || nbr->getDegree() == 3)) {
          // a special case for chiral phosphorus compounds
          // (this was leading to incorrect assignment of
          // R/S labels ):
          count = 1;

          // general justification of this is:
          // Paragraph 2.2. in the 1966 article is "Valence-Bond Conventions:
          // Multiple-Bond Unsaturation and Aromaticity". It contains several
          // conventions of which convention (b) is the one applying here:
          // "(b) Contributions by d orbitals to bonds of quadriligant atoms
          // are neglected."
          // FIX: this applies to more than just P
        } else {
          count = static_cast<unsigned int>(
              floor(2. * bond->getBondTypeAsDouble() + .1));
        }
        // keep localEntry sorted ascending while inserting
        CIP_ENTRY::iterator ePos =
            std::lower_bound(localEntry.begin(), localEntry.end(), rank);
        localEntry.insert(ePos, count, rank);
      }
      // add a zero for each coordinated H:
      // (as long as we're not a query atom)
      if (!mol[*it]->hasQuery()) {
        localEntry.insert(localEntry.begin(), mol[*it]->getTotalNumHs(), 0);
      }

      // we now have a sorted list of our neighbors' ranks,
      // copy it on in reversed order:
      cipEntries[*it].insert(cipEntries[*it].end(), localEntry.rbegin(),
                             localEntry.rend());
      if (cipEntries[*it].size() > longestEntry) {
        longestEntry = rdcast<unsigned int>(cipEntries[*it].size());
      }
    }
    // ----------------------------------------------------
    //
    // pad the entries so that we compare rounds to themselves:
    //
    for (INT_LIST_I it = allIndices.begin(); it != allIndices.end(); ++it) {
      unsigned int sz = rdcast<unsigned int>(cipEntries[*it].size());
      if (sz < longestEntry) {
        cipEntries[*it].insert(cipEntries[*it].end(), longestEntry - sz, -1);
      }
    }
    // ----------------------------------------------------
    //
    // sort the new ranks and update the list of active indices:
    //
    lastNumRanks = numRanks;

    Rankers::rankVect(cipEntries, ranks);
    numRanks = *std::max_element(ranks.begin(), ranks.end()) + 1;

    // now truncate each vector and stick the rank at the end
    // NOTE(review): this indexes element numIts + 1 unchecked, so it relies
    // on the padded entries being at least numIts + 2 long - confirm this
    // holds when seedWithInvars is true and no atom has neighbors or Hs.
    for (unsigned int i = 0; i < numAtoms; ++i) {
      cipEntries[i][numIts + 1] = ranks[i];
      cipEntries[i].erase(cipEntries[i].begin() + numIts + 2,
                          cipEntries[i].end());
    }

    ++numIts;
#ifdef VERBOSE_CANON
    BOOST_LOG(rdDebugLog) << "strings and ranks:" << std::endl;
    for (unsigned int i = 0; i < numAtoms; i++) {
      BOOST_LOG(rdDebugLog) << i << ": " << ranks[i] << " > ";
      debugVect(cipEntries[i]);
    }
#endif
  }
}