Ejemplo n.º 1
0
// Figure out the CIP ranks for the atoms of a molecule
void assignAtomCIPRanks(const ROMol &mol, UINT_VECT &ranks) {
  PRECONDITION((!ranks.size() || ranks.size() >= mol.getNumAtoms()),
               "bad ranks size");
  if (!ranks.size()) ranks.resize(mol.getNumAtoms());
  unsigned int numAtoms = mol.getNumAtoms();
#ifndef USE_NEW_STEREOCHEMISTRY
  // get the initial invariants:
  DOUBLE_VECT invars(numAtoms, 0);
  buildCIPInvariants(mol, invars);
  iterateCIPRanks(mol, invars, ranks, false);
#else
  Canon::chiralRankMolAtoms(mol, ranks);
#endif

  // copy the ranks onto the atoms:
  for (unsigned int i = 0; i < numAtoms; ++i) {
    mol[i]->setProp(common_properties::_CIPRank, ranks[i], 1);
  }
}
Ejemplo n.º 2
0
void iterateCIPRanks(const ROMol &mol, DOUBLE_VECT &invars, UINT_VECT &ranks,
                     bool seedWithInvars) {
  PRECONDITION(invars.size() == mol.getNumAtoms(), "bad invars size");
  PRECONDITION(ranks.size() >= mol.getNumAtoms(), "bad ranks size");

  unsigned int numAtoms = mol.getNumAtoms();
  CIP_ENTRY_VECT cipEntries(numAtoms);
  INT_LIST allIndices;
  for (unsigned int i = 0; i < numAtoms; ++i) {
    allIndices.push_back(i);
  }
#ifdef VERBOSE_CANON
  BOOST_LOG(rdDebugLog) << "invariants:" << std::endl;
  for (unsigned int i = 0; i < numAtoms; i++) {
    BOOST_LOG(rdDebugLog) << i << ": " << invars[i] << std::endl;
  }
#endif

  // rank those:
  Rankers::rankVect(invars, ranks);
#ifdef VERBOSE_CANON
  BOOST_LOG(rdDebugLog) << "initial ranks:" << std::endl;
  for (unsigned int i = 0; i < numAtoms; ++i) {
    BOOST_LOG(rdDebugLog) << i << ": " << ranks[i] << std::endl;
  }
#endif
  // Start each atom's rank vector with its atomic number:
  //  Note: in general one should avoid the temptation to
  //  use invariants here, those lead to incorrect answers
  for (unsigned int i = 0; i < numAtoms; i++) {
    if (!seedWithInvars) {
      cipEntries[i].push_back(mol[i]->getAtomicNum());
      cipEntries[i].push_back(static_cast<int>(ranks[i]));
    } else {
      cipEntries[i].push_back(static_cast<int>(invars[i]));
    }
  }

  // Loop until either:
  //   1) all classes are uniquified
  //   2) the number of ranks doesn't change from one iteration to
  //      the next
  //   3) we've gone through maxIts times
  //      maxIts is calculated by dividing the number of atoms
  //      by 2. That's a pessimal version of the
  //      maximum number of steps required for two atoms to
  //      "feel" each other (each influences one additional
  //      neighbor shell per iteration).
  unsigned int maxIts = numAtoms / 2 + 1;
  unsigned int numIts = 0;
  int lastNumRanks = -1;
  unsigned int numRanks = *std::max_element(ranks.begin(), ranks.end()) + 1;
  while (numRanks < numAtoms && numIts < maxIts &&
         (lastNumRanks < 0 ||
          static_cast<unsigned int>(lastNumRanks) < numRanks)) {
    unsigned int longestEntry = 0;
    // ----------------------------------------------------
    //
    // for each atom, get a sorted list of its neighbors' ranks:
    //
    for (INT_LIST_I it = allIndices.begin(); it != allIndices.end(); ++it) {
      CIP_ENTRY localEntry;
      localEntry.reserve(16);

      // start by pushing on our neighbors' ranks:
      ROMol::OEDGE_ITER beg, end;
      boost::tie(beg, end) = mol.getAtomBonds(mol[*it].get());
      while (beg != end) {
        const Bond *bond = mol[*beg].get();
        ++beg;
        unsigned int nbrIdx = bond->getOtherAtomIdx(*it);
        const Atom *nbr = mol[nbrIdx].get();

        int rank = ranks[nbrIdx] + 1;
        // put the neighbor in 2N times where N is the bond order as a double.
        // this is to treat aromatic linkages on fair footing. i.e. at least in
        // the
        // first iteration --c(:c):c and --C(=C)-C should look the same.
        // this was part of issue 3009911

        unsigned int count;
        if (bond->getBondType() == Bond::DOUBLE && nbr->getAtomicNum() == 15 &&
            (nbr->getDegree() == 4 || nbr->getDegree() == 3)) {
          // a special case for chiral phophorous compounds
          // (this was leading to incorrect assignment of
          // R/S labels ):
          count = 1;

          // general justification of this is:
          // Paragraph 2.2. in the 1966 article is "Valence-Bond Conventions:
          // Multiple-Bond Unsaturation and Aromaticity". It contains several
          // conventions of which convention (b) is the one applying here:
          // "(b) Contibutions by d orbitals to bonds of quadriligant atoms are
          // neglected."
          // FIX: this applies to more than just P
        } else {
          count = static_cast<unsigned int>(
              floor(2. * bond->getBondTypeAsDouble() + .1));
        }
        CIP_ENTRY::iterator ePos =
            std::lower_bound(localEntry.begin(), localEntry.end(), rank);
        localEntry.insert(ePos, count, rank);
        ++nbr;
      }
      // add a zero for each coordinated H:
      // (as long as we're not a query atom)
      if (!mol[*it]->hasQuery()) {
        localEntry.insert(localEntry.begin(), mol[*it]->getTotalNumHs(), 0);
      }

      // we now have a sorted list of our neighbors' ranks,
      // copy it on in reversed order:
      cipEntries[*it].insert(cipEntries[*it].end(), localEntry.rbegin(),
                             localEntry.rend());
      if (cipEntries[*it].size() > longestEntry) {
        longestEntry = rdcast<unsigned int>(cipEntries[*it].size());
      }
    }
    // ----------------------------------------------------
    //
    // pad the entries so that we compare rounds to themselves:
    //
    for (INT_LIST_I it = allIndices.begin(); it != allIndices.end(); ++it) {
      unsigned int sz = rdcast<unsigned int>(cipEntries[*it].size());
      if (sz < longestEntry) {
        cipEntries[*it].insert(cipEntries[*it].end(), longestEntry - sz, -1);
      }
    }
    // ----------------------------------------------------
    //
    // sort the new ranks and update the list of active indices:
    //
    lastNumRanks = numRanks;

    Rankers::rankVect(cipEntries, ranks);
    numRanks = *std::max_element(ranks.begin(), ranks.end()) + 1;

    // now truncate each vector and stick the rank at the end
    for (unsigned int i = 0; i < numAtoms; ++i) {
      cipEntries[i][numIts + 1] = ranks[i];
      cipEntries[i].erase(cipEntries[i].begin() + numIts + 2,
                          cipEntries[i].end());
    }

    ++numIts;
#ifdef VERBOSE_CANON
    BOOST_LOG(rdDebugLog) << "strings and ranks:" << std::endl;
    for (unsigned int i = 0; i < numAtoms; i++) {
      BOOST_LOG(rdDebugLog) << i << ": " << ranks[i] << " > ";
      debugVect(cipEntries[i]);
    }
#endif
  }
}