Example #1
0
void iterateCIPRanks(const ROMol &mol, DOUBLE_VECT &invars, UINT_VECT &ranks,
                     bool seedWithInvars) {
  PRECONDITION(invars.size() == mol.getNumAtoms(), "bad invars size");
  PRECONDITION(ranks.size() >= mol.getNumAtoms(), "bad ranks size");

  unsigned int numAtoms = mol.getNumAtoms();
  CIP_ENTRY_VECT cipEntries(numAtoms);
  INT_LIST allIndices;
  for (unsigned int i = 0; i < numAtoms; ++i) {
    allIndices.push_back(i);
  }
#ifdef VERBOSE_CANON
  BOOST_LOG(rdDebugLog) << "invariants:" << std::endl;
  for (unsigned int i = 0; i < numAtoms; i++) {
    BOOST_LOG(rdDebugLog) << i << ": " << invars[i] << std::endl;
  }
#endif

  // rank those:
  Rankers::rankVect(invars, ranks);
#ifdef VERBOSE_CANON
  BOOST_LOG(rdDebugLog) << "initial ranks:" << std::endl;
  for (unsigned int i = 0; i < numAtoms; ++i) {
    BOOST_LOG(rdDebugLog) << i << ": " << ranks[i] << std::endl;
  }
#endif
  // Start each atom's rank vector with its atomic number:
  //  Note: in general one should avoid the temptation to
  //  use invariants here, those lead to incorrect answers
  for (unsigned int i = 0; i < numAtoms; i++) {
    if (!seedWithInvars) {
      cipEntries[i].push_back(mol[i]->getAtomicNum());
      cipEntries[i].push_back(static_cast<int>(ranks[i]));
    } else {
      cipEntries[i].push_back(static_cast<int>(invars[i]));
    }
  }

  // Loop until either:
  //   1) all classes are uniquified
  //   2) the number of ranks doesn't change from one iteration to
  //      the next
  //   3) we've gone through maxIts times
  //      maxIts is calculated by dividing the number of atoms
  //      by 2. That's a pessimal version of the
  //      maximum number of steps required for two atoms to
  //      "feel" each other (each influences one additional
  //      neighbor shell per iteration).
  unsigned int maxIts = numAtoms / 2 + 1;
  unsigned int numIts = 0;
  int lastNumRanks = -1;
  unsigned int numRanks = *std::max_element(ranks.begin(), ranks.end()) + 1;
  while (numRanks < numAtoms && numIts < maxIts &&
         (lastNumRanks < 0 ||
          static_cast<unsigned int>(lastNumRanks) < numRanks)) {
    unsigned int longestEntry = 0;
    // ----------------------------------------------------
    //
    // for each atom, get a sorted list of its neighbors' ranks:
    //
    for (INT_LIST_I it = allIndices.begin(); it != allIndices.end(); ++it) {
      CIP_ENTRY localEntry;
      localEntry.reserve(16);

      // start by pushing on our neighbors' ranks:
      ROMol::OEDGE_ITER beg, end;
      boost::tie(beg, end) = mol.getAtomBonds(mol[*it].get());
      while (beg != end) {
        const Bond *bond = mol[*beg].get();
        ++beg;
        unsigned int nbrIdx = bond->getOtherAtomIdx(*it);
        const Atom *nbr = mol[nbrIdx].get();

        int rank = ranks[nbrIdx] + 1;
        // put the neighbor in 2N times where N is the bond order as a double.
        // this is to treat aromatic linkages on fair footing. i.e. at least in
        // the
        // first iteration --c(:c):c and --C(=C)-C should look the same.
        // this was part of issue 3009911

        unsigned int count;
        if (bond->getBondType() == Bond::DOUBLE && nbr->getAtomicNum() == 15 &&
            (nbr->getDegree() == 4 || nbr->getDegree() == 3)) {
          // a special case for chiral phophorous compounds
          // (this was leading to incorrect assignment of
          // R/S labels ):
          count = 1;

          // general justification of this is:
          // Paragraph 2.2. in the 1966 article is "Valence-Bond Conventions:
          // Multiple-Bond Unsaturation and Aromaticity". It contains several
          // conventions of which convention (b) is the one applying here:
          // "(b) Contibutions by d orbitals to bonds of quadriligant atoms are
          // neglected."
          // FIX: this applies to more than just P
        } else {
          count = static_cast<unsigned int>(
              floor(2. * bond->getBondTypeAsDouble() + .1));
        }
        CIP_ENTRY::iterator ePos =
            std::lower_bound(localEntry.begin(), localEntry.end(), rank);
        localEntry.insert(ePos, count, rank);
        ++nbr;
      }
      // add a zero for each coordinated H:
      // (as long as we're not a query atom)
      if (!mol[*it]->hasQuery()) {
        localEntry.insert(localEntry.begin(), mol[*it]->getTotalNumHs(), 0);
      }

      // we now have a sorted list of our neighbors' ranks,
      // copy it on in reversed order:
      cipEntries[*it].insert(cipEntries[*it].end(), localEntry.rbegin(),
                             localEntry.rend());
      if (cipEntries[*it].size() > longestEntry) {
        longestEntry = rdcast<unsigned int>(cipEntries[*it].size());
      }
    }
    // ----------------------------------------------------
    //
    // pad the entries so that we compare rounds to themselves:
    //
    for (INT_LIST_I it = allIndices.begin(); it != allIndices.end(); ++it) {
      unsigned int sz = rdcast<unsigned int>(cipEntries[*it].size());
      if (sz < longestEntry) {
        cipEntries[*it].insert(cipEntries[*it].end(), longestEntry - sz, -1);
      }
    }
    // ----------------------------------------------------
    //
    // sort the new ranks and update the list of active indices:
    //
    lastNumRanks = numRanks;

    Rankers::rankVect(cipEntries, ranks);
    numRanks = *std::max_element(ranks.begin(), ranks.end()) + 1;

    // now truncate each vector and stick the rank at the end
    for (unsigned int i = 0; i < numAtoms; ++i) {
      cipEntries[i][numIts + 1] = ranks[i];
      cipEntries[i].erase(cipEntries[i].begin() + numIts + 2,
                          cipEntries[i].end());
    }

    ++numIts;
#ifdef VERBOSE_CANON
    BOOST_LOG(rdDebugLog) << "strings and ranks:" << std::endl;
    for (unsigned int i = 0; i < numAtoms; i++) {
      BOOST_LOG(rdDebugLog) << i << ": " << ranks[i] << " > ";
      debugVect(cipEntries[i]);
    }
#endif
  }
}
Example #2
0
void checkAndCorrectChiralityOfMatchingAtomsInProduct(
    const ROMol &reactant, unsigned reactantAtomIdx, const Atom &reactantAtom,
    RWMOL_SPTR product, ReactantProductAtomMapping *mapping) {
  for (unsigned i = 0; i < mapping->reactProdAtomMap[reactantAtomIdx].size();
       i++) {
    unsigned productAtomIdx = mapping->reactProdAtomMap[reactantAtomIdx][i];
    Atom *productAtom = product->getAtomWithIdx(productAtomIdx);

    if (productAtom->getChiralTag() != Atom::CHI_UNSPECIFIED ||
        reactantAtom.getChiralTag() == Atom::CHI_UNSPECIFIED ||
        reactantAtom.getChiralTag() == Atom::CHI_OTHER ||
        productAtom->hasProp(common_properties::molInversionFlag)) {
      continue;
    }
    // we can only do something sensible here if we have the same number of
    // bonds in the reactants and the products:
    if (reactantAtom.getDegree() != productAtom->getDegree()) {
      continue;
    }
    unsigned int nUnknown = 0;
    INT_LIST pOrder;
    ROMol::ADJ_ITER nbrIdx, endNbrs;
    boost::tie(nbrIdx, endNbrs) = product->getAtomNeighbors(productAtom);
    while (nbrIdx != endNbrs) {
      if (mapping->prodReactAtomMap.find(*nbrIdx) ==
              mapping->prodReactAtomMap.end() ||
          !reactant.getBondBetweenAtoms(reactantAtom.getIdx(),
                                        mapping->prodReactAtomMap[*nbrIdx])) {
        ++nUnknown;
        // if there's more than one bond in the product that doesn't correspond
        // to anything in the reactant, we're also doomed
        if (nUnknown > 1) break;
        // otherwise, add a -1 to the bond order that we'll fill in later
        pOrder.push_back(-1);
      } else {
        const Bond *rBond = reactant.getBondBetweenAtoms(
            reactantAtom.getIdx(), mapping->prodReactAtomMap[*nbrIdx]);
        CHECK_INVARIANT(rBond, "expected reactant bond not found");
        pOrder.push_back(rBond->getIdx());
      }
      ++nbrIdx;
    }
    if (nUnknown == 1) {
      // find the reactant bond that hasn't yet been accounted for:
      int unmatchedBond = -1;
      boost::tie(nbrIdx, endNbrs) = reactant.getAtomNeighbors(&reactantAtom);
      while (nbrIdx != endNbrs) {
        const Bond *rBond =
            reactant.getBondBetweenAtoms(reactantAtom.getIdx(), *nbrIdx);
        if (std::find(pOrder.begin(), pOrder.end(), rBond->getIdx()) ==
            pOrder.end()) {
          unmatchedBond = rBond->getIdx();
          break;
        }
        ++nbrIdx;
      }
      // what must be true at this point:
      //  1) there's a -1 in pOrder that we'll substitute for
      //  2) unmatchedBond contains the index of the substitution
      auto bPos = std::find(pOrder.begin(), pOrder.end(), -1);
      if (unmatchedBond >= 0 && bPos != pOrder.end()) {
        *bPos = unmatchedBond;
      }
      if (std::find(pOrder.begin(), pOrder.end(), -1) == pOrder.end()) {
        nUnknown = 0;
      }
    }
    if (!nUnknown) {
      productAtom->setChiralTag(reactantAtom.getChiralTag());
      int nSwaps = reactantAtom.getPerturbationOrder(pOrder);
      if (nSwaps % 2) {
        productAtom->invertChirality();
      }
    }
  }
}
Example #3
0
//
// Determine bond wedge state
//
// Returns the bond's current direction unchanged when no conformer is given
// or when the bond has no entry in wedgeBonds. Otherwise wedgeBonds maps the
// bond index to the index of the atom the wedge starts from; that atom must
// have a tetrahedral (CW/CCW) chiral tag. The result is then BEGINWEDGE or
// BEGINDASH, chosen from the chiral tag and the parity of the permutation
// between the atom's bond ordering and the angular ordering of its bonds in
// the conformer (z coordinates are zeroed, i.e. only x/y are used).
//
Bond::BondDir DetermineBondWedgeState(const Bond *bond,
                                      const INT_MAP_INT &wedgeBonds,
                                      const Conformer *conf) {
    PRECONDITION(bond, "no bond");
    PRECONDITION(bond->getBondType() == Bond::SINGLE,
                 "bad bond order for wedging");
    const ROMol *mol = &(bond->getOwningMol());
    PRECONDITION(mol, "no mol");

    const Bond::BondDir currentDir = bond->getBondDir();
    if (!conf) {
        return currentDir;
    }

    const int bondIdx = bond->getIdx();
    const INT_MAP_INT_CI wedgeEntry = wedgeBonds.find(bondIdx);
    if (wedgeEntry == wedgeBonds.end()) {
        return currentDir;
    }

    // index of the atom the wedge should start from
    const unsigned int wedgeAtomIdx = wedgeEntry->second;

    // `atom` is the center we wedge from, `bondAtom` the other end of the bond
    const bool startsAtBegin = bond->getBeginAtom()->getIdx() == wedgeAtomIdx;
    Atom *atom = startsAtBegin ? bond->getBeginAtom() : bond->getEndAtom();
    Atom *bondAtom = startsAtBegin ? bond->getEndAtom() : bond->getBeginAtom();

    const Atom::ChiralType chiralType = atom->getChiralTag();
    CHECK_INVARIANT(chiralType == Atom::CHI_TETRAHEDRAL_CW ||
                    chiralType == Atom::CHI_TETRAHEDRAL_CCW,
                    "");

    // if we got this far, we really need to think about it:
    // work in the x/y plane, measuring angles relative to the reference bond
    RDGeom::Point3D centerLoc = conf->getAtomPos(atom->getIdx());
    RDGeom::Point3D bondAtomLoc = conf->getAtomPos(bondAtom->getIdx());
    centerLoc.z = 0.0;
    bondAtomLoc.z = 0.0;
    const RDGeom::Point3D refVect = centerLoc.directionVector(bondAtomLoc);

    // parallel lists, kept sorted by angle; the reference bond sits at 0
    INT_LIST neighborBondIndices;
    DOUBLE_LIST neighborBondAngles;
    neighborBondIndices.push_back(bond->getIdx());
    neighborBondAngles.push_back(0.0);

    ROMol::OEDGE_ITER beg, end;
    for (boost::tie(beg, end) = mol->getAtomBonds(atom); beg != end; ++beg) {
        Bond *nbrBond = (*mol)[*beg].get();
        Atom *otherAtom = nbrBond->getOtherAtom(atom);
        if (nbrBond == bond) {
            continue;
        }
        RDGeom::Point3D nbrLoc = conf->getAtomPos(otherAtom->getIdx());
        nbrLoc.z = 0.0;
        const RDGeom::Point3D nbrVect = centerLoc.directionVector(nbrLoc);
        double angle = refVect.signedAngleTo(nbrVect);
        if (angle < 0.0) {
            // map negative angles into the upper half of the [0, 2*pi) range
            angle += 2. * M_PI;
        }
        // find the location of this neighbor in our angle-sorted list
        // of neighbors, advancing both lists in lock-step:
        INT_LIST::iterator idxPos = neighborBondIndices.begin();
        DOUBLE_LIST::iterator anglePos = neighborBondAngles.begin();
        for (; anglePos != neighborBondAngles.end() && angle > (*anglePos);
             ++anglePos, ++idxPos) {
        }
        neighborBondAngles.insert(anglePos, angle);
        neighborBondIndices.insert(idxPos, nbrBond->getIdx());
    }

    // at this point, neighborBondIndices contains a list of bond
    // indices from the central atom.  They are arranged starting
    // at the reference bond in CCW order (based on the current
    // depiction).
    int nSwaps = atom->getPerturbationOrder(neighborBondIndices);

    // in the case of three-coordinated atoms we may have to worry about
    // the location of the implicit hydrogen - Issue 209
    // Check if we have one of these situations
    //
    //      0        1 0 2
    //      *         \*/
    //  1 - C - 2      C
    //
    // here the hydrogen will be between 1 and 2 and we need to add an additional
    // swap
    if (neighborBondAngles.size() == 3) {
        // three coordinated: skip the first entry (the reference bond) and
        // compare the angles of the two remaining neighbors
        DOUBLE_LIST::iterator anglePos = neighborBondAngles.begin();
        const double firstNbrAngle = *(++anglePos);
        const double secondNbrAngle = *(++anglePos);
        if (secondNbrAngle - firstNbrAngle >= (M_PI - 1e-4)) {
            // we have the above situation
            ++nSwaps;
        }
    }

#ifdef VERBOSE_STEREOCHEM
    BOOST_LOG(rdDebugLog) << "--------- " << nSwaps << std::endl;
    std::copy(neighborBondIndices.begin(), neighborBondIndices.end(),
              std::ostream_iterator<int>(BOOST_LOG(rdDebugLog), " "));
    BOOST_LOG(rdDebugLog) << std::endl;
    std::copy(neighborBondAngles.begin(), neighborBondAngles.end(),
              std::ostream_iterator<double>(BOOST_LOG(rdDebugLog), " "));
    BOOST_LOG(rdDebugLog) << std::endl;
#endif
    // CCW + even swaps -> wedge, CCW + odd -> dash; CW is the mirror image
    const bool oddSwaps = (nSwaps % 2 == 1);
    const bool isCCW = (chiralType == Atom::CHI_TETRAHEDRAL_CCW);
    return (isCCW != oddSwaps) ? Bond::BEGINWEDGE : Bond::BEGINDASH;
}