// Figure out the CIP ranks for the atoms of a molecule void assignAtomCIPRanks(const ROMol &mol, UINT_VECT &ranks) { PRECONDITION((!ranks.size() || ranks.size() >= mol.getNumAtoms()), "bad ranks size"); if (!ranks.size()) ranks.resize(mol.getNumAtoms()); unsigned int numAtoms = mol.getNumAtoms(); #ifndef USE_NEW_STEREOCHEMISTRY // get the initial invariants: DOUBLE_VECT invars(numAtoms, 0); buildCIPInvariants(mol, invars); iterateCIPRanks(mol, invars, ranks, false); #else Canon::chiralRankMolAtoms(mol, ranks); #endif // copy the ranks onto the atoms: for (unsigned int i = 0; i < numAtoms; ++i) { mol[i]->setProp(common_properties::_CIPRank, ranks[i], 1); } }
void iterateCIPRanks(const ROMol &mol, DOUBLE_VECT &invars, UINT_VECT &ranks, bool seedWithInvars) { PRECONDITION(invars.size() == mol.getNumAtoms(), "bad invars size"); PRECONDITION(ranks.size() >= mol.getNumAtoms(), "bad ranks size"); unsigned int numAtoms = mol.getNumAtoms(); CIP_ENTRY_VECT cipEntries(numAtoms); INT_LIST allIndices; for (unsigned int i = 0; i < numAtoms; ++i) { allIndices.push_back(i); } #ifdef VERBOSE_CANON BOOST_LOG(rdDebugLog) << "invariants:" << std::endl; for (unsigned int i = 0; i < numAtoms; i++) { BOOST_LOG(rdDebugLog) << i << ": " << invars[i] << std::endl; } #endif // rank those: Rankers::rankVect(invars, ranks); #ifdef VERBOSE_CANON BOOST_LOG(rdDebugLog) << "initial ranks:" << std::endl; for (unsigned int i = 0; i < numAtoms; ++i) { BOOST_LOG(rdDebugLog) << i << ": " << ranks[i] << std::endl; } #endif // Start each atom's rank vector with its atomic number: // Note: in general one should avoid the temptation to // use invariants here, those lead to incorrect answers for (unsigned int i = 0; i < numAtoms; i++) { if (!seedWithInvars) { cipEntries[i].push_back(mol[i]->getAtomicNum()); cipEntries[i].push_back(static_cast<int>(ranks[i])); } else { cipEntries[i].push_back(static_cast<int>(invars[i])); } } // Loop until either: // 1) all classes are uniquified // 2) the number of ranks doesn't change from one iteration to // the next // 3) we've gone through maxIts times // maxIts is calculated by dividing the number of atoms // by 2. That's a pessimal version of the // maximum number of steps required for two atoms to // "feel" each other (each influences one additional // neighbor shell per iteration). unsigned int maxIts = numAtoms / 2 + 1; unsigned int numIts = 0; int lastNumRanks = -1; unsigned int numRanks = *std::max_element(ranks.begin(), ranks.end()) + 1; while (numRanks < numAtoms && numIts < maxIts && (lastNumRanks < 0 || static_cast<unsigned int>(lastNumRanks) < numRanks)) { unsigned int longestEntry = 0; // ---------------------------------------------------- // // for each atom, get a sorted list of its neighbors' ranks: // for (INT_LIST_I it = allIndices.begin(); it != allIndices.end(); ++it) { CIP_ENTRY localEntry; localEntry.reserve(16); // start by pushing on our neighbors' ranks: ROMol::OEDGE_ITER beg, end; boost::tie(beg, end) = mol.getAtomBonds(mol[*it].get()); while (beg != end) { const Bond *bond = mol[*beg].get(); ++beg; unsigned int nbrIdx = bond->getOtherAtomIdx(*it); const Atom *nbr = mol[nbrIdx].get(); int rank = ranks[nbrIdx] + 1; // put the neighbor in 2N times where N is the bond order as a double. // this is to treat aromatic linkages on fair footing. i.e. at least in // the // first iteration --c(:c):c and --C(=C)-C should look the same. // this was part of issue 3009911 unsigned int count; if (bond->getBondType() == Bond::DOUBLE && nbr->getAtomicNum() == 15 && (nbr->getDegree() == 4 || nbr->getDegree() == 3)) { // a special case for chiral phophorous compounds // (this was leading to incorrect assignment of // R/S labels ): count = 1; // general justification of this is: // Paragraph 2.2. in the 1966 article is "Valence-Bond Conventions: // Multiple-Bond Unsaturation and Aromaticity". It contains several // conventions of which convention (b) is the one applying here: // "(b) Contibutions by d orbitals to bonds of quadriligant atoms are // neglected." // FIX: this applies to more than just P } else { count = static_cast<unsigned int>( floor(2. * bond->getBondTypeAsDouble() + .1)); } CIP_ENTRY::iterator ePos = std::lower_bound(localEntry.begin(), localEntry.end(), rank); localEntry.insert(ePos, count, rank); ++nbr; } // add a zero for each coordinated H: // (as long as we're not a query atom) if (!mol[*it]->hasQuery()) { localEntry.insert(localEntry.begin(), mol[*it]->getTotalNumHs(), 0); } // we now have a sorted list of our neighbors' ranks, // copy it on in reversed order: cipEntries[*it].insert(cipEntries[*it].end(), localEntry.rbegin(), localEntry.rend()); if (cipEntries[*it].size() > longestEntry) { longestEntry = rdcast<unsigned int>(cipEntries[*it].size()); } } // ---------------------------------------------------- // // pad the entries so that we compare rounds to themselves: // for (INT_LIST_I it = allIndices.begin(); it != allIndices.end(); ++it) { unsigned int sz = rdcast<unsigned int>(cipEntries[*it].size()); if (sz < longestEntry) { cipEntries[*it].insert(cipEntries[*it].end(), longestEntry - sz, -1); } } // ---------------------------------------------------- // // sort the new ranks and update the list of active indices: // lastNumRanks = numRanks; Rankers::rankVect(cipEntries, ranks); numRanks = *std::max_element(ranks.begin(), ranks.end()) + 1; // now truncate each vector and stick the rank at the end for (unsigned int i = 0; i < numAtoms; ++i) { cipEntries[i][numIts + 1] = ranks[i]; cipEntries[i].erase(cipEntries[i].begin() + numIts + 2, cipEntries[i].end()); } ++numIts; #ifdef VERBOSE_CANON BOOST_LOG(rdDebugLog) << "strings and ranks:" << std::endl; for (unsigned int i = 0; i < numAtoms; i++) { BOOST_LOG(rdDebugLog) << i << ": " << ranks[i] << " > "; debugVect(cipEntries[i]); } #endif } }