void testFragmentOnBRICSBonds() { BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl; BOOST_LOG(rdInfoLog) << "Testing fragmentOnBRICSBonds"<< std::endl; { std::string smi = "c1ccccc1OC"; RWMol *mol = SmilesToMol(smi); TEST_ASSERT(mol); TEST_ASSERT(mol->getNumAtoms()==8); TEST_ASSERT(mol->getBondBetweenAtoms(5,6)); TEST_ASSERT(mol->getBondBetweenAtoms(6,7)); std::vector<MolFragmenter::FragmenterBondType> fbts; MolFragmenter::constructBRICSBondTypes(fbts); ROMol *nmol=MolFragmenter::fragmentOnBonds(*mol,fbts); TEST_ASSERT(nmol); TEST_ASSERT(nmol->getNumAtoms()==10); TEST_ASSERT(!nmol->getBondBetweenAtoms(5,6)); TEST_ASSERT(nmol->getBondBetweenAtoms(6,7)); smi = MolToSmiles(*nmol,true); TEST_ASSERT(smi=="[3*]OC.[16*]c1ccccc1"); TEST_ASSERT(nmol->getAtomWithIdx(8)->getAtomicNum()==0); TEST_ASSERT(nmol->getAtomWithIdx(8)->getIsotope()==3); TEST_ASSERT(nmol->getBondBetweenAtoms(6,8)); TEST_ASSERT(nmol->getAtomWithIdx(9)->getAtomicNum()==0); TEST_ASSERT(nmol->getAtomWithIdx(9)->getIsotope()==16); TEST_ASSERT(nmol->getBondBetweenAtoms(5,9)); delete mol; delete nmol; } { std::string smi = "c1ccccc1"; RWMol *mol = SmilesToMol(smi); TEST_ASSERT(mol); TEST_ASSERT(mol->getNumAtoms()==6); std::vector<MolFragmenter::FragmenterBondType> fbts; MolFragmenter::constructBRICSBondTypes(fbts); ROMol *nmol=MolFragmenter::fragmentOnBonds(*mol,fbts); TEST_ASSERT(nmol); TEST_ASSERT(nmol->getNumAtoms()==6); smi = MolToSmiles(*nmol,true); TEST_ASSERT(smi=="c1ccccc1"); delete mol; delete nmol; } { std::string smi = "OC(C)=CC"; RWMol *mol = SmilesToMol(smi); TEST_ASSERT(mol); TEST_ASSERT(mol->getNumAtoms()==5); std::vector<MolFragmenter::FragmenterBondType> fbts; MolFragmenter::constructBRICSBondTypes(fbts); ROMol *nmol=MolFragmenter::fragmentOnBonds(*mol,fbts); TEST_ASSERT(nmol); TEST_ASSERT(nmol->getNumAtoms()==7); smi = MolToSmiles(*nmol,true); TEST_ASSERT(smi=="[7*]=CC.[7*]=C(C)O"); delete mol; delete nmol; } { std::string smi = "c1ccccc1OC"; RWMol *mol = 
SmilesToMol(smi); TEST_ASSERT(mol); TEST_ASSERT(mol->getNumAtoms()==8); ROMol *nmol=MolFragmenter::fragmentOnBRICSBonds(*mol); TEST_ASSERT(nmol); TEST_ASSERT(nmol->getNumAtoms()==10); smi = MolToSmiles(*nmol,true); TEST_ASSERT(smi=="[3*]OC.[16*]c1ccccc1"); delete mol; delete nmol; } { std::string smi = "OC(C)=CC"; RWMol *mol = SmilesToMol(smi); TEST_ASSERT(mol); TEST_ASSERT(mol->getNumAtoms()==5); ROMol *nmol=MolFragmenter::fragmentOnBRICSBonds(*mol); TEST_ASSERT(nmol); TEST_ASSERT(nmol->getNumAtoms()==7); smi = MolToSmiles(*nmol,true); TEST_ASSERT(smi=="[7*]=CC.[7*]=C(C)O"); delete mol; delete nmol; } { std::string smi = "CCCOCCC(=O)c1ccccc1"; RWMol *mol = SmilesToMol(smi); TEST_ASSERT(mol); TEST_ASSERT(mol->getNumAtoms()==14); ROMol *nmol=MolFragmenter::fragmentOnBRICSBonds(*mol); TEST_ASSERT(nmol); TEST_ASSERT(nmol->getNumAtoms()==20); smi = MolToSmiles(*nmol,true); TEST_ASSERT(smi=="[3*]O[3*].[4*]CCC.[4*]CCC([6*])=O.[16*]c1ccccc1"); MolOps::sanitizeMol(static_cast<RWMol &>(*nmol)); smi = MolToSmiles(*nmol,true); TEST_ASSERT(smi=="[3*]O[3*].[4*]CCC.[4*]CCC([6*])=O.[16*]c1ccccc1"); delete mol; delete nmol; } { std::string smi = "Cl.CC(=O)O[C@]1(c2ccccc2)CCN(C)[C@H]2CCCC[C@@H]21"; RWMol *mol = SmilesToMol(smi); TEST_ASSERT(mol); TEST_ASSERT(mol->getNumAtoms()==22); ROMol *nmol=MolFragmenter::fragmentOnBRICSBonds(*mol); TEST_ASSERT(nmol); TEST_ASSERT(nmol->getNumAtoms()==28); smi = MolToSmiles(*nmol,true); //std::cerr<<smi<<std::endl; TEST_ASSERT(smi=="Cl.[1*]C(C)=O.[3*]O[3*].[15*]C1([15*])CCN(C)[C@H]2CCCC[C@@H]21.[16*]c1ccccc1"); MolOps::sanitizeMol(static_cast<RWMol &>(*nmol)); smi = MolToSmiles(*nmol,true); TEST_ASSERT(smi=="Cl.[1*]C(C)=O.[3*]O[3*].[15*]C1([15*])CCN(C)[C@H]2CCCC[C@@H]21.[16*]c1ccccc1"); delete mol; delete nmol; } BOOST_LOG(rdInfoLog) << "\tdone" << std::endl; }
// deletebonds / cSma1, Sma1, cSma2, Sma2 / is_ring_cut_valid
//
// NOTE(review): this is an unfinished draft and does not compile as written.
// The code below is left untouched; the notes only record what is visible.
//
// Layout: the original line breaks have been lost. A `//` comment runs to the
// end of its physical line, so in this layout everything after the first `//`
// on each of the three lines below is comment text, and the wrapped tail
// `smi2);` of the "Now get the modified fragment" comment starts the next
// line as live code.
//
// Apparent intent, reading the statements as code:
//   - deletebonds copies `mol` into `newMol`; depending on whether `ftype`
//     equals `acyc_smarts` or `cyc_smarts` it looks up substructure matches,
//     removes the matched bond, tries to attach two extra atoms, and converts
//     the result to a SMILES string `smi2` that is neither returned nor used.
//   - is_ring_cut_valid sets a local `valid` flag when a molecule matches
//     either of the two SMARTS patterns "[#0][r].[r][#0]" / "[#0][r][#0]".
//
// Problems visible in the text (to be resolved before this can be used):
//   - `acyclic_matching_atoms` / `cyclic_matching_atoms` are assigned before
//     they are declared; `bonds_selected` is declared twice with two types;
//     `oBond` is declared twice in one loop body; `total_cyclic` is
//     re-declared inside the cyclic branch.
//   - `newMol` is an `RWMol *`, yet it is used with `.push_back(...)`,
//     `[0]->` and `return newMol;` inside a function declared `void`.
//   - the `if (...size() == 0) {` blocks are not closed before the `for`
//     loops, and the braces do not balance overall.
//   - `atom0` and `atom1` are both fetched with getAtomWithIdx(0), i.e. they
//     refer to an existing atom rather than to new dummy atoms; `oBond` is
//     looked up before the addBond call that would create that bond.
//   - the third loop uses `*nbrIdx`, which is not declared here, and iterates
//     `acyclic_matching_atoms` from inside the cyclic branch.
//   - the acyclic loop body ends in `break;` (only the first match would be
//     handled); the other loops end in a redundant `continue;`.
//   - parameter `hac` is never read; `total_acyclic` is only written.
//   - is_ring_cut_valid ignores `fMol` and tests a freshly allocated empty
//     `RWMol` instead, passes pointers to `hasSubstructMatch`, uses the type
//     name `boolean` (not a built-in C++ type), never frees `m`, and has no
//     way to report `valid` or `atom_count` to the caller.
//   - `String`, `Match_Vect`, `acyc`, `cyc`, `acyc_smarts` and `cyc_smarts`
//     are not defined in the visible text -- TODO confirm where they come from.
void deletebonds(const ROMol &mol, String ftype, int hac){ RWMol *newMol = static_cast<RWMol*>(new ROMol(mol,false)); int total_acyclic = 0; int total_cyclic = 0; //find the relevant bonds to break // Single acyclic Cuts if(ftype == acyc_smarts){ acyclic_matching_atoms = mol.getSubstructMatches(acyc); total_acyclic = acyclic_matching_atoms.size(); MatchVectType acyclic_matching_atoms; std::vector<int> bonds_selected; Match_Vect bonds_selected; SubstructMatch(mol, acyc, acyclic_matching_atoms); // if we didn't find any matches, there's nothing to be done here // simply return a list with a copy of the starting molecule if (acyclic_matching_atoms.size() == 0) { newMol.push_back(ROMOL_SPTR(new ROMol(mol,false))); newMol[0]->clearComputedProps(false); return newMol; for(MatchVectType::const_iterator mvit=acyclic_matching_atoms.begin(); mvit!=acyclic_matching_atoms.end(); mvit++){ bonds_selected.push_back(mvit->first); bonds_selected.push_back(mvit->second); bonds_selected[0] = mvit->first; bonds_selected[1] = mvit->second; Atom *at1 = newMol->getAtomWithIdx(bonds_selected[0]); Atom *at2 = newMol->getAtomWithIdx(bonds_selected[1]); Atom *atom0 = newMol->getAtomWithIdx(0); Atom *atom1 = newMol->getAtomWithIdx(0); newMol->removeBond(bonds_selected[0], bonds_selected[1]);// Break the bond with idx=at1, at2. // Introduce two dummy atoms in the molecule newMol->addAtom(atom0); newMol->addAtom(atom1); // Bond the dummy atoms to the new terminal atoms Bond *oBond=newMol->getBondBetweenAtoms(bonds_selected[0],atom0->getIdx()); CHECK_INVARIANT(oBond,"required bond not found"); newMol->addBond(at1, *atom0,Bond::SINGLE); Bond *oBond=newMol->getBondBetweenAtoms(bonds_selected[1],atom1->getIdx()); CHECK_INVARIANT(oBond,"required bond not found"); newMol->addBond(at2, *atom1,Bond::SINGLE); break; } //Now get the modified fragment in smiles(i.e. 
smi2); String smi2 = MolToSmiles(*newMol); //printf (smi2); } } //cyclic Cuts if(ftype == cyc_smarts){ cyclic_matching_atoms = mol.getSubstructMatches(cyc); int total_cyclic = cyclic_matching_atoms.size(); MatchVectType cyclic_matching_atoms; std::vector<int> bonds_selected; Match_Vect bonds_selected; SubstructMatch(mol, cyc, cyclic_matching_atoms); // if we didn't find any matches, there's nothing to be done here // simply return a list with a copy of the starting molecule if (cyclic_matching_atoms.size() == 0) { newMol.push_back(ROMOL_SPTR(new ROMol(mol,false))); newMol[0]->clearComputedProps(false); return newMol; for(MatchVectType::const_iterator mvit=cyclic_matching_atoms.begin(); mvit!=cyclic_matching_atoms.end(); mvit++){ bonds_selected.push_back(mvit->first); bonds_selected.push_back(mvit->second); bonds_selected[0] = mvit->first; bonds_selected[1] = mvit->second; Atom *at1 = newMol->getAtomWithIdx(bonds_selected[0]); Atom *at2 = newMol->getAtomWithIdx(bonds_selected[1]); Atom *atom0 = newMol->getAtomWithIdx(0); Atom *atom1 = newMol->getAtomWithIdx(0); newMol->removeBond(bonds_selected[0], bonds_selected[1]);// Break the bond with idx=at1, at2. // Introduce two dummy atoms in the molecule newMol->addAtom(atom0); newMol->addAtom(atom1); // Bond the dummy atoms to the new terminal atoms Bond *oBond=newMol->getBondBetweenAtoms(bonds_selected[0],atom0->getIdx()); CHECK_INVARIANT(oBond,"required bond not found"); newMol->addBond(at1, *atom0,Bond::SINGLE); Bond *oBond=newMol->getBondBetweenAtoms(bonds_selected[1],atom1->getIdx()); CHECK_INVARIANT(oBond,"required bond not found"); newMol->addBond(at2, *atom1,Bond::SINGLE); continue; } //Now get the modified fragment in smiles(i.e. 
smi2); String smi2 = MolToSmiles(*newMol); //printf (smi2); //now do an acyclic cut with the successful cyclic cut on the mol for(MatchVectType::const_iterator mvit=acyclic_matching_atoms.begin(); mvit!=acyclic_matching_atoms.end(); mvit++){ bonds_selected.push_back(mvit->first); bonds_selected.push_back(mvit->second); bonds_selected[0] = mvit->first; bonds_selected[1] = mvit->second; Atom *at1 = newMol->getAtomWithIdx(bonds_selected[0]); Atom *at2 = newMol->getAtomWithIdx(bonds_selected[1]); Atom *atom0 = newMol->getAtomWithIdx(0); Atom *atom1 = newMol->getAtomWithIdx(0); newMol->removeBond(bonds_selected[0], bonds_selected[1]);// Break the bond with idx=at1, at2. // Introduce two dummy atoms in the molecule newMol->addAtom(atom0); newMol->addAtom(atom1); // Bond the dummy atoms to the new terminal atoms Bond *oBond=newMol->getBondBetweenAtoms(bonds_selected[0],atom0->getIdx()); CHECK_INVARIANT(oBond,"required bond not found"); newMol->addBond(at1, *nbrIdx,oBond->getBondType()); Bond *oBond=newMol->getBondBetweenAtoms(bonds_selected[1],atom1->getIdx()); CHECK_INVARIANT(oBond,"required bond not found"); newMol->addBond(at2, *atom1,Bond::SINGLE); continue; } //Now get the modified fragment in smiles(i.e. smi2); String smi2 = MolToSmiles(*newMol); //printf (smi2); } } //determine whether ring cut is valid String cSma1 = ("[#0][r].[r][#0]"); static ROMol *Sma1 = SmartsToMol("[#0][r].[r][#0]"); String cSma2 = ("[#0][r][#0]"); static ROMol *Sma2 = SmartsToMol("[#0][r][#0]"); void is_ring_cut_valid(ROMol *fMol, ROMol *Sma1, ROMol *Sma2){ //to check is a fragment is a valid ring cut, it needs to match the //smarts: [$([#0][r].[r][#0]),$([#0][r][#0])] boolean valid = false; ROMol *m = new RWMol(); //if m is not None: if (m != NULL){ //use global smarts if(m->hasSubstructMatch(Sma1) || m->hasSubstructMatch(Sma2)){ int atom_count = m->getNumAtoms(); valid = true; } } }
void testFragmentOnBonds() { BOOST_LOG(rdInfoLog) << "-------------------------------------" << std::endl; BOOST_LOG(rdInfoLog) << "Testing fragmentOnBonds"<< std::endl; { std::string smi = "OCCCN"; RWMol *mol = SmilesToMol(smi); TEST_ASSERT(mol); TEST_ASSERT(mol->getNumAtoms()==5); unsigned int indices[]={0,3}; std::vector<unsigned int> bindices(indices,indices+(sizeof(indices)/sizeof(indices[0]))); ROMol *nmol=MolFragmenter::fragmentOnBonds(*mol,bindices,false); TEST_ASSERT(nmol); TEST_ASSERT(nmol->getNumAtoms()==5); delete mol; delete nmol; } { std::string smi = "OCCCN"; RWMol *mol = SmilesToMol(smi); TEST_ASSERT(mol); TEST_ASSERT(mol->getNumAtoms()==5); unsigned int indices[]={0,1}; std::vector<unsigned int> bindices(indices,indices+(sizeof(indices)/sizeof(indices[0]))); std::vector<unsigned int> cutsPerAtom(mol->getNumAtoms()); ROMol *nmol=MolFragmenter::fragmentOnBonds(*mol,bindices,false,0,0,&cutsPerAtom); TEST_ASSERT(nmol); TEST_ASSERT(nmol->getNumAtoms()==5); TEST_ASSERT(cutsPerAtom[0]==1); TEST_ASSERT(cutsPerAtom[1]==2); TEST_ASSERT(cutsPerAtom[2]==1); TEST_ASSERT(cutsPerAtom[3]==0); delete mol; delete nmol; } { std::string smi = "OCCCN"; RWMol *mol = SmilesToMol(smi); TEST_ASSERT(mol); TEST_ASSERT(mol->getNumAtoms()==5); TEST_ASSERT(mol->getBondBetweenAtoms(0,1)); TEST_ASSERT(mol->getBondBetweenAtoms(3,4)); unsigned int indices[]={0,3}; std::vector<unsigned int> bindices(indices,indices+(sizeof(indices)/sizeof(indices[0]))); ROMol *nmol=MolFragmenter::fragmentOnBonds(*mol,bindices); TEST_ASSERT(nmol); TEST_ASSERT(nmol->getNumAtoms()==9); TEST_ASSERT(!nmol->getBondBetweenAtoms(0,1)); TEST_ASSERT(!nmol->getBondBetweenAtoms(3,4)); TEST_ASSERT(nmol->getAtomWithIdx(5)->getAtomicNum()==0); TEST_ASSERT(nmol->getAtomWithIdx(5)->getIsotope()==0); TEST_ASSERT(nmol->getBondBetweenAtoms(1,5)); TEST_ASSERT(nmol->getAtomWithIdx(6)->getAtomicNum()==0); TEST_ASSERT(nmol->getAtomWithIdx(6)->getIsotope()==1); TEST_ASSERT(nmol->getBondBetweenAtoms(0,6)); 
TEST_ASSERT(nmol->getAtomWithIdx(7)->getAtomicNum()==0); TEST_ASSERT(nmol->getAtomWithIdx(7)->getIsotope()==3); TEST_ASSERT(nmol->getBondBetweenAtoms(4,7)); TEST_ASSERT(nmol->getAtomWithIdx(8)->getAtomicNum()==0); TEST_ASSERT(nmol->getAtomWithIdx(8)->getIsotope()==4); TEST_ASSERT(nmol->getBondBetweenAtoms(3,8)); delete mol; delete nmol; } { std::string smi = "OCCCN"; RWMol *mol = SmilesToMol(smi); TEST_ASSERT(mol); TEST_ASSERT(mol->getNumAtoms()==5); TEST_ASSERT(mol->getBondBetweenAtoms(0,1)); TEST_ASSERT(mol->getBondBetweenAtoms(3,4)); unsigned int indices[]={0,3}; std::vector<unsigned int> bindices(indices,indices+(sizeof(indices)/sizeof(indices[0]))); std::vector< std::pair<unsigned int,unsigned int> > dummyLabels(2); dummyLabels[0] =std::make_pair(10,11); dummyLabels[1] =std::make_pair(100,110); ROMol *nmol=MolFragmenter::fragmentOnBonds(*mol,bindices,true,&dummyLabels); TEST_ASSERT(nmol); TEST_ASSERT(nmol->getNumAtoms()==9); TEST_ASSERT(!nmol->getBondBetweenAtoms(0,1)); TEST_ASSERT(!nmol->getBondBetweenAtoms(3,4)); TEST_ASSERT(nmol->getAtomWithIdx(5)->getAtomicNum()==0); TEST_ASSERT(nmol->getAtomWithIdx(5)->getIsotope()==10); TEST_ASSERT(nmol->getBondBetweenAtoms(1,5)); TEST_ASSERT(nmol->getAtomWithIdx(6)->getAtomicNum()==0); TEST_ASSERT(nmol->getAtomWithIdx(6)->getIsotope()==11); TEST_ASSERT(nmol->getBondBetweenAtoms(0,6)); TEST_ASSERT(nmol->getAtomWithIdx(7)->getAtomicNum()==0); TEST_ASSERT(nmol->getAtomWithIdx(7)->getIsotope()==100); TEST_ASSERT(nmol->getBondBetweenAtoms(4,7)); TEST_ASSERT(nmol->getAtomWithIdx(8)->getAtomicNum()==0); TEST_ASSERT(nmol->getAtomWithIdx(8)->getIsotope()==110); TEST_ASSERT(nmol->getBondBetweenAtoms(3,8)); delete mol; delete nmol; } BOOST_LOG(rdInfoLog) << "\tdone" << std::endl; }
bool kekulizeWorker(RWMol &mol, const INT_VECT &allAtms, boost::dynamic_bitset<> dBndCands, boost::dynamic_bitset<> dBndAdds, INT_VECT done, unsigned int maxBackTracks) { INT_DEQUE astack; INT_INT_DEQ_MAP options; int lastOpt = -1; boost::dynamic_bitset<> localBondsAdded(mol.getNumBonds()); // ok the algorithm goes something like this // - start with an atom that has been marked aromatic before // - check if it can have a double bond // - add its neighbors to the stack // - check if one of its neighbors can also have a double bond // - if yes add a double bond. // - if multiple neighbors can have double bonds - add them to a // options stack we may have to retrace out path if we chose the // wrong neighbor to add the double bond // - if double bond added update the candidates for double bond // - move to the next atom on the stack and repeat the process // - if an atom that can have multiple a double bond has no // neighbors that can take double bond - we made a mistake // earlier by picking a wrong candidate for double bond // - in this case back track to where we made the mistake int curr; INT_DEQUE btmoves; unsigned int numBT = 0; // number of back tracks so far while ((done.size() < allAtms.size()) || (astack.size() > 0)) { // pick a curr atom to work with if (astack.size() > 0) { curr = astack.front(); astack.pop_front(); } else { for (int allAtm : allAtms) { if (std::find(done.begin(), done.end(), allAtm) == done.end()) { curr = allAtm; break; } } } done.push_back(curr); // loop over the neighbors if we can add double bonds or // simply push them onto the stack INT_DEQUE opts; bool cCand = false; if (dBndCands[curr]) { cCand = true; } int ncnd; // if we are here because of backtracking if (options.find(curr) != options.end()) { opts = options[curr]; CHECK_INVARIANT(opts.size() > 0, ""); } else { RWMol::ADJ_ITER nbrIdx, endNbrs; boost::tie(nbrIdx, endNbrs) = mol.getAtomNeighbors(mol.getAtomWithIdx(curr)); while (nbrIdx != endNbrs) { // ignore if the neighbor 
has already been dealt with before if (std::find(done.begin(), done.end(), static_cast<int>(*nbrIdx)) != done.end()) { ++nbrIdx; continue; } // ignore if the neighbor is not part of the fused system if (std::find(allAtms.begin(), allAtms.end(), static_cast<int>(*nbrIdx)) == allAtms.end()) { ++nbrIdx; continue; } // if the neighbor is not on the stack add it if (std::find(astack.begin(), astack.end(), static_cast<int>(*nbrIdx)) == astack.end()) { astack.push_back(rdcast<int>(*nbrIdx)); } // check if the neighbor is also a candidate for a double bond // the refinement that we'll make to the candidate check we've already // done is to make sure that the bond is either flagged as aromatic // or involves a dummy atom. This was Issue 3525076. // This fix is not really 100% of the way there: a situation like // that for Issue 3525076 but involving a dummy atom in the cage // could lead to the same failure. The full fix would require // a fairly detailed analysis of all bonds in the molecule to determine // which of them is eligible to be converted. 
if (cCand && dBndCands[*nbrIdx] && (mol.getBondBetweenAtoms(curr, *nbrIdx)->getIsAromatic() || mol.getAtomWithIdx(curr)->getAtomicNum() == 0 || mol.getAtomWithIdx(*nbrIdx)->getAtomicNum() == 0)) { opts.push_back(rdcast<int>(*nbrIdx)); } // end of curr atoms can have a double bond ++nbrIdx; } // end of looping over neighbors } // now add a double bond from current to one of the neighbors if we can if (cCand) { if (opts.size() > 0) { ncnd = opts.front(); opts.pop_front(); Bond *bnd = mol.getBondBetweenAtoms(curr, ncnd); bnd->setBondType(Bond::DOUBLE); // remove current and the neighbor from the dBndCands list dBndCands[curr] = 0; dBndCands[ncnd] = 0; // add them to the list of bonds to which have been made double dBndAdds[bnd->getIdx()] = 1; localBondsAdded[bnd->getIdx()] = 1; // if this is an atom we previously visted and picked we // simply tried a different option now, overwrite the options // stored for this atoms if (options.find(curr) != options.end()) { if (opts.size() == 0) { options.erase(curr); btmoves.pop_back(); if (btmoves.size() > 0) { lastOpt = btmoves.back(); } else { lastOpt = -1; } } else { options[curr] = opts; } } else { // this is new atoms we are trying and have other // neighbors as options to add double bond store this to // the options stack, we may have made a mistake in // which one we chose and have to return here if (opts.size() > 0) { lastOpt = curr; btmoves.push_back(lastOpt); options[curr] = opts; } } } // end of adding a double bond else { // we have an atom that should be getting a double bond // but none of the neighbors can take one. 
Most likely // because of a wrong choice earlier so back track if ((lastOpt >= 0) && (numBT < maxBackTracks)) { // std::cerr << "PRE BACKTRACK" << std::endl; // mol.debugMol(std::cerr); backTrack(mol, options, lastOpt, done, astack, dBndCands, dBndAdds); // std::cerr << "POST BACKTRACK" << std::endl; // mol.debugMol(std::cerr); numBT++; } else { // undo any remaining changes we made while here // this was github #962 for (unsigned int bidx = 0; bidx < mol.getNumBonds(); ++bidx) { if (localBondsAdded[bidx]) { mol.getBondWithIdx(bidx)->setBondType(Bond::SINGLE); } } return false; } } // end of else try to backtrack } // end of curr atom atom being a cand for double bond } // end of while we are not done with all atoms return true; }