Beispiel #1
0
void testPaths2() {
  std::cout << "-----------------------\n Path retrieval2" << std::endl;
  // build: CCC(C)CC
  RWMol mol;
  bool updateLabel = true;
  bool takeOwnership = true;
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addBond(0, 1, Bond::SINGLE);
  mol.addBond(1, 2, Bond::SINGLE);
  mol.addBond(2, 3, Bond::SINGLE);
  mol.addBond(2, 0, Bond::SINGLE);

  //
  //  Retrieve using bonds
  //
  PATH_LIST tmp = findAllPathsOfLengthN(mol, 3);
  // std::cout << "\n3:" << std::endl;
  // dumpVIV(tmp);
  CHECK_INVARIANT(tmp.size() == 3, "");

  std::cout << "Finished" << std::endl;
}
Beispiel #2
0
void CleanupMolecule() {
  // an example of doing some cleaning up of a molecule before
  // calling the sanitizeMol function()

  // build: C1CC1C(:O):O
  RWMol *mol = new RWMol();

  // add atoms and bonds:
  mol->addAtom(new Atom(6));           // atom 0
  mol->addAtom(new Atom(6));           // atom 1
  mol->addAtom(new Atom(6));           // atom 2
  mol->addAtom(new Atom(6));           // atom 3
  mol->addAtom(new Atom(8));           // atom 4
  mol->addAtom(new Atom(8));           // atom 5
  mol->addBond(3, 4, Bond::AROMATIC);  // bond 0
  mol->addBond(3, 5, Bond::AROMATIC);  // bond 1
  mol->addBond(3, 2, Bond::SINGLE);    // bond 2
  mol->addBond(2, 1, Bond::SINGLE);    // bond 3
  mol->addBond(1, 0, Bond::SINGLE);    // bond 4
  mol->addBond(0, 2, Bond::SINGLE);    // bond 5

  // instead of calling sanitize mol, which would generate an error,
  // we'll perceive the rings, then take care of aromatic bonds
  // that aren't in a ring, then sanitize:
  MolOps::findSSSR(*mol);
  for (ROMol::BondIterator bondIt = mol->beginBonds();
       bondIt != mol->endBonds(); ++bondIt) {
    if (((*bondIt)->getIsAromatic() ||
         (*bondIt)->getBondType() == Bond::AROMATIC) &&
        !mol->getRingInfo()->numBondRings((*bondIt)->getIdx())) {
      // remove the aromatic flag on the bond:
      (*bondIt)->setIsAromatic(false);
      // and cleanup its attached atoms as well (they were
      // also marked aromatic when the bond was added)
      (*bondIt)->getBeginAtom()->setIsAromatic(false);
      (*bondIt)->getEndAtom()->setIsAromatic(false);

      // NOTE: this isn't really reasonable:
      (*bondIt)->setBondType(Bond::SINGLE);
    }
  }

  // now it's safe to sanitize:
  RDKit::MolOps::sanitizeMol(*mol);

  // Get the canonical SMILES, include stereochemistry:
  std::string smiles;
  smiles = MolToSmiles(*(static_cast<ROMol *>(mol)), true);
  BOOST_LOG(rdInfoLog) << " fixed SMILES: " << smiles << std::endl;
}
Beispiel #3
0
void BuildSimpleMolecule() {
  // build the molecule: C/C=C\C
  RWMol *mol = new RWMol();

  // add atoms and bonds:
  mol->addAtom(new Atom(6));         // atom 0
  mol->addAtom(new Atom(6));         // atom 1
  mol->addAtom(new Atom(6));         // atom 2
  mol->addAtom(new Atom(6));         // atom 3
  mol->addBond(0, 1, Bond::SINGLE);  // bond 0
  mol->addBond(1, 2, Bond::DOUBLE);  // bond 1
  mol->addBond(2, 3, Bond::SINGLE);  // bond 2
  // setup the stereochem:
  mol->getBondWithIdx(0)->setBondDir(Bond::ENDUPRIGHT);
  mol->getBondWithIdx(2)->setBondDir(Bond::ENDDOWNRIGHT);

  // do the chemistry perception:
  RDKit::MolOps::sanitizeMol(*mol);

  // Get the canonical SMILES, include stereochemistry:
  std::string smiles;
  smiles = MolToSmiles(*(static_cast<ROMol *>(mol)), true);
  BOOST_LOG(rdInfoLog) << " sample 1 SMILES: " << smiles << std::endl;
}
Beispiel #4
0
void testUniqueSubgraphs() {
  std::cout << "-----------------------\n Unique Subgraph retrieval"
            << std::endl;
  // build: CCC(C)CC
  RWMol mol;
  bool updateLabel = true;
  bool takeOwnership = true;
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addBond(0, 1, Bond::SINGLE);
  mol.addBond(1, 2, Bond::SINGLE);
  mol.addBond(2, 3, Bond::SINGLE);
  mol.addBond(2, 4, Bond::SINGLE);
  mol.addBond(3, 5, Bond::SINGLE);

  PATH_LIST tmp;
  PATH_LIST::iterator i;

  tmp = findAllSubgraphsOfLengthN(mol, 1);
  CHECK_INVARIANT(tmp.size() == 5, "");

  tmp = findAllSubgraphsOfLengthN(mol, 2);
  CHECK_INVARIANT(tmp.size() == 5, "");
  tmp = findUniqueSubgraphsOfLengthN(mol, 2);
  CHECK_INVARIANT(tmp.size() == 1, "");

  tmp = findAllSubgraphsOfLengthN(mol, 3);
  CHECK_INVARIANT(tmp.size() == 5, "");
  tmp = findUniqueSubgraphsOfLengthN(mol, 3);
  CHECK_INVARIANT(tmp.size() == 2, "");

  tmp = findAllSubgraphsOfLengthN(mol, 4);
  CHECK_INVARIANT(tmp.size() == 3, "");
  tmp = findUniqueSubgraphsOfLengthN(mol, 4);
  CHECK_INVARIANT(tmp.size() == 2, "");

  tmp = findAllSubgraphsOfLengthN(mol, 5);
  CHECK_INVARIANT(tmp.size() == 1, "");
  tmp = findUniqueSubgraphsOfLengthN(mol, 5);
  CHECK_INVARIANT(tmp.size() == 1, "");

  tmp = findAllSubgraphsOfLengthN(mol, 6);
  CHECK_INVARIANT(tmp.empty(), "");
  tmp = findUniqueSubgraphsOfLengthN(mol, 6);
  CHECK_INVARIANT(tmp.empty(), "");

  // add an H and make sure things don't change:
  mol.addAtom(new Atom(1), updateLabel, takeOwnership);
  mol.addBond(5, 6, Bond::SINGLE);

  tmp = findAllSubgraphsOfLengthN(mol, 2);
  CHECK_INVARIANT(tmp.size() == 5, "");
  tmp = findUniqueSubgraphsOfLengthN(mol, 2);
  CHECK_INVARIANT(tmp.size() == 1, "");

  tmp = findAllSubgraphsOfLengthN(mol, 3);
  CHECK_INVARIANT(tmp.size() == 5, "");
  tmp = findUniqueSubgraphsOfLengthN(mol, 3);
  CHECK_INVARIANT(tmp.size() == 2, "");

  tmp = findAllSubgraphsOfLengthN(mol, 4);
  CHECK_INVARIANT(tmp.size() == 3, "");
  tmp = findUniqueSubgraphsOfLengthN(mol, 4);
  CHECK_INVARIANT(tmp.size() == 2, "");

  tmp = findAllSubgraphsOfLengthN(mol, 5);
  CHECK_INVARIANT(tmp.size() == 1, "");
  tmp = findUniqueSubgraphsOfLengthN(mol, 5);
  CHECK_INVARIANT(tmp.size() == 1, "");

  tmp = findAllSubgraphsOfLengthN(mol, 6);
  CHECK_INVARIANT(tmp.empty(), "");
  tmp = findUniqueSubgraphsOfLengthN(mol, 6);
  CHECK_INVARIANT(tmp.empty(), "");

  std::cout << "Finished" << std::endl;
}
Beispiel #5
0
void testSubgraphs() {
  std::cout << "-----------------------\n Subgraph retrieval" << std::endl;
  // build: CCC(C)CC
  RWMol mol;
  bool updateLabel = true;
  bool takeOwnership = true;
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addBond(0, 1, Bond::SINGLE);
  mol.addBond(1, 2, Bond::SINGLE);
  mol.addBond(2, 3, Bond::SINGLE);
  mol.addBond(2, 4, Bond::SINGLE);
  mol.addBond(3, 5, Bond::SINGLE);

  PATH_LIST tmp;
  PATH_LIST::iterator i;

  int totPs = 0;
  tmp = findAllSubgraphsOfLengthN(mol, 1);
  CHECK_INVARIANT(tmp.size() == 5, "");
  totPs += tmp.size();
  tmp = findAllSubgraphsOfLengthN(mol, 2);
  CHECK_INVARIANT(tmp.size() == 5, "");
  totPs += tmp.size();
  tmp = findAllSubgraphsOfLengthN(mol, 3);
  CHECK_INVARIANT(tmp.size() == 5, "");
  totPs += tmp.size();
  tmp = findAllSubgraphsOfLengthN(mol, 4);
  CHECK_INVARIANT(tmp.size() == 3, "");
  totPs += tmp.size();
  tmp = findAllSubgraphsOfLengthN(mol, 5);
  CHECK_INVARIANT(tmp.size() == 1, "");
  totPs += tmp.size();
  tmp = findAllSubgraphsOfLengthN(mol, 6);
  CHECK_INVARIANT(tmp.empty(), "");
  totPs += tmp.size();

  // now use the direct range function and check that we get the
  // same anwswer
  INT_PATH_LIST_MAP tmpm;
  tmpm = findAllSubgraphsOfLengthsMtoN(mol, 1, 6);
  int newTot, idx;
  newTot = 0;
  for (idx = 1; idx <= 6; idx++) {
    newTot += tmpm[idx].size();
  }
  CHECK_INVARIANT(totPs == newTot, "");

  // add an H and make sure things don't change:
  mol.addAtom(new Atom(1), updateLabel, takeOwnership);
  mol.addBond(5, 6, Bond::SINGLE);

  tmp = findAllSubgraphsOfLengthN(mol, 1);

  CHECK_INVARIANT(tmp.size() == 5, "");
  tmp = findAllSubgraphsOfLengthN(mol, 2);
  CHECK_INVARIANT(tmp.size() == 5, "");
  tmp = findAllSubgraphsOfLengthN(mol, 3);
  CHECK_INVARIANT(tmp.size() == 5, "");
  tmp = findAllSubgraphsOfLengthN(mol, 4);
  CHECK_INVARIANT(tmp.size() == 3, "");
  tmp = findAllSubgraphsOfLengthN(mol, 5);
  CHECK_INVARIANT(tmp.size() == 1, "");
  tmp = findAllSubgraphsOfLengthN(mol, 6);
  CHECK_INVARIANT(tmp.empty(), "");

  std::cout << "Finished" << std::endl;
}
Beispiel #6
0
void testPaths() {
  std::cout << "-----------------------\n Path retrieval" << std::endl;
  // build: CCC(C)CC
  RWMol mol;
  bool updateLabel = true;
  bool takeOwnership = true;
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addBond(0, 1, Bond::SINGLE);
  mol.addBond(1, 2, Bond::SINGLE);
  mol.addBond(2, 3, Bond::SINGLE);
  mol.addBond(2, 4, Bond::SINGLE);
  mol.addBond(3, 5, Bond::SINGLE);

  PATH_LIST tmp;

  //
  //  Retrieve using bonds
  //
  tmp = findAllPathsOfLengthN(mol, 1);
  CHECK_INVARIANT(tmp.size() == 5, "");
  tmp = findAllPathsOfLengthN(mol, 2);
  CHECK_INVARIANT(tmp.size() == 5, "");
  tmp = findAllPathsOfLengthN(mol, 3);
  CHECK_INVARIANT(tmp.size() == 4, "");
  tmp = findAllPathsOfLengthN(mol, 4);
  CHECK_INVARIANT(tmp.size() == 1, "");
  tmp = findAllPathsOfLengthN(mol, 5);
  CHECK_INVARIANT(tmp.empty(), "");
  tmp = findAllPathsOfLengthN(mol, 6);
  CHECK_INVARIANT(tmp.empty(), "");

  //
  //  Retrieve using atoms, which gives the results shifted by
  //  one (it takes two atoms to make one bond)
  //
  tmp = findAllPathsOfLengthN(mol, 1, false);
  CHECK_INVARIANT(tmp.size() == 6, "");
  tmp = findAllPathsOfLengthN(mol, 2, false);
  CHECK_INVARIANT(tmp.size() == 5, "");
  tmp = findAllPathsOfLengthN(mol, 3, false);
  CHECK_INVARIANT(tmp.size() == 5, "");
  tmp = findAllPathsOfLengthN(mol, 4, false);
  CHECK_INVARIANT(tmp.size() == 4, "");
  tmp = findAllPathsOfLengthN(mol, 5, false);
  CHECK_INVARIANT(tmp.size() == 1, "");
  tmp = findAllPathsOfLengthN(mol, 6, false);
  CHECK_INVARIANT(tmp.empty(), "");

  //
  //  try m->n
  //
  INT_PATH_LIST_MAP pths;
  pths = findAllPathsOfLengthsMtoN(mol, 1, 6);
  CHECK_INVARIANT(pths[1].size() == 5, "");
  CHECK_INVARIANT(pths[2].size() == 5, "");
  CHECK_INVARIANT(pths[3].size() == 4, "");
  CHECK_INVARIANT(pths[4].size() == 1, "");
  CHECK_INVARIANT(pths[5].empty(), "");
  CHECK_INVARIANT(pths[6].empty(), "");

  pths = findAllPathsOfLengthsMtoN(mol, 1, 6, false);
  CHECK_INVARIANT(pths[1].size() == 6, "");
  CHECK_INVARIANT(pths[2].size() == 5, "");
  CHECK_INVARIANT(pths[3].size() == 5, "");
  CHECK_INVARIANT(pths[4].size() == 4, "");
  CHECK_INVARIANT(pths[5].size() == 1, "");
  CHECK_INVARIANT(pths[6].empty(), "");

  //
  //  add an atom, close the ring and re-check a couple indices:
  //   (leaves us with CC1CCCCC1)
  //
  mol.addAtom(new Atom(6), updateLabel, takeOwnership);
  mol.addBond(5, 6, Bond::SINGLE);
  mol.addBond(0, 6, Bond::SINGLE);
  tmp = findAllPathsOfLengthN(mol, 4);
  CHECK_INVARIANT(tmp.size() == 8, "");
  tmp = findAllPathsOfLengthN(mol, 5, false);
  CHECK_INVARIANT(tmp.size() == 8, "");

  std::cout << "Finished" << std::endl;
}
// Draft of a bond-cutting routine: depending on `ftype` it looks up atom
// pairs matching the acyclic (`acyc`) or cyclic (`cyc`) pattern, removes the
// bond between each pair from a copy of `mol`, attaches extra atoms at both
// ends and converts the result to SMILES.
//
//   mol    molecule to fragment; only the copy `newMol` is modified
//   ftype  compared against `acyc_smarts` / `cyc_smarts` to pick the branch
//   hac    never read in this function
//
// NOTE(review): this function does not compile as written. The individual
// problems are flagged inline; `acyc`, `cyc`, `acyc_smarts`, `cyc_smarts`,
// `String` and `Match_Vect` are not declared in the visible part of the file.
void deletebonds(const ROMol &mol, String ftype, int hac){
// NOTE(review): the object is constructed as an ROMol and then downcast to
// RWMol*; constructing an RWMol directly is presumably what was intended.
RWMol *newMol = static_cast<RWMol*>(new ROMol(mol,false));
int total_acyclic = 0;
int total_cyclic = 0;
//find the relevant bonds to break

// Single acyclic Cuts
if(ftype == acyc_smarts){
// NOTE(review): acyclic_matching_atoms is used here and on the next line
// before it is declared; getSubstructMatches() looks like the Python API
// name - confirm that ROMol offers it.
acyclic_matching_atoms = mol.getSubstructMatches(acyc);
total_acyclic = acyclic_matching_atoms.size();
MatchVectType acyclic_matching_atoms;
// NOTE(review): bonds_selected is declared twice with two different types.
std::vector<int> bonds_selected;
Match_Vect bonds_selected;
SubstructMatch(mol, acyc, acyclic_matching_atoms);
// if we didn't find any matches, there's nothing to be done here
    // simply return a list with a copy of the starting molecule
    // NOTE(review): newMol is a pointer to a single molecule, yet it is used
    // like a container below; the function is declared void but returns a
    // value; and this `if` block is never closed before the loop starts.
    if (acyclic_matching_atoms.size() == 0) {
      newMol.push_back(ROMOL_SPTR(new ROMol(mol,false)));
      newMol[0]->clearComputedProps(false);
      return newMol;
for(MatchVectType::const_iterator mvit=acyclic_matching_atoms.begin();
        mvit!=acyclic_matching_atoms.end(); mvit++){
      // The pair is appended and then slots 0 and 1 are overwritten with the
      // same values, so slots 0/1 always hold the current pair.
      bonds_selected.push_back(mvit->first);
      bonds_selected.push_back(mvit->second);
	bonds_selected[0] = mvit->first;
	bonds_selected[1] = mvit->second;

Atom *at1 = newMol->getAtomWithIdx(bonds_selected[0]);
Atom *at2 = newMol->getAtomWithIdx(bonds_selected[1]);
// NOTE(review): atom0 and atom1 both refer to the existing atom with index 0
// of newMol; newly created dummy atoms were presumably intended.
Atom *atom0 = newMol->getAtomWithIdx(0);
Atom *atom1 = newMol->getAtomWithIdx(0);
newMol->removeBond(bonds_selected[0], bonds_selected[1]);// Break the bond with idx=at1, at2.

// Introduce two dummy atoms in the molecule
newMol->addAtom(atom0);
newMol->addAtom(atom1);
// Bond the dummy atoms to the new terminal atoms
// NOTE(review): the bond is looked up and required to exist before it is
// added, oBond is declared twice in this scope, and addBond() is given a
// pointer (at1) together with a dereferenced object (*atom0).
Bond *oBond=newMol->getBondBetweenAtoms(bonds_selected[0],atom0->getIdx());
          CHECK_INVARIANT(oBond,"required bond not found");
          newMol->addBond(at1,
                          *atom0,Bond::SINGLE);

Bond *oBond=newMol->getBondBetweenAtoms(bonds_selected[1],atom1->getIdx());
          CHECK_INVARIANT(oBond,"required bond not found");
          newMol->addBond(at2,
                          *atom1,Bond::SINGLE);

// Leaves the loop after the first matched pair.
break;
}

//Now get the modified fragment in smiles(i.e. smi2);

// NOTE(review): smi2 is computed but neither used nor returned.
String smi2 = MolToSmiles(*newMol);

//printf (smi2);
	}
}

//cyclic Cuts
if(ftype == cyc_smarts){
// NOTE(review): same ordering problem as in the acyclic branch -
// cyclic_matching_atoms is used before its declaration.
cyclic_matching_atoms = mol.getSubstructMatches(cyc);
// NOTE(review): this declares a new local that hides the total_cyclic
// declared at the top of the function.
int total_cyclic = cyclic_matching_atoms.size();
MatchVectType cyclic_matching_atoms;
// NOTE(review): bonds_selected is declared twice with two different types.
std::vector<int> bonds_selected;
Match_Vect bonds_selected;
SubstructMatch(mol, cyc, cyclic_matching_atoms);
// if we didn't find any matches, there's nothing to be done here
    // simply return a list with a copy of the starting molecule
    // NOTE(review): same problems as in the acyclic branch - container-style
    // use of the newMol pointer, a value returned from a void function and
    // an `if` block that is never closed.
    if (cyclic_matching_atoms.size() == 0) {
      newMol.push_back(ROMOL_SPTR(new ROMol(mol,false)));
      newMol[0]->clearComputedProps(false);
      return newMol;
for(MatchVectType::const_iterator mvit=cyclic_matching_atoms.begin();
        mvit!=cyclic_matching_atoms.end(); mvit++){
      bonds_selected.push_back(mvit->first);
      bonds_selected.push_back(mvit->second);
	bonds_selected[0] = mvit->first;
	bonds_selected[1] = mvit->second;

Atom *at1 = newMol->getAtomWithIdx(bonds_selected[0]);
Atom *at2 = newMol->getAtomWithIdx(bonds_selected[1]);
// NOTE(review): both pointers refer to the existing atom with index 0.
Atom *atom0 = newMol->getAtomWithIdx(0);
Atom *atom1 = newMol->getAtomWithIdx(0);
newMol->removeBond(bonds_selected[0], bonds_selected[1]);// Break the bond with idx=at1, at2.

// Introduce two dummy atoms in the molecule
newMol->addAtom(atom0);
newMol->addAtom(atom1);
// Bond the dummy atoms to the new terminal atoms
// NOTE(review): oBond is declared twice in this scope.
Bond *oBond=newMol->getBondBetweenAtoms(bonds_selected[0],atom0->getIdx());
          CHECK_INVARIANT(oBond,"required bond not found");
          newMol->addBond(at1,
                          *atom0,Bond::SINGLE);

Bond *oBond=newMol->getBondBetweenAtoms(bonds_selected[1],atom1->getIdx());
          CHECK_INVARIANT(oBond,"required bond not found");
          newMol->addBond(at2,
                          *atom1,Bond::SINGLE);

// As the last statement of the loop body this has no effect; unlike the
// acyclic branch, every matched pair is processed.
continue;
}
//Now get the modified fragment in smiles(i.e. smi2);

String smi2 = MolToSmiles(*newMol);
//printf (smi2);

//now do an acyclic cut with the successful cyclic cut on the mol
// NOTE(review): acyclic_matching_atoms is declared only inside the acyclic
// `if` block above, so it is not visible in this branch.
for(MatchVectType::const_iterator mvit=acyclic_matching_atoms.begin();
        mvit!=acyclic_matching_atoms.end(); mvit++){
      bonds_selected.push_back(mvit->first);
      bonds_selected.push_back(mvit->second);
	bonds_selected[0] = mvit->first;
	bonds_selected[1] = mvit->second;

Atom *at1 = newMol->getAtomWithIdx(bonds_selected[0]);
Atom *at2 = newMol->getAtomWithIdx(bonds_selected[1]);
// NOTE(review): both pointers refer to the existing atom with index 0.
Atom *atom0 = newMol->getAtomWithIdx(0);
Atom *atom1 = newMol->getAtomWithIdx(0);
newMol->removeBond(bonds_selected[0], bonds_selected[1]);// Break the bond with idx=at1, at2.

// Introduce two dummy atoms in the molecule
newMol->addAtom(atom0);
newMol->addAtom(atom1);
// Bond the dummy atoms to the new terminal atoms
// NOTE(review): nbrIdx is not declared anywhere in this function, and this
// first addBond() copies the looked-up bond's type whereas every other call
// in the function uses Bond::SINGLE. oBond is again declared twice.
Bond *oBond=newMol->getBondBetweenAtoms(bonds_selected[0],atom0->getIdx());
          CHECK_INVARIANT(oBond,"required bond not found");
          newMol->addBond(at1,
                          *nbrIdx,oBond->getBondType());

Bond *oBond=newMol->getBondBetweenAtoms(bonds_selected[1],atom1->getIdx());
          CHECK_INVARIANT(oBond,"required bond not found");
          newMol->addBond(at2,
                          *atom1,Bond::SINGLE);

// No effect as the last statement of the loop body.
continue;
}

//Now get the modified fragment in smiles(i.e. smi2);

// NOTE(review): a second smi2 is declared in the same scope as the one
// above, and the value is again unused.
String smi2 = MolToSmiles(*newMol);
//printf (smi2);
	}
}

// Patterns used to determine whether a ring cut is valid.
// cSma1 / cSma2 hold the SMARTS text; Sma1 / Sma2 are the query molecules
// that SmartsToMol() builds from the same two literals.
// NOTE(review): `String` is not declared in the visible part of the file -
// confirm which string type is meant.
String cSma1 = ("[#0][r].[r][#0]");
static ROMol *Sma1 = SmartsToMol("[#0][r].[r][#0]");
String cSma2 = ("[#0][r][#0]");
static ROMol *Sma2 = SmartsToMol("[#0][r][#0]");
void is_ring_cut_valid(ROMol *fMol, ROMol *Sma1, ROMol *Sma2){
//to check is a fragment is a valid ring cut, it needs to match the
//smarts: [$([#0][r].[r][#0]),$([#0][r][#0])]

	boolean valid = false;
	ROMol *m = new RWMol();
//if m is not None:
	if (m != NULL){
    //use global smarts
    if(m->hasSubstructMatch(Sma1) || m->hasSubstructMatch(Sma2)){
    	int atom_count = m->getNumAtoms();
    	valid = true;
    		}
	}
}