Exemplo n.º 1
0
// Copies the entries of a Python sequence of strings into a STR_VECT and
// hands that list to the writer through SDWriter::setProps().
void SetSDWriterProps(SDWriter &writer, python::object props) {
  STR_VECT names;
  PySequenceHolder<std::string> holder(props);
  unsigned int idx = 0;
  while (idx < holder.size()) {
    names.push_back(holder[idx]);
    ++idx;
  }
  writer.setProps(names);
}
Exemplo n.º 2
0
void testSDMemoryCorruption() {
  std::string rdbase = getenv("RDBASE");
  std::string fname = rdbase + "/Data/NCI/first_200.props.sdf";
  SDMolSupplier sdsup(fname,true);
  std::string ofile = rdbase + "/Code/GraphMol/FileParsers/test_data/outNCI_first_200.props.sdf";
  std::ostream *os=new std::ofstream(ofile.c_str());
  //std::ostream *os=new std::stringstream();
  SDWriter *writer = new SDWriter(os,false);

  STR_VECT names;
#if 1
  ROMol *m1=sdsup.next();
  MolOps::sanitizeMol(*(RWMol *)m1);
#else
  ROMol *m1=SmilesToMol("C1CC1");
  TEST_ASSERT(m1);
#endif
  sdsup.reset();
  int nDone=0;
  while (!sdsup.atEnd()) {
    //std::cerr<<nDone<<std::endl;
    ROMol *mol = sdsup.next();
    //std::cerr<<"m:"<<mol<<std::endl;
    TEST_ASSERT(mol);
    std::string mname;
    mol->getProp("_Name", mname);
    names.push_back(mname);
    //std::cerr<<"  w"<<std::endl;
    writer->write(*mol);

    //std::cerr<<"  ok"<<std::endl;
    
    delete mol;
    nDone++;
  }
  CHECK_INVARIANT(nDone == 200, "");
  writer->flush();
  CHECK_INVARIANT(writer->numMols() == 200, "");

  delete writer;
#if 1
  // now read in the file we just finished writing
  SDMolSupplier reader(ofile);
  int i = 0;
  while (!reader.atEnd()) {
    ROMol *mol = reader.next();
    std::string mname;
    mol->getProp("_Name", mname);
    CHECK_INVARIANT(mname == names[i], "");

    
    delete mol;
    i++;
  }
#endif
}
Exemplo n.º 3
0
// Round-trip test for the file-name based SDWriter: copies NCI_aids_few.sdf to
// outNCI_few.sdf, checks the molecule count, verifies that the writer can be
// close()d and then deleted, and finally checks that the names read back from
// the written file and from outNCI_arom.sdf match the original order.
void testSDWriter() {
  // constructing a std::string from a null pointer is undefined behaviour,
  // so fail cleanly when RDBASE is not set
  const char *rdbaseEnv = getenv("RDBASE");
  CHECK_INVARIANT(rdbaseEnv, "RDBASE environment variable is not set");
  std::string rdbase = rdbaseEnv;
  std::string fname = rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
  SDMolSupplier sdsup(fname);
  
  std::string ofile = rdbase + "/Code/GraphMol/FileParsers/test_data/outNCI_few.sdf";
  SDWriter *writer = new SDWriter(ofile);

  STR_VECT names;
 
  while (!sdsup.atEnd()) {
    ROMol *mol = sdsup.next();
    CHECK_INVARIANT(mol, "supplier returned a null molecule");
    std::string mname;
    mol->getProp("_Name", mname);
    names.push_back(mname);

    writer->write(*mol);
    delete mol;
  }
  writer->flush();
  CHECK_INVARIANT(writer->numMols() == 16, "");

  // make sure we can close() the writer and delete it:
  writer->close();
  
  delete writer;

  // now read in the file we just finished writing
  SDMolSupplier reader(ofile);
  unsigned int i = 0;
  while (!reader.atEnd()) {
    ROMol *mol = reader.next();
    // fail the test rather than dereference null / index past the end
    CHECK_INVARIANT(mol, "reader returned a null molecule");
    CHECK_INVARIANT(i < names.size(), "more molecules read than written");
    std::string mname;
    mol->getProp("_Name", mname);
    CHECK_INVARIANT(mname == names[i], "");
    
    delete mol;
    i++;
  }

  // now read in a file with aromatic information on the bonds
  std::string infile = rdbase + "/Code/GraphMol/FileParsers/test_data/outNCI_arom.sdf";
  SDMolSupplier nreader(infile);
  i = 0;
  while (!nreader.atEnd()) {
    ROMol *mol = nreader.next();
    CHECK_INVARIANT(mol, "reader returned a null molecule");
    CHECK_INVARIANT(i < names.size(), "more molecules read than written");
    std::string mname;
    mol->getProp("_Name", mname);
    CHECK_INVARIANT(mname == names[i], "");
    i++;
    
    delete mol;
  }

}
Exemplo n.º 4
0
// Round-trip test for the stream based SDWriter: copies NCI_aids_few.sdf to
// outNCI_few.sdf through a std::ofstream, checks the molecule count and that
// the names read back match, then checks that outNCI_arom.sdf yields 16
// non-null molecules.
void testSDWriterStrm() {
  // constructing a std::string from a null pointer is undefined behaviour,
  // so fail cleanly when RDBASE is not set
  const char *rdbaseEnv = getenv("RDBASE");
  TEST_ASSERT(rdbaseEnv);
  std::string rdbase = rdbaseEnv;
  {
    std::string fname = rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
    SDMolSupplier sdsup(fname);
  
    std::string ofile = rdbase + "/Code/GraphMol/FileParsers/test_data/outNCI_few.sdf";
    std::ofstream *oStream=new std::ofstream(ofile.c_str());

    SDWriter *writer = new SDWriter(oStream);

    STR_VECT names;
 
    while (!sdsup.atEnd()) {
      ROMol *mol = sdsup.next();
      TEST_ASSERT(mol);
      std::string mname;
      mol->getProp("_Name", mname);
      names.push_back(mname);

      writer->write(*mol);
      delete mol;
    }
    writer->flush();
    CHECK_INVARIANT(writer->numMols() == 16, "");
    delete writer;
    // NOTE(review): relies on SDWriter(std::ostream *) not taking ownership
    // of the stream by default (takeOwnership=false in the RDKit API) --
    // confirm against the MolWriters.h in this tree. Releasing the stream
    // here also closes the file before it is read back.
    delete oStream;

    // now read in the file we just finished writing
    SDMolSupplier reader(ofile);
    unsigned int i = 0;
    while (!reader.atEnd()) {
      ROMol *mol = reader.next();
      // fail the test rather than dereference null / index past the end
      TEST_ASSERT(mol);
      TEST_ASSERT(i < names.size());
      std::string mname;
      mol->getProp("_Name", mname);
      CHECK_INVARIANT(mname == names[i], "");
    
      delete mol;
      i++;
    }
  }

  {
    // now read in a file with aromatic information on the bonds
    std::string infile = rdbase + "/Code/GraphMol/FileParsers/test_data/outNCI_arom.sdf";
    SDMolSupplier nreader(infile);
    unsigned int i = 0;
    while (!nreader.atEnd()) {
      ROMol *mol = nreader.next();
      TEST_ASSERT(mol);
      ++i;
      delete mol;
    }
    TEST_ASSERT(i==16);
  }
}
Exemplo n.º 5
0
// Writes the molecules of fewSmi.csv with a SmilesWriter constructed with an
// empty name header and "Column_2" as the only property, after setting each
// molecule's _Name to "bogus". The output is then read back and the test
// checks that "bogus" is not the name that comes back and that the Column_2
// values match what was written.
void testSmilesWriterNoNames() {
  // constructing a std::string from a null pointer is undefined behaviour,
  // so fail cleanly when RDBASE is not set
  const char *rdbaseEnv = getenv("RDBASE");
  TEST_ASSERT(rdbaseEnv);
  std::string rdbase = rdbaseEnv;
  std::string fname = rdbase + "/Code/GraphMol/FileParsers/test_data/fewSmi.csv";
  SmilesMolSupplier *nSup = new SmilesMolSupplier(fname, ",", 1, 0, false);
  std::string oname = rdbase + "/Code/GraphMol/FileParsers/test_data/outSmiles.csv";
  
  STR_VECT propNames;
  propNames.push_back(std::string("Column_2"));
  SmilesWriter *writer = new SmilesWriter(oname," ","");
  writer->setProps(propNames);

  STR_VECT props;
  ROMol *mol = nSup->next();
  while (mol) {
    std::string pval;
    mol->getProp("Column_2", pval);
    mol->setProp("_Name","bogus");
    props.push_back(pval);
    writer->write(*mol);
    delete mol;
    try {
      mol = nSup->next();
    } catch (FileParseException &) {
      // the supplier signals the end of the file by throwing
      break;
    }
  }
  writer->flush();
  delete writer;
  delete nSup;

  // now read the molecules back in a check if we have the same properties etc
  nSup = new SmilesMolSupplier(oname,",",0,-1);
  unsigned int i = 0;
  mol = nSup->next();
  while (mol){
    std::string mname, pval;
    mol->getProp("_Name", mname);
    TEST_ASSERT(mname!="bogus");
    mol->getProp("Column_2", pval);
    // fail the test rather than index past the recorded values
    TEST_ASSERT(i < props.size());
    TEST_ASSERT(pval == props[i]);
    i++;
    delete mol;
    try {
      mol = nSup->next();
    } catch (FileParseException &) {
      break;
    }
  }
  // the second supplier is owned by this function as well
  delete nSup;
}
Exemplo n.º 6
0
// Round-trip test for the stream based TDTWriter: writes the molecules of
// NCI_aids_few.sdf to outNCI_few.tdt through a std::ofstream, checks the
// molecule count, then reads the file back with a TDTMolSupplier and checks
// that the CAS_RN property of each molecule matches what was written and that
// 16 records were seen.
void testTDTWriterStrm() {
  // constructing a std::string from a null pointer is undefined behaviour,
  // so fail cleanly when RDBASE is not set
  const char *rdbaseEnv = getenv("RDBASE");
  TEST_ASSERT(rdbaseEnv);
  std::string rdbase = rdbaseEnv;
  std::string fname = rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf";
  SDMolSupplier sdsup(fname);
  
  std::string ofile = rdbase + "/Code/GraphMol/FileParsers/test_data/outNCI_few.tdt";
  std::ofstream *oStream=new std::ofstream(ofile.c_str());
  TDTWriter *writer = new TDTWriter(oStream);

  STR_VECT names;
 
  while (!sdsup.atEnd()) {
    ROMol *mol = sdsup.next();
    TEST_ASSERT(mol);
    std::string mname;
    mol->getProp("CAS_RN", mname);
    names.push_back(mname);

    writer->write(*mol);
    delete mol;
  }
  writer->flush();
  TEST_ASSERT(writer->numMols() == 16);
  delete writer;
  // NOTE(review): relies on TDTWriter(std::ostream *) not taking ownership of
  // the stream by default (takeOwnership=false in the RDKit API) -- confirm
  // against the MolWriters.h in this tree. Releasing the stream here also
  // closes the file before it is read back.
  delete oStream;

  // now read in the file we just finished writing
  TDTMolSupplier reader(ofile);
  unsigned int i = 0;
  while (!reader.atEnd()) {
    ROMol *mol = reader.next();
    if(mol){
      // fail the test rather than index past the recorded values
      TEST_ASSERT(i < names.size());
      std::string mname;
      mol->getProp("CAS_RN", mname);
      CHECK_INVARIANT(mname == names[i], "");
      delete mol;
    }
    i++;
  }
  TEST_ASSERT(i==16);
}
Exemplo n.º 7
0
  void SDWriter::write(const ROMol &mol, int confId) {
    PRECONDITION(dp_ostream,"no output stream");

    // write the molecule 
    (*dp_ostream) << MolToMolBlock(mol, true, confId, df_kekulize, df_forceV3000);

    // now write the properties
    STR_VECT_CI pi;
    if (d_props.size() > 0) {
      // check if we have any properties the user specified to write out
      // in which loop over them and write them out
      for (pi = d_props.begin(); pi != d_props.end(); pi++) {
	if (mol.hasProp(*pi)) {
	  writeProperty(mol, (*pi));
	}
      }
    }
    else {
      // if use did not specify any properties, write all non computed properties
      // out to the file
      STR_VECT properties = mol.getPropList();
      STR_VECT compLst;
      if (mol.hasProp(detail::computedPropName)) {
	mol.getProp(detail::computedPropName, compLst);
      }
      STR_VECT_CI pi;
      for (pi = properties.begin(); pi != properties.end(); pi++) {

	// ignore any of the following properties
	if ( ((*pi) == detail::computedPropName) || 
	     ((*pi) == "_Name") ||
	     ((*pi) == "_MolFileInfo") ||
	     ((*pi) == "_MolFileComments") ||
             ((*pi) == "_MolFileChiralFlag")) {
	  continue;
	}

	// check if this property is not computed
	if (std::find(compLst.begin(), compLst.end(), (*pi)) == compLst.end()) {
	  writeProperty(mol, (*pi));
	}
      }
    }
    // add the $$$$ that marks the end of a molecule
    (*dp_ostream) << "$$$$\n";

    ++d_molid;
  }
Exemplo n.º 8
0
  void TDTWriter::write(const ROMol &mol, int confId) {
    CHECK_INVARIANT(dp_ostream,"no output stream");
    //start by writing a "|" line unless this is the first line
    if (d_molid > 0) {
      (*dp_ostream) << "|\n";
    }

    // write the molecule 
    (*dp_ostream) << "$SMI<" << MolToSmiles(mol) << ">\n";
    
    if(df_writeNames && mol.hasProp("_Name")){
      std::string name;
      mol.getProp("_Name",name);
      (*dp_ostream) << "NAME<" << name << ">\n";
    }
    
    // do we need to write coordinates?
    if(mol.getNumConformers()){
      // get the ordering of the atoms in the output SMILES:
      std::vector<unsigned int> atomOrdering;
      mol.getProp("_smilesAtomOutputOrder",atomOrdering);
      
      const Conformer &conf = mol.getConformer(confId);
      if(df_write2D){
	(*dp_ostream) << "2D<";
      } else {
	(*dp_ostream) << "3D<";
      }
      const RDGeom::POINT3D_VECT &coords=conf.getPositions();
      int nAts=atomOrdering.size();
      for(int i=0;i<nAts;i++){
	(*dp_ostream) << std::setprecision(d_numDigits) << coords[atomOrdering[i]].x << ",";
	(*dp_ostream) << std::setprecision(d_numDigits) << coords[atomOrdering[i]].y;
	if(!df_write2D){
	  (*dp_ostream) << "," << std::setprecision(d_numDigits) << coords[atomOrdering[i]].z;
	}
	if(i!=nAts-1) (*dp_ostream) << ",";
      }
      (*dp_ostream) << ";>\n";

    }
    // now write the properties
    STR_VECT_CI pi;
    if (d_props.size() > 0) {
      // check if we have any properties the user specified to write out
      // in which loop over them and write them out
      for (pi = d_props.begin(); pi != d_props.end(); pi++) {
	if (mol.hasProp(*pi)) {
	  writeProperty(mol, (*pi));
	}
      }
    }
    else {
      // if use did not specify any properties, write all non computed properties
      // out to the file
      STR_VECT properties = mol.getPropList();
      STR_VECT compLst;
      if (mol.hasProp(detail::computedPropName)) {
	mol.getProp(detail::computedPropName, compLst);
      }
      STR_VECT_CI pi;
      for (pi = properties.begin(); pi != properties.end(); pi++) {

	// ignore any of the following properties
	if ( ((*pi) == detail::computedPropName) || 
	     ((*pi) == "_Name") ||
	     ((*pi) == "_MolFileInfo") ||
	     ((*pi) == "_MolFileComments") ||
             ((*pi) == "_MolFileChiralFlag")) {
	  continue;
	}

	// check if this property is not computed
	if (std::find(compLst.begin(), compLst.end(), (*pi)) == compLst.end()) {
	  writeProperty(mol, (*pi));
	}
      }
    }
    d_molid++;
  }
Exemplo n.º 9
0
// Builds a molecule from one line of delimited text.
//
// The line is split on the characters in d_delim (empty tokens are kept) and
// each token is passed through strip(). Column d_smi supplies the SMILES;
// column d_name supplies the _Name property, or the current line number is
// used as the name when d_name is -1. Every other column is stored as a string
// property, named from d_props when an entry exists for that column index and
// "Column_<index>" otherwise.
//
// Returns a newly allocated molecule owned by the caller, or NULL when the
// line could not be processed. No exception escapes this function: all
// failures are logged to rdErrorLog and reported through the NULL return.
ROMol *SmilesMolSupplier::processLine(std::string inLine) {
  ROMol *res = NULL;

  try {
    // -----------
    // tokenize the input line:
    // -----------
    boost::char_separator<char> sep(d_delim.c_str(), "",
                                    boost::keep_empty_tokens);
    tokenizer tokens(inLine, sep);
    STR_VECT recs;
    for (tokenizer::iterator tokIter = tokens.begin(); tokIter != tokens.end();
         ++tokIter) {
      std::string rec = strip(*tokIter);
      recs.push_back(rec);
    }
    if (recs.size() <= static_cast<unsigned int>(d_smi)) {
      // NOTE: this exception is caught by the catch-all handler at the bottom
      // of this function, so callers only ever see the NULL return.
      std::ostringstream errout;
      errout << "ERROR: line #" << d_line
             << " does not contain enough tokens\n";
      throw FileParseException(errout.str());
    }

    // -----------
    // get the smiles and create a molecule
    // -----------
    res = SmilesToMol(recs[d_smi], 0, df_sanitize);
    if (!res) {
      std::stringstream errout;
      errout << "Cannot create molecule from : '" << recs[d_smi] << "'";
      throw SmilesParseException(errout.str());
    }

    // -----------
    // get the name (if there's a name column)
    // -----------
    if (d_name == -1) {
      // if no name defaults it to the line number we read it from string
      std::ostringstream tstr;
      tstr << d_line;
      std::string mname = tstr.str();
      res->setProp(common_properties::_Name, mname);
    } else {
      if (d_name >= static_cast<int>(recs.size())) {
        BOOST_LOG(rdWarningLog) << "WARNING: no name column found on line "
                                << d_line << std::endl;
      } else {
        res->setProp(common_properties::_Name, recs[d_name]);
      }
    }

    // -----------
    // read in the properties
    // -----------
    for (unsigned int col = 0; col < recs.size(); col++) {
      // the SMILES and name columns have already been consumed
      if (static_cast<int>(col) == d_smi || static_cast<int>(col) == d_name)
        continue;
      std::string pname, pval;
      if (d_props.size() > col) {
        pname = d_props[col];
      } else {
        pname = "Column_";
        std::stringstream ss;
        ss << col;
        pname += ss.str();
      }

      pval = recs[col];
      res->setProp(pname, pval);
    }

  } catch (const SmilesParseException &pe) {
    // Couldn't parse the passed in smiles
    // Simply print out a message
    BOOST_LOG(rdErrorLog) << "ERROR: Smiles parse error on line " << d_line
                          << "\n";
    BOOST_LOG(rdErrorLog) << "ERROR: " << pe.message() << "\n";
    // release anything that was already built so it is not leaked
    delete res;
    res = NULL;
  } catch (const MolSanitizeException &se) {
    // We couldn't sanitize the molecule
    //  write out an error message
    BOOST_LOG(rdErrorLog) << "ERROR: Could not sanitize molecule on line "
                          << d_line << std::endl;
    BOOST_LOG(rdErrorLog) << "ERROR: " << se.message() << "\n";
    delete res;
    res = NULL;
  } catch (...) {
    //  write out an error message
    BOOST_LOG(rdErrorLog) << "ERROR: Could not process molecule on line "
                          << d_line << std::endl;
    // an exception thrown after SmilesToMol() succeeded (e.g. while setting
    // properties) would otherwise leak the molecule
    delete res;
    res = NULL;
  }

  return res;
}