// Python-facing helper: copies every entry of the Python sequence `props`
// into a STR_VECT and passes that vector to writer.setProps().
void SetSDWriterProps(SDWriter &writer, python::object props) {
  PySequenceHolder<std::string> holder(props);

  // build the STR_VECT element by element from the Python sequence
  STR_VECT names;
  for (unsigned int idx = 0; idx < holder.size(); ++idx) {
    names.push_back(holder[idx]);
  }

  writer.setProps(names);
}
void testSDMemoryCorruption() { std::string rdbase = getenv("RDBASE"); std::string fname = rdbase + "/Data/NCI/first_200.props.sdf"; SDMolSupplier sdsup(fname,true); std::string ofile = rdbase + "/Code/GraphMol/FileParsers/test_data/outNCI_first_200.props.sdf"; std::ostream *os=new std::ofstream(ofile.c_str()); //std::ostream *os=new std::stringstream(); SDWriter *writer = new SDWriter(os,false); STR_VECT names; #if 1 ROMol *m1=sdsup.next(); MolOps::sanitizeMol(*(RWMol *)m1); #else ROMol *m1=SmilesToMol("C1CC1"); TEST_ASSERT(m1); #endif sdsup.reset(); int nDone=0; while (!sdsup.atEnd()) { //std::cerr<<nDone<<std::endl; ROMol *mol = sdsup.next(); //std::cerr<<"m:"<<mol<<std::endl; TEST_ASSERT(mol); std::string mname; mol->getProp("_Name", mname); names.push_back(mname); //std::cerr<<" w"<<std::endl; writer->write(*mol); //std::cerr<<" ok"<<std::endl; delete mol; nDone++; } CHECK_INVARIANT(nDone == 200, ""); writer->flush(); CHECK_INVARIANT(writer->numMols() == 200, ""); delete writer; #if 1 // now read in the file we just finished writing SDMolSupplier reader(ofile); int i = 0; while (!reader.atEnd()) { ROMol *mol = reader.next(); std::string mname; mol->getProp("_Name", mname); CHECK_INVARIANT(mname == names[i], ""); delete mol; i++; } #endif }
void testSDWriter() { std::string rdbase = getenv("RDBASE"); std::string fname = rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf"; SDMolSupplier sdsup(fname); std::string ofile = rdbase + "/Code/GraphMol/FileParsers/test_data/outNCI_few.sdf"; SDWriter *writer = new SDWriter(ofile); STR_VECT names; while (!sdsup.atEnd()) { ROMol *mol = sdsup.next(); std::string mname; mol->getProp("_Name", mname); names.push_back(mname); writer->write(*mol); delete mol; } writer->flush(); CHECK_INVARIANT(writer->numMols() == 16, ""); // make sure we can close() the writer and delete it: writer->close(); delete writer; // now read in the file we just finished writing SDMolSupplier reader(ofile); int i = 0; while (!reader.atEnd()) { ROMol *mol = reader.next(); std::string mname; mol->getProp("_Name", mname); CHECK_INVARIANT(mname == names[i], ""); delete mol; i++; } // now read in a file with aromatic information on the bonds std::string infile = rdbase + "/Code/GraphMol/FileParsers/test_data/outNCI_arom.sdf"; SDMolSupplier nreader(infile); i = 0; while (!nreader.atEnd()) { ROMol *mol = nreader.next(); std::string mname; mol->getProp("_Name", mname); CHECK_INVARIANT(mname == names[i], ""); i++; delete mol; } }
void testSDWriterStrm() { std::string rdbase = getenv("RDBASE"); { std::string fname = rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf"; SDMolSupplier sdsup(fname); std::string ofile = rdbase + "/Code/GraphMol/FileParsers/test_data/outNCI_few.sdf"; std::ofstream *oStream=new std::ofstream(ofile.c_str()); SDWriter *writer = new SDWriter(oStream); STR_VECT names; while (!sdsup.atEnd()) { ROMol *mol = sdsup.next(); std::string mname; mol->getProp("_Name", mname); names.push_back(mname); writer->write(*mol); delete mol; } writer->flush(); CHECK_INVARIANT(writer->numMols() == 16, ""); delete writer; // now read in the file we just finished writing SDMolSupplier reader(ofile); int i = 0; while (!reader.atEnd()) { ROMol *mol = reader.next(); std::string mname; mol->getProp("_Name", mname); CHECK_INVARIANT(mname == names[i], ""); delete mol; i++; } } { // now read in a file with aromatic information on the bonds std::string infile = rdbase + "/Code/GraphMol/FileParsers/test_data/outNCI_arom.sdf"; SDMolSupplier nreader(infile); unsigned int i = 0; while (!nreader.atEnd()) { ROMol *mol = nreader.next(); TEST_ASSERT(mol); ++i; delete mol; } TEST_ASSERT(i==16); } }
void testSmilesWriterNoNames() { std::string rdbase = getenv("RDBASE"); std::string fname = rdbase + "/Code/GraphMol/FileParsers/test_data/fewSmi.csv"; SmilesMolSupplier *nSup = new SmilesMolSupplier(fname, ",", 1, 0, false); std::string oname = rdbase + "/Code/GraphMol/FileParsers/test_data/outSmiles.csv"; STR_VECT propNames; propNames.push_back(std::string("Column_2")); SmilesWriter *writer = new SmilesWriter(oname," ",""); writer->setProps(propNames); STR_VECT props; ROMol *mol = nSup->next(); while (mol) { std::string mname, pval; mol->getProp("Column_2", pval); mol->setProp("_Name","bogus"); props.push_back(pval); writer->write(*mol); delete mol; try { mol = nSup->next(); } catch (FileParseException &) { break; } } writer->flush(); delete writer; delete nSup; // now read the molecules back in a check if we have the same properties etc nSup = new SmilesMolSupplier(oname,",",0,-1); int i = 0; mol = nSup->next(); while (mol){ std::string mname, pval; mol->getProp("_Name", mname); TEST_ASSERT(mname!="bogus"); mol->getProp("Column_2", pval); TEST_ASSERT(pval == props[i]); i++; delete mol; try { mol = nSup->next(); } catch (FileParseException &) { break; } } }
void testTDTWriterStrm() { std::string rdbase = getenv("RDBASE"); std::string fname = rdbase + "/Code/GraphMol/FileParsers/test_data/NCI_aids_few.sdf"; SDMolSupplier sdsup(fname); std::string ofile = rdbase + "/Code/GraphMol/FileParsers/test_data/outNCI_few.tdt"; std::ofstream *oStream=new std::ofstream(ofile.c_str()); TDTWriter *writer = new TDTWriter(oStream); STR_VECT names; while (!sdsup.atEnd()) { ROMol *mol = sdsup.next(); std::string mname; mol->getProp("CAS_RN", mname); names.push_back(mname); writer->write(*mol); delete mol; } writer->flush(); TEST_ASSERT(writer->numMols() == 16); delete writer; // now read in the file we just finished writing TDTMolSupplier reader(ofile); int i = 0; while (!reader.atEnd()) { ROMol *mol = reader.next(); if(mol){ std::string mname; mol->getProp("CAS_RN", mname); CHECK_INVARIANT(mname == names[i], ""); delete mol; } i++; } TEST_ASSERT(i==16); }
void SDWriter::write(const ROMol &mol, int confId) { PRECONDITION(dp_ostream,"no output stream"); // write the molecule (*dp_ostream) << MolToMolBlock(mol, true, confId, df_kekulize, df_forceV3000); // now write the properties STR_VECT_CI pi; if (d_props.size() > 0) { // check if we have any properties the user specified to write out // in which loop over them and write them out for (pi = d_props.begin(); pi != d_props.end(); pi++) { if (mol.hasProp(*pi)) { writeProperty(mol, (*pi)); } } } else { // if use did not specify any properties, write all non computed properties // out to the file STR_VECT properties = mol.getPropList(); STR_VECT compLst; if (mol.hasProp(detail::computedPropName)) { mol.getProp(detail::computedPropName, compLst); } STR_VECT_CI pi; for (pi = properties.begin(); pi != properties.end(); pi++) { // ignore any of the following properties if ( ((*pi) == detail::computedPropName) || ((*pi) == "_Name") || ((*pi) == "_MolFileInfo") || ((*pi) == "_MolFileComments") || ((*pi) == "_MolFileChiralFlag")) { continue; } // check if this property is not computed if (std::find(compLst.begin(), compLst.end(), (*pi)) == compLst.end()) { writeProperty(mol, (*pi)); } } } // add the $$$$ that marks the end of a molecule (*dp_ostream) << "$$$$\n"; ++d_molid; }
void TDTWriter::write(const ROMol &mol, int confId) { CHECK_INVARIANT(dp_ostream,"no output stream"); //start by writing a "|" line unless this is the first line if (d_molid > 0) { (*dp_ostream) << "|\n"; } // write the molecule (*dp_ostream) << "$SMI<" << MolToSmiles(mol) << ">\n"; if(df_writeNames && mol.hasProp("_Name")){ std::string name; mol.getProp("_Name",name); (*dp_ostream) << "NAME<" << name << ">\n"; } // do we need to write coordinates? if(mol.getNumConformers()){ // get the ordering of the atoms in the output SMILES: std::vector<unsigned int> atomOrdering; mol.getProp("_smilesAtomOutputOrder",atomOrdering); const Conformer &conf = mol.getConformer(confId); if(df_write2D){ (*dp_ostream) << "2D<"; } else { (*dp_ostream) << "3D<"; } const RDGeom::POINT3D_VECT &coords=conf.getPositions(); int nAts=atomOrdering.size(); for(int i=0;i<nAts;i++){ (*dp_ostream) << std::setprecision(d_numDigits) << coords[atomOrdering[i]].x << ","; (*dp_ostream) << std::setprecision(d_numDigits) << coords[atomOrdering[i]].y; if(!df_write2D){ (*dp_ostream) << "," << std::setprecision(d_numDigits) << coords[atomOrdering[i]].z; } if(i!=nAts-1) (*dp_ostream) << ","; } (*dp_ostream) << ";>\n"; } // now write the properties STR_VECT_CI pi; if (d_props.size() > 0) { // check if we have any properties the user specified to write out // in which loop over them and write them out for (pi = d_props.begin(); pi != d_props.end(); pi++) { if (mol.hasProp(*pi)) { writeProperty(mol, (*pi)); } } } else { // if use did not specify any properties, write all non computed properties // out to the file STR_VECT properties = mol.getPropList(); STR_VECT compLst; if (mol.hasProp(detail::computedPropName)) { mol.getProp(detail::computedPropName, compLst); } STR_VECT_CI pi; for (pi = properties.begin(); pi != properties.end(); pi++) { // ignore any of the following properties if ( ((*pi) == detail::computedPropName) || ((*pi) == "_Name") || ((*pi) == "_MolFileInfo") || ((*pi) == "_MolFileComments") || 
((*pi) == "_MolFileChiralFlag")) { continue; } // check if this property is not computed if (std::find(compLst.begin(), compLst.end(), (*pi)) == compLst.end()) { writeProperty(mol, (*pi)); } } } d_molid++; }
// Turns one delimited text line into a molecule.
//
// The line is split on d_delim (empty tokens are kept, every token is
// stripped). Column d_smi is parsed as SMILES; the molecule name is taken
// from column d_name, or from the current line number when d_name is -1.
// Every remaining column is stored as a string property, named d_props[col]
// when such an entry exists and "Column_<col>" otherwise.
//
// Returns the new molecule, or NULL when the line could not be processed;
// in the NULL case an error has been written to rdErrorLog. No exception
// leaves this function.
ROMol *SmilesMolSupplier::processLine(std::string inLine) {
  ROMol *res = NULL;

  try {
    // -----------
    // tokenize the input line:
    // -----------
    boost::char_separator<char> sep(d_delim.c_str(), "", boost::keep_empty_tokens);
    tokenizer tokens(inLine, sep);
    STR_VECT recs;
    for (tokenizer::iterator tokIter = tokens.begin();
         tokIter != tokens.end(); ++tokIter) {
      std::string rec = strip(*tokIter);
      recs.push_back(rec);
    }
    if (recs.size() <= static_cast<unsigned int>(d_smi)) {
      // NOTE(review): this exception is presumably handled by the catch-all
      // below, so callers see a NULL result rather than the exception.
      std::ostringstream errout;
      errout << "ERROR: line #" << d_line << " does not contain enough tokens\n";
      throw FileParseException(errout.str());
    }

    // -----------
    // get the smiles and create a molecule
    // -----------
    res = SmilesToMol(recs[d_smi], 0, df_sanitize);
    if (!res) {
      std::stringstream errout;
      errout << "Cannot create molecule from : '" << recs[d_smi] << "'";
      throw SmilesParseException(errout.str());
    }

    // -----------
    // get the name (if there's a name column)
    // -----------
    if (d_name == -1) {
      // no name column: default the name to the line number we read it from
      std::ostringstream tstr;
      tstr << d_line;
      std::string mname = tstr.str();
      res->setProp(common_properties::_Name, mname);
    } else {
      if (d_name >= static_cast<int>(recs.size())) {
        BOOST_LOG(rdWarningLog) << "WARNING: no name column found on line "
                                << d_line << std::endl;
      } else {
        res->setProp(common_properties::_Name, recs[d_name]);
      }
    }

    // -----------
    // read in the properties
    // -----------
    for (unsigned int col = 0; col < recs.size(); col++) {
      // the SMILES and name columns have already been consumed
      if (static_cast<int>(col) == d_smi || static_cast<int>(col) == d_name) {
        continue;
      }
      std::string pname, pval;
      if (d_props.size() > col) {
        pname = d_props[col];
      } else {
        pname = "Column_";
        std::stringstream ss;
        ss << col;
        pname += ss.str();
      }
      pval = recs[col];
      res->setProp(pname, pval);
    }
  } catch (const SmilesParseException &pe) {
    // Couldn't parse the passed in smiles
    // Simply print out a message
    BOOST_LOG(rdErrorLog) << "ERROR: Smiles parse error on line " << d_line << "\n";
    BOOST_LOG(rdErrorLog) << "ERROR: " << pe.message() << "\n";
    // release a partially built molecule instead of leaking it
    // (deleting a null pointer is a no-op)
    delete res;
    res = NULL;
  } catch (const MolSanitizeException &se) {
    // We couldn't sanitize the molecule
    // write out an error message
    BOOST_LOG(rdErrorLog) << "ERROR: Could not sanitize molecule on line "
                          << d_line << std::endl;
    BOOST_LOG(rdErrorLog) << "ERROR: " << se.message() << "\n";
    delete res;
    res = NULL;
  } catch (...) {
    // write out an error message
    BOOST_LOG(rdErrorLog) << "ERROR: Could not process molecule on line "
                          << d_line << std::endl;
    delete res;
    res = NULL;
  }

  return res;
}