// Builds a molecule from the SMILES line of a record and then consumes the
// following lines from dp_inStream as that record's properties.
//
// inLine: the line holding the SMILES; the SMILES text is taken to be
//         everything between the first '<' and the last '>' on the line.
//
// Property lines are read until a line starting with '|' is found or the
// stream reaches EOF / enters a failed state. For each property line the name
// is the text before the first '<' (spaces and tabs trimmed):
//   - "2D" (when d_confId2D >= 0) and "3D" (when d_confId3D >= 0) are parsed
//     as number lists and stored as a conformer with the corresponding id;
//   - anything else is stored as a string property whose value is the text
//     between the first '<' and the last '>'. If the name equals d_nameProp
//     the value is also stored as "_Name".
//
// Returns whatever SmilesToMol() returned (possibly NULL). Properties are only
// read when the molecule is non-NULL and has at least one atom.
//
// Throws FileParseException when a generic property line has no '>'. The
// molecule is deleted before any exception leaves this function.
//
// NOTE(review): the positions of '<' / '>' on the SMILES line are not checked
// for std::string::npos; a malformed line is handed to SmilesToMol() as-is.
// NOTE(review): d_line is incremented once per record here, not once per
// property line read - confirm that this is the intended line accounting.
ROMol *TDTMolSupplier::parseMol(std::string inLine) {
  PRECONDITION(dp_inStream, "no stream");
  // presumably pins the locale for numeric parsing while in scope - confirm
  Utils::LocaleSwitcher ls;

  std::size_t startP = inLine.find("<");
  std::size_t endP = inLine.find_last_of(">");
  std::string smiles = inLine.substr(startP + 1, endP - startP - 1);
  ROMol *res = SmilesToMol(smiles, 0, df_sanitize);

  if (res && res->getNumAtoms() > 0) {
    try {
      // -----------
      //   Process the properties:
      d_line++;
      std::getline(*dp_inStream, inLine);
      // fail() is checked in addition to eof(): a stream that is in a failed
      // state without EOF would otherwise never leave this loop.
      while (!dp_inStream->eof() && !dp_inStream->fail() &&
             inLine.find("|") != 0) {
        endP = inLine.find("<");
        std::string propName = inLine.substr(0, endP);
        boost::trim_if(propName, boost::is_any_of(" \t"));
        // if no '<' was found, endP is npos and this wraps around to 0
        startP = endP + 1;

        const bool is2D = (propName == "2D" && d_confId2D >= 0);
        const bool is3D = (propName == "3D" && d_confId3D >= 0);
        if (is2D || is3D) {
          // coordinates are stored flat: 2 (x,y) or 3 (x,y,z) values per atom
          const unsigned int dim = is2D ? 2 : 3;
          std::string rest = inLine.substr(startP, inLine.size() - startP);
          std::vector<double> coords;
          TDTParseUtils::ParseNumberList(rest, coords, dp_inStream);

          Conformer *conf = new Conformer(res->getNumAtoms());
          conf->setId(is2D ? d_confId2D : d_confId3D);
          conf->set3D(is3D);
          for (unsigned int atIdx = 0; atIdx < res->getNumAtoms(); atIdx++) {
            const unsigned int base = dim * atIdx;
            if (base + dim - 1 < coords.size()) {
              const double z = is3D ? coords[base + 2] : 0.0;
              conf->setAtomPos(
                  atIdx, RDGeom::Point3D(coords[base], coords[base + 1], z));
            } else {
              // we're going to let this slide... but maybe we should do
              // something else?
            }
          }
          res->addConformer(conf, false);
        } else {
          endP = inLine.find_last_of(">");
          if (endP == std::string::npos) {
            std::ostringstream errout;
            errout << "no end tag found for property " << propName;
            throw FileParseException(errout.str());
          }
          std::string propVal = inLine.substr(startP, endP - startP);
          res->setProp(propName, propVal);
          if (propName == d_nameProp) {
            res->setProp("_Name", propVal);
          }
        }
        std::getline(*dp_inStream, inLine);
      }
    } catch (...) {
      // do not leak the partially populated molecule
      delete res;
      throw;
    }
  }
  return res;
}
// Converts one delimited text line into a molecule.
//
// inLine: the line to process. It is split on the characters in d_delim
//         (empty tokens are kept) and each token is passed through strip().
//
// Column d_smi holds the SMILES and is passed to SmilesToMol() with
// df_sanitize. The molecule name is the line number d_line when d_name is -1,
// otherwise the contents of column d_name (a warning is logged and no name is
// set if that column does not exist on this line). Every remaining column is
// stored as a string property named d_props[col] when available, or
// "Column_<col>" otherwise.
//
// Returns the new molecule, or NULL if the line could not be processed. No
// exception leaves this function: failures are written to the error log and
// any molecule that was already created is deleted.
ROMol *SmilesMolSupplier::processLine(std::string inLine) {
  ROMol *res = NULL;

  try {
    // -----------
    // tokenize the input line:
    // -----------
    boost::char_separator<char> sep(d_delim.c_str(), "",
                                    boost::keep_empty_tokens);
    tokenizer tokens(inLine, sep);
    STR_VECT recs;
    for (tokenizer::iterator tokIter = tokens.begin(); tokIter != tokens.end();
         ++tokIter) {
      std::string rec = strip(*tokIter);
      recs.push_back(rec);
    }
    if (recs.size() <= static_cast<unsigned int>(d_smi)) {
      std::ostringstream errout;
      errout << "ERROR: line #" << d_line
             << " does not contain enough tokens\n";
      throw FileParseException(errout.str());
    }

    // -----------
    // get the smiles and create a molecule
    // -----------
    res = SmilesToMol(recs[d_smi], 0, df_sanitize);
    if (!res) {
      std::stringstream errout;
      errout << "Cannot create molecule from : '" << recs[d_smi] << "'";
      throw SmilesParseException(errout.str());
    }

    // -----------
    // get the name (if there's a name column)
    // -----------
    if (d_name == -1) {
      // no name column: default the name to the line number we read it from
      std::ostringstream tstr;
      tstr << d_line;
      std::string mname = tstr.str();
      res->setProp(common_properties::_Name, mname);
    } else if (d_name < 0 || d_name >= static_cast<int>(recs.size())) {
      // also covers negative values other than -1, which must never be used
      // as an index into recs
      BOOST_LOG(rdWarningLog) << "WARNING: no name column found on line "
                              << d_line << std::endl;
    } else {
      res->setProp(common_properties::_Name, recs[d_name]);
    }

    // -----------
    // read in the properties
    // -----------
    for (unsigned int col = 0; col < recs.size(); col++) {
      if (static_cast<int>(col) == d_smi || static_cast<int>(col) == d_name) {
        continue;
      }
      std::string pname, pval;
      if (d_props.size() > col) {
        pname = d_props[col];
      } else {
        pname = "Column_";
        std::stringstream ss;
        ss << col;
        pname += ss.str();
      }
      pval = recs[col];
      res->setProp(pname, pval);
    }
  } catch (const SmilesParseException &pe) {
    // Couldn't parse the passed in smiles
    // Simply print out a message
    BOOST_LOG(rdErrorLog) << "ERROR: Smiles parse error on line " << d_line
                          << "\n";
    BOOST_LOG(rdErrorLog) << "ERROR: " << pe.message() << "\n";
    delete res;  // deleting NULL is a no-op
    res = NULL;
  } catch (const MolSanitizeException &se) {
    // We couldn't sanitize the molecule
    //  write out an error message
    BOOST_LOG(rdErrorLog) << "ERROR: Could not sanitize molecule on line "
                          << d_line << std::endl;
    BOOST_LOG(rdErrorLog) << "ERROR: " << se.message() << "\n";
    delete res;
    res = NULL;
  } catch (...) {
    //  write out an error message
    BOOST_LOG(rdErrorLog) << "ERROR: Could not process molecule on line "
                          << d_line << std::endl;
    // the molecule may already exist here (e.g. setProp threw): free it
    delete res;
    res = NULL;
  }

  return res;
}