Esempio n. 1
0
  // Builds a molecule from one TDT record.
  //
  // inLine is the record's first line: the text between its first '<' and its
  // last '>' is handed to SmilesToMol() as the SMILES. When that yields a
  // molecule with at least one atom, further lines are read from dp_inStream
  // until a line starting with '|' (or the end of the stream) is reached.
  // Each such line is treated as "NAME<value>":
  //   - "2D" / "3D" lines become conformers (ids d_confId2D / d_confId3D) when
  //     the corresponding id is non-negative;
  //   - any other line is stored as a string property; the property named
  //     d_nameProp is additionally copied to "_Name".
  //
  // Returns whatever SmilesToMol() produced (possibly null or atom-less), as a
  // raw pointer -- presumably owned by the caller; confirm against callers.
  //
  // Throws FileParseException when a property line has no closing '>'. Any
  // exception raised while reading properties destroys the partially built
  // molecule before propagating, so nothing is leaked.
  //
  // NOTE: d_line is advanced once per record here, not once per property line.
  ROMol *TDTMolSupplier::parseMol(std::string inLine){
    PRECONDITION(dp_inStream,"no stream");
    Utils::LocaleSwitcher ls;
    std::size_t startP=inLine.find("<");
    std::size_t endP=inLine.find_last_of(">");
    std::string smiles = inLine.substr(startP+1,endP-startP-1);
    ROMol *res = SmilesToMol(smiles,0,df_sanitize);

    if(res && res->getNumAtoms()>0){
      // res is a raw owning pointer: release it if anything below throws.
      try {
        // -----------
        //   Process the properties:
        d_line++;
        std::getline(*dp_inStream,inLine);
        // good() rather than !eof(): a stream that fails without hitting EOF
        // would otherwise leave inLine unchanged and loop forever.
        while(dp_inStream->good() && inLine.find("|")!=0){
          endP=inLine.find("<");
          std::string propName = inLine.substr(0,endP);
          boost::trim_if(propName,boost::is_any_of(" \t"));
          startP = endP+1;

          if(propName=="2D" && d_confId2D>=0){
            // the stream is passed along too, presumably so the number list
            // may continue onto following lines -- confirm in ParseNumberList
            std::string rest=inLine.substr(startP,inLine.size()-startP);
            std::vector<double> coords;
            TDTParseUtils::ParseNumberList(rest,coords,dp_inStream);
            Conformer *conf=new Conformer(res->getNumAtoms());
            conf->setId(d_confId2D);
            conf->set3D(false);
            // coordinates are consumed as (x,y) pairs, z is fixed at 0
            for(unsigned int atIdx=0;atIdx<res->getNumAtoms();atIdx++){
              if(2*atIdx+1 < coords.size()){
                conf->setAtomPos(atIdx,RDGeom::Point3D(coords[2*atIdx],coords[2*atIdx+1],0.0));
              } else {
                // we're going to let this slide... but maybe we should do something else?
                // (the atom keeps whatever position the Conformer constructor gave it)
              }
            }
            res->addConformer(conf,false);
          } else if(propName=="3D" && d_confId3D>=0){
            std::string rest=inLine.substr(startP,inLine.size()-startP);
            std::vector<double> coords;
            TDTParseUtils::ParseNumberList(rest,coords,dp_inStream);
            Conformer *conf=new Conformer(res->getNumAtoms());
            conf->setId(d_confId3D);
            conf->set3D(true);
            // coordinates are consumed as (x,y,z) triples
            for(unsigned int atIdx=0;atIdx<res->getNumAtoms();atIdx++){
              if(3*atIdx+2 < coords.size()){
                conf->setAtomPos(atIdx,RDGeom::Point3D(coords[3*atIdx],
                                                       coords[3*atIdx+1],
                                                       coords[3*atIdx+2]));
              } else {
                // we're going to let this slide... but maybe we should do something else?
                // (the atom keeps whatever position the Conformer constructor gave it)
              }
            }
            res->addConformer(conf,false);
          } else {
            // generic property: the value is the text between '<' and the last '>'
            endP=inLine.find_last_of(">");
            if(endP==std::string::npos){
              std::ostringstream errout;
              errout << "no end tag found for property " << propName;
              throw FileParseException(errout.str());
            } else {
              std::string propVal = inLine.substr(startP,endP-startP);
              res->setProp(propName,propVal);
              if(propName==d_nameProp) res->setProp("_Name",propVal);
            }
          }
          std::getline(*dp_inStream,inLine);
        }
      } catch(...) {
        delete res;
        throw;
      }
    }

    return res;
  }
Esempio n. 2
0
// Converts one line of delimited text into a molecule.
//
// The line is split on the characters in d_delim (empty tokens are kept) and
// every token is passed through strip(). Column d_smi is parsed with
// SmilesToMol(). The molecule's _Name is taken from column d_name, or from
// d_line when d_name is -1. Every remaining column is stored as a string
// property named after the matching d_props entry, or "Column_<index>" when
// d_props has no entry for that column.
//
// Returns the new molecule as a raw pointer, or NULL when the line could not
// be processed. Exceptions raised during processing are caught here, reported
// on rdErrorLog, and turned into a NULL return; a molecule that was already
// created at that point is destroyed rather than leaked.
ROMol *SmilesMolSupplier::processLine(std::string inLine) {
  ROMol *res = NULL;

  try {
    // -----------
    // tokenize the input line:
    // -----------
    boost::char_separator<char> sep(d_delim.c_str(), "",
                                    boost::keep_empty_tokens);
    tokenizer tokens(inLine, sep);
    STR_VECT recs;
    for (tokenizer::iterator tokIter = tokens.begin(); tokIter != tokens.end();
         ++tokIter) {
      std::string rec = strip(*tokIter);
      recs.push_back(rec);
    }
    // a negative d_smi wraps to a huge unsigned value and is rejected here too
    if (recs.size() <= static_cast<unsigned int>(d_smi)) {
      std::ostringstream errout;
      errout << "ERROR: line #" << d_line
             << " does not contain enough tokens\n";
      throw FileParseException(errout.str());
    }

    // -----------
    // get the smiles and create a molecule
    // -----------
    res = SmilesToMol(recs[d_smi], 0, df_sanitize);
    if (!res) {
      std::stringstream errout;
      errout << "Cannot create molecule from : '" << recs[d_smi] << "'";
      throw SmilesParseException(errout.str());
    }

    // -----------
    // get the name (if there's a name column)
    // -----------
    if (d_name == -1) {
      // if no name defaults it to the line number we read it from string
      std::ostringstream tstr;
      tstr << d_line;
      std::string mname = tstr.str();
      res->setProp(common_properties::_Name, mname);
    } else if (d_name < 0 || d_name >= static_cast<int>(recs.size())) {
      // out-of-range name column (including negative values other than -1,
      // which must never be used as an index): warn and leave _Name unset
      BOOST_LOG(rdWarningLog) << "WARNING: no name column found on line "
                              << d_line << std::endl;
    } else {
      res->setProp(common_properties::_Name, recs[d_name]);
    }

    // -----------
    // read in the properties
    // -----------
    for (unsigned int col = 0; col < recs.size(); col++) {
      // the SMILES and name columns have already been consumed above
      if (static_cast<int>(col) == d_smi || static_cast<int>(col) == d_name)
        continue;
      std::string pname, pval;
      if (d_props.size() > col) {
        pname = d_props[col];
      } else {
        pname = "Column_";
        std::stringstream ss;
        ss << col;
        pname += ss.str();
      }

      pval = recs[col];
      res->setProp(pname, pval);
    }

  } catch (const SmilesParseException &pe) {
    // Couldn't parse the passed in smiles
    // Simply print out a message
    BOOST_LOG(rdErrorLog) << "ERROR: Smiles parse error on line " << d_line
                          << "\n";
    BOOST_LOG(rdErrorLog) << "ERROR: " << pe.message() << "\n";
    delete res;  // no-op when res is still NULL
    res = NULL;
  } catch (const MolSanitizeException &se) {
    // We couldn't sanitize the molecule
    //  write out an error message
    BOOST_LOG(rdErrorLog) << "ERROR: Could not sanitize molecule on line "
                          << d_line << std::endl;
    BOOST_LOG(rdErrorLog) << "ERROR: " << se.message() << "\n";
    delete res;  // no-op when res is still NULL
    res = NULL;
  } catch (...) {
    //  write out an error message
    BOOST_LOG(rdErrorLog) << "ERROR: Could not process molecule on line "
                          << d_line << std::endl;
    delete res;  // no-op when res is still NULL
    res = NULL;
  }

  return res;
}