Ejemplo n.º 1
0
  // Builds the query molecule(s) for a fastsearch from the conversion options.
  //
  // The pattern text is taken from the -s general option or, failing that,
  // from the deprecated -S option (general options first, then input options).
  // Its first whitespace-separated token is interpreted as:
  //   - a filename, when it contains a '.' and a format can be derived from
  //     it and at least one non-empty molecule can be read; every molecule
  //     in the file is appended to patternMols;
  //   - otherwise a SMILES/SMARTS-like string that is read as SMILES after
  //     [#n] atoms have been replaced by their element symbols.
  // A second token equal to "exact" adds the "e" input option to pConv.
  //
  // pConv        conversion whose options are examined (and may be extended)
  // patternMols  receives the query molecule(s)
  // indexname    name of the index file, used only in the information text
  //
  // Returns true when at least one pattern was added to patternMols.
  // Returns false when the pattern could not be interpreted (an error is
  // logged) or when no -s/-S option was given, in which case a description
  // of the index file is written to clog instead of doing a search.
  bool FastSearchFormat::ObtainTarget(OBConversion* pConv, vector<OBMol>& patternMols, const string& indexname)
  {
    OBMol patternMol;
    patternMol.SetIsPatternStructure();

    const char* p = pConv->IsOption("s",OBConversion::GENOPTIONS);

    bool OldSOption=false;
    //If no -s option, fall back to the -S or -aS option (both deprecated)
    if(!p)
    {
      p = pConv->IsOption("S",OBConversion::GENOPTIONS);
      if(!p)
        p = pConv->IsOption("S",OBConversion::INOPTIONS);//for GUI mainly
      OldSOption = true;
    }

    if(!p)
    {
      //neither -s or -S options provided. Output info rather than doing search.
      //This is done before anything else so that no -s option is synthesised
      //from the still empty pattern molecule.
      const FptIndexHeader& header = fs.GetIndexHeader();
      string id(header.fpid);
      if(id.empty())
        id = "default";
      clog << indexname << " is an index of\n " << header.datafilename
           << ".\n It contains " << header.nEntries
           << " molecules. The fingerprint type is " << id << " with "
           << OBFingerprint::Getbitsperint() * header.words << " bits.\n"
           << "Typical usage for a substructure search:\n"
           << "obabel indexfile.fs -osmi -sSMILES\n"
           << "(-s option in GUI is 'Convert only if match SMARTS or mols in file')" << endl;
      return false;
    }

    vector<string> vec;
    tokenize(vec, p);

    //An option with empty (or all-whitespace) text gives no tokens;
    //indexing vec[0] would then be undefined behaviour.
    if(vec.empty())
    {
      obErrorLog.ThrowError(__FUNCTION__,"No pattern was given in the -s or -S option", obError);
      return false;
    }

    //ignore leading ~ (not relevant to fastsearch)
    if(vec[0][0]=='~')
      vec[0].erase(0,1);

    if(vec.size()>1 && vec[1]=="exact")
      pConv->AddOption("e", OBConversion::INOPTIONS);

    OBConversion patternConv;
    OBFormat* pFormat;
    //Interpret as a filename if possible
    string& txt =vec [0];
    if( txt.empty() ||
        txt.find('.')==string::npos ||
        !(pFormat = patternConv.FormatFromExt(txt.c_str())) ||
        !patternConv.SetInFormat(pFormat) ||
        !patternConv.ReadFile(&patternMol, txt) ||
        patternMol.NumAtoms()==0)
      //if false, have a valid patternMol from a file
    {
      //is SMARTS/SMILES
      //Replace e.g. [#6] in SMARTS by C so that it can be converted as SMILES
      //for the fingerprint phase, but allow more generality in the SMARTS phase.
      for(;;)
      {
        string::size_type pos1, pos2;
        pos1 = txt.find("[#");
        if(pos1==string::npos)
          break;
        //Search for the closing bracket from the opening one. Searching from
        //the start of the string could find a ']' belonging to an earlier
        //bracket atom, making the replaced length zero so that the loop
        //never terminates.
        pos2 = txt.find(']', pos1);
        int atno;
        if(pos2!=string::npos &&  (atno = atoi(txt.substr(pos1+2, pos2-pos1-2).c_str())) && atno>0)
          txt.replace(pos1, pos2-pos1+1, etab.GetSymbol(atno));
        else
        {
          obErrorLog.ThrowError(__FUNCTION__,"Ill-formed [#n] atom in SMARTS", obError);
          return false;
        }
      }

      bool hasTildeBond;
      if( (hasTildeBond = (txt.find('~')!=string::npos)) ) // extra parens to indicate truth value
      {
        //Find ~ bonds and make versions of query molecule with a single and aromatic bonds
        //To avoid having to parse the SMILES here, replace ~ by $ (quadruple bond)
        //and then replace this in patternMol. Check first that there are no $ already
        //Sadly, isocyanides may have $ bonds.
        if(txt.find('$')!=string::npos)
        {
          obErrorLog.ThrowError(__FUNCTION__,
            "Cannot use ~ bonds in patterns with $ (quadruple) bonds.)", obError);
          return false;
        }
        replace(txt.begin(),txt.end(), '~' , '$');
      }

      //read as standard SMILES
      patternConv.SetInFormat("smi");
      if(!patternConv.ReadString(&patternMol, vec[0]))
      {
        obErrorLog.ThrowError(__FUNCTION__,"Cannot read the SMILES string",obError);
        return false;
      }
      if(hasTildeBond)
      {
        AddPattern(patternMols, patternMol, 0); //recursively add all combinations of tilde bond values
        return true;
      }
    }
    else
    {
      // target(s) are in a file: the first molecule has already been read,
      // the remaining ones are read and appended here
      patternMols.push_back(patternMol);
      while(patternConv.Read(&patternMol))
        patternMols.push_back(patternMol);
      return true;
    }

    if(OldSOption) //only when using deprecated -S and -aS options
    {
      //make -s option for later SMARTS test
      OBConversion conv;
      if(conv.SetOutFormat("smi"))
      {
        string optiontext = conv.WriteString(&patternMol, true);
        pConv->AddOption("s", OBConversion::GENOPTIONS, optiontext.c_str());
      }
    }

    patternMols.push_back(patternMol);
    return true;
  }
Ejemplo n.º 2
0
bool FastSearchFormat::ObtainTarget(OBConversion* pConv, OBMol& patternMol, const string& indexname)
{
    // Fills patternMol with the query molecule for a fastsearch:
    //   - from the SMILES text of the -s option, or
    //   - from the file named in the -S option (general options first,
    //     then input options).
    // On success the molecule is written back as SMILES into a fresh -s
    // general option of pConv and true is returned.
    // When neither option is present, a description of the index file is
    // written to clog and false is returned. false is also returned (with
    // a logged message) whenever no molecule could be obtained.

    stringstream smiles(stringstream::out);
    ifstream patternstream;
    OBConversion PatternConv(&patternstream,&smiles);

    string optionText;
    const char* queryOpt = pConv->IsOption("s",OBConversion::GENOPTIONS);
    if(!queryOpt)
    {
        // No -s option: look for -S (or -aS) instead
        queryOpt = pConv->IsOption("S",OBConversion::GENOPTIONS);
        if(!queryOpt)
        {
            queryOpt = pConv->IsOption("S",OBConversion::INOPTIONS);//for GUI mainly
        }
    }
    else
    {
        // The -s text is read as SMILES
        optionText = queryOpt;
        stringstream smarts(optionText, stringstream::in);
        OBConversion Convsm(&smarts);
        if(!Convsm.SetInFormat("smi"))
        {
            return false;
        }
        Convsm.Read(&patternMol);

        // The -s general option is dropped here; it is written again below
        pConv->RemoveOption("s",OBConversion::GENOPTIONS);
        if(patternMol.Empty())
        {
            obErrorLog.ThrowError(__FUNCTION__,
                                  "Could not make a molecule from " + smarts.str()
                                  + "\nThis needs to be valid SMILES when using fastsearch."
                                  "You can use the more versatile SMARTS in a normal substructure search." , obError);
            return false;
        }
    }

    if(!queryOpt)
    {
        // Nothing to search for: describe the index file instead
        const FptIndexHeader& hdr = fs.GetIndexHeader();
        string fpName(hdr.fpid);
        if(fpName.empty())
        {
            fpName = "default";
        }
        clog << indexname << " is an index of\n " << hdr.datafilename
             << ".\n It contains " << hdr.nEntries
             << " molecules. The fingerprint type is " << fpName << " with "
             << OBFingerprint::Getbitsperint() * hdr.words << " bits.\n"
             << "Typical usage for a substructure search:\n"
             << "babel indexfile.fs -osmi -sSMILES" << endl;
        return false;
    }

    // An option was found. If there is still no molecule, treat the
    // option text as the name of a file holding the pattern molecule.
    if(patternMol.Empty())
    {
        optionText = queryOpt;
        const string::size_type dotPos = optionText.find_last_of('.');
        if(dotPos==string::npos)
        {
            obErrorLog.ThrowError(__FUNCTION__, "Filename of pattern molecule in -S option must have an extension", obError);
            return false;
        }

        patternstream.open(optionText.c_str());
        if(!patternstream)
        {
            const string openFailure = string("Cannot open ") + optionText + "\n";
            obErrorLog.ThrowError(__FUNCTION__, openFailure, obError);
            return false;
        }

        // Only one molecule is wanted; the format comes from the extension
        PatternConv.SetOneObjectOnly();
        const string extension = optionText.substr(dotPos+1);
        if(PatternConv.SetInFormat(extension.c_str()))
        {
            PatternConv.Read(&patternMol);
        }
    }

    if(patternMol.Empty())
    {
        obErrorLog.ThrowError(__FUNCTION__, "Cannot derive a molecule from the -s or -S options", obWarning);
        return false;
    }
    patternMol.ConvertDativeBonds();//use standard form for dative bonds

    // Write the molecule as SMILES; this becomes the -s option used in the final filtering
    if(!PatternConv.SetOutFormat("smi"))
    {
        return false;
    }
    PatternConv.Write(&patternMol);

    // Keep only the text up to the first whitespace, i.e. drop the name
    const string written(smiles.str());
    const string bareSmiles = written.substr(0, written.find_first_of(" \t\r\n"));
    pConv->AddOption("s", OBConversion::GENOPTIONS, bareSmiles.c_str());

    return true;
}