Beispiel #1
0
bool OpNewS::Do(OBBase* pOb, const char* OptionText, OpMap* pmap, OBConversion* pConv)
{
  OBMol* pmol = dynamic_cast<OBMol*>(pOb);
  if(!pmol)
    return false;

  // The SMARTS and any other parameters are extracted on the first molecule
  // and stored in the member variables. The parameter is cleared so that
  // the original -s option in transform.cpp is inactive

  //string txt(pmap->find(GetID())->second); // ID can be "s" or "v"

  vector<OBQuery*>::iterator qiter;
  if(OptionText && *OptionText)//(!pConv || pConv->IsFirstInput())
  {
    //Set up on first call
    queries.clear();
    query=NULL;
    nPatternAtoms=0;
    inv=false;

    tokenize(vec, OptionText);
    inv = GetID()[0]=='v';
    if(vec[0][0]=='~')
    {
      inv = true;
      vec[0].erase(0,1);
    }

    //Do not filter out any molecules if there is a parameter "showall";
    //allows -s option to be used for highlighting substructures (--highlight also does this)
    vector<string>::iterator it = std::remove(vec.begin(), vec.end(),"showall");
    showAll = it != vec.end();
    if(showAll)
      vec.erase(it);

    //Store the number of matches required, if as a number in the second parameter, else 0.
    nmatches = 0;
    comparechar = '\0';
    if(vec.size()>1)
    {
      comparechar = vec[1][0];
      if(comparechar=='>' || comparechar=='<')
        vec[1].erase(0,1);
      else
        comparechar = '\0';
      nmatches = atoi(vec[1].c_str());
      if(nmatches) //remove this parameter to still allow coloring
        vec.erase(vec.begin()+1);
    }

    //Interpret as a filename if possible
    MakeQueriesFromMolInFile(queries, vec[0], &nPatternAtoms, strstr(OptionText,"noH"));
    vec.erase(remove(vec.begin(),vec.end(),"noH"),vec.end());//to prevent "noH2" being seen as a color
    
     
    if(queries.empty())
    {
      //SMARTS supplied
    
      // Explicit H in SMARTS requires explicit H in the molecule.
      // Calling AddHydrogens() on a copy of the molecule  is done in parsmart.cpp
      // only when SMARTS contains [H]. Doing more has complications with atom typing,
      // so AddHydrogens here on the molecule (not a copy) when #1 detected.
      addHydrogens = (vec[0].find("#1]")!=string::npos);

      // If extra target mols have been supplied, make a composite SMARTS
      // to test for any of the targets.
      if(ExtraMols.size()>0)
      {
        for(unsigned i=0;i<ExtraMols.size();++i)
        {
          OBConversion extraConv;
          extraConv.AddOption("h");
          if(!extraConv.SetOutFormat("smi"))
            return false;
          // Add option which avoids implicit H being added to the SMARTS.
          // The parameter must be present but can be anything.
          extraConv.AddOption("h",OBConversion::OUTOPTIONS, "X");
          xsmarts += ",$(" + extraConv.WriteString(ExtraMols[i], true) + ")";
        }
      }

      string ysmarts = xsmarts.empty() ? vec[0] : "[$(" + vec[0] + ")" + xsmarts +"]";
      xsmarts.clear();
      if(!sp.Init(ysmarts))
      {
        string msg = ysmarts + " cannot be interpreted as either valid SMARTS "
          "or the name of a file with an extension known to OpenBabel "
          "that contains one or more pattern molecules.";
        obErrorLog.ThrowError(__FUNCTION__, msg, obError, onceOnly);
        delete pmol;
        pmol = NULL;
        pConv->SetOneObjectOnly(); //stop conversion
        return false;
      }
    }
    else
    {
      // Target is in a file. Add extra targets if any supplied
      for(unsigned i=0;i<ExtraMols.size();++i)
        queries.push_back(CompileMoleculeQuery(static_cast<OBMol*>(ExtraMols[i])));
      ExtraMols.clear();
    }

    if(vec.size()>1 && vec[1]=="exact")
    {
      if(queries.empty())
      {
        //Convert SMARTS to SMILES to count number of atoms
        OBConversion conv;
        OBMol patmol;
        if(!conv.SetInFormat("smi") || !conv.ReadString(&patmol, vec[0]))
        {
          obErrorLog.ThrowError(__FUNCTION__, "Cannot read the parameter of -s option, "
          "which has to be valid SMILES when the exact option is used.", obError, onceOnly);
          delete pmol;
          if(pConv)
            pConv->SetOneObjectOnly(); //stop conversion
          return false;
        }
        nPatternAtoms = patmol.NumHvyAtoms();
      }
    }
    else
      nPatternAtoms = 0;

    //disable old versions
    if(pConv)
      pConv->AddOption(GetID(), OBConversion::GENOPTIONS, "");
  }

  bool match = false;
  //These are a vector of each mapping, each containing atom indxs.
  vector<vector<int> > vecatomvec;
  vector<vector<int> >* pMappedAtoms = NULL;

  if(nPatternAtoms)
    if(pmol->NumHvyAtoms() != nPatternAtoms)
      return false;

  unsigned int imol=0; //index of mol in pattern file
  if(!queries.empty()) //filename supplied
  {
    //match is set true if any of the structures match - OR behaviour
    for(qiter=queries.begin();qiter!=queries.end();++qiter, ++imol)
    {
      OBIsomorphismMapper* mapper = OBIsomorphismMapper::GetInstance(*qiter);
      OBIsomorphismMapper::Mappings mappings;
      mapper->MapUnique(pmol, mappings);
      if( (match = !mappings.empty()) ) // extra parens to indicate truth value
      {
        OBIsomorphismMapper::Mappings::iterator ita;
        OBIsomorphismMapper::Mapping::iterator itb;
        for(ita=mappings.begin(); ita!=mappings.end();++ita)//each mapping
        {
          vector<int> atomvec;
          for(itb=ita->begin(); itb!=ita->end();++itb)//each atom index
            atomvec.push_back(itb->second+1);
          vecatomvec.push_back(atomvec);
          atomvec.clear();
        }
        pMappedAtoms = &vecatomvec;
        break;
      }
    }
  }
  else //SMARTS supplied
  {

    if(addHydrogens)
      pmol->AddHydrogens(false,false);

    if( (match = sp.Match(*pmol)) ) // extra parens to indicate truth value
    {
      pMappedAtoms = &sp.GetMapList();
      if(nmatches!=0)
      {
        int n = sp.GetUMapList().size();
        if(comparechar=='>')      match = (n > nmatches);
        else if(comparechar=='<') match = (n < nmatches);
        else                      match = (n == nmatches);
      }
    }
  }

  if((!showAll && (!match && !inv)) || (match && inv))
  {
    //delete a non-matching mol
    delete pmol;
    pmol = NULL;
    return false;
  }

  if(match)
    //Copy the idxes of the first match to a member variable so that it can be retrieved from outside
    firstmatch.assign(pMappedAtoms->begin()->begin(), pMappedAtoms->begin()->end());
  else
    firstmatch.clear();

  if(match && !inv && vec.size()>=2 && !vec[1].empty() && !nPatternAtoms)
  {
    vector<vector<int> >::iterator iter;

    if (vec[1]=="extract" || (vec.size()>3 && vec[2]=="extract"))
    {
      //Delete all unmatched atoms. Use only the first match
      ExtractSubstruct(pmol, *pMappedAtoms->begin());
      return true;
    }

    // color the substructure if there is a second parameter which is not "exact" or "extract" or "noH"
    // with multiple color parameters use the one corresponding to the query molecule, or the last
    if(imol>vec.size()-2)
      imol = vec.size()-2;
    for(iter=pMappedAtoms->begin();iter!=pMappedAtoms->end();++iter)//each match
       AddDataToSubstruct(pmol, *iter, "color", vec[imol+1]);
    return true;
  }

  if(pConv && pConv->IsLast())
  {
    for(qiter=queries.begin();qiter!=queries.end();++qiter)
      delete *qiter;
    queries.clear();
  }
  return true;
}
Beispiel #2
0
bool OpNewS::Do(OBBase* pOb, const char* OptionText, OpMap* pmap, OBConversion* pConv)
{
  OBMol* pmol = dynamic_cast<OBMol*>(pOb);
  if(!pmol)
    return false;

  // The SMARTS and any other parameters are extracted on the first molecule
  // and stored in the static variables vec, inv. The parameter is cleared so that:
  // (a) the original -s option in transform.cpp is inactive, and
  // (b) the parsing does not have to be done again for multi-molecule files

  string txt(pmap->find(GetID())->second); // ID can be "s" or "v"
  static vector<string> vec;
  static bool inv;
  static int nPatternAtoms; //non-zero for exact matches
  static OBQuery* query;
  static vector<OBQuery*> queries;
  vector<OBQuery*>::iterator qiter;
  if(!txt.empty())
  {
    //Set up on first call
    tokenize(vec, txt);
    inv = GetID()[0]=='v';
    if(vec[0][0]=='~')
    {
      inv = true;
      vec[0].erase(0,1);
    }

    //Interpret as a filename if possible
    MakeQueriesFromMolInFile(queries, vec[0], &nPatternAtoms);

    if(vec.size()>1 && vec[1]=="exact")
    {
      if(queries.empty())
      {
        //Convert SMARTS to SMILES to count number of atoms
        OBConversion conv;
        OBMol patmol;
        if(!conv.SetInFormat("smi") || !conv.ReadString(&patmol, vec[0]))
        {
          obErrorLog.ThrowError(__FUNCTION__, "Cannot read the parameter of -s option, "
          "which has to be valid SMILES when the exact option is used.", obError, onceOnly);
          delete pmol;
          pConv->SetOneObjectOnly(); //stop conversion
          return false;
        }
        nPatternAtoms = patmol.NumHvyAtoms();
      }
    }
    else
      nPatternAtoms = 0;
    
    //disable old versions
    pConv->AddOption(GetID(), OBConversion::GENOPTIONS, "");
  }

  bool match;
  //These are a vector of each mapping, each containing atom indxs.
  vector<vector<int> > vecatomvec;
  vector<vector<int> >* pMappedAtoms = NULL;
  OBSmartsPattern sp;

  if(nPatternAtoms)
    if(pmol->NumHvyAtoms() != nPatternAtoms)
      return false;

  int imol=0; //index of mol in pattern file
  if(!queries.empty()) //filename supplied
  {
    //match is set true if any of the structures match - OR behaviour
    for(qiter=queries.begin();qiter!=queries.end();++qiter, ++imol) 
    {
      OBIsomorphismMapper* mapper = OBIsomorphismMapper::GetInstance(*qiter);
      OBIsomorphismMapper::Mappings mappings;
      mapper->MapUnique(pmol, mappings);
      if( (match = !mappings.empty()) ) // extra parens to indicate truth value
      {
        OBIsomorphismMapper::Mappings::iterator ita;
        OBIsomorphismMapper::Mapping::iterator itb;
        for(ita=mappings.begin(); ita!=mappings.end();++ita)//each mapping
        {
          vector<int> atomvec;
          for(itb=ita->begin(); itb!=ita->end();++itb)//each atom index
            atomvec.push_back(itb->second+1);
          vecatomvec.push_back(atomvec);
          atomvec.clear();
        }
        pMappedAtoms = &vecatomvec;
        break;
      }
    }
  }
  else //SMARTS supplied
  {
    if(!sp.Init(vec[0]))
    {
      string msg = vec[0] + " cannot be interpreted as either valid SMARTS "
        "or the name of a file with an extension known to OpenBabel "
        "that contains one or more pattern molecules.";
      obErrorLog.ThrowError(__FUNCTION__, msg, obError, onceOnly);
      delete pmol;
      pmol = NULL;
      pConv->SetOneObjectOnly(); //stop conversion
      return false;
    }

    if( (match = sp.Match(*pmol)) ) // extra parens to indicate truth value
      pMappedAtoms = &sp.GetMapList();
  }

  if((!match && !inv) || (match && inv))
  {
    //delete a non-matching mol
    delete pmol;
    pmol = NULL;
    return false;
  }

  if(!inv && vec.size()>=2 && !vec[1].empty() && !nPatternAtoms)
  {
    vector<vector<int> >::iterator iter;

    if(vec[1]=="extract")
    {
      //Delete all unmatched atoms. Use only the first match
      ExtractSubstruct(pmol, *pMappedAtoms->begin());
      return true;
    }

    // color the substructure if there is a second parameter which is not "exact" or "extract"
    // with multiple color parameters use the one corresponding to the query molecule, or the last
    if(imol>vec.size()-2)
      imol = vec.size()-2;
    for(iter=pMappedAtoms->begin();iter!=pMappedAtoms->end();++iter)//each match
       AddDataToSubstruct(pmol, *iter, "color", vec[imol+1]);
    return true;
  }

  if(pConv && pConv->IsLast())
  {
    for(qiter=queries.begin();qiter!=queries.end();++qiter)
      delete *qiter;
    queries.clear();
  }
  return true;
}