示例#1
0
void testIsomorphism1()
{
  OBMol mol;
  OBConversion conv;
  conv.SetInFormat("smi");
  conv.ReadString(&mol, "CC1CCC(C)CC1");

  OBQuery *query = CompileMoleculeQuery(&mol);
  OBIsomorphismMapper *mapper = OBIsomorphismMapper::GetInstance(query);
  OBIsomorphismMapper::Mappings maps;
  mapper->MapAll(&mol, maps);

  OB_ASSERT( maps.size() == 4 );

  delete query;
  delete mapper;

  query = CompileSmilesQuery("C1(C)CCC(C)CC1");
  mapper = OBIsomorphismMapper::GetInstance(query);

  OBIsomorphismMapper::Mapping map;
  mapper->MapFirst(&mol, map);
  OB_ASSERT( map.size() == 8 );

  mapper->MapUnique(&mol, maps);
  OB_ASSERT( maps.size() == 1 );

  mapper->MapAll(&mol, maps);
  OB_ASSERT( maps.size() == 4 );

  delete query;
  delete mapper;
}
示例#2
0
void testIsomorphismMask()
{
  // read file: 3 6-rings
  //
  //     /\ /\ /\
  //    |  |  |  |
  //     \/ \/ \/
  //
  OBMol mol;
  OBConversion conv;
  conv.SetInFormat("cml");
  std::ifstream ifs(OBTestUtil::GetFilename("isomorphism1.cml").c_str());
  OB_REQUIRE( ifs );
  conv.Read(&mol, &ifs);

  OBQuery *query = CompileSmilesQuery("C1CCCCC1");
  OBIsomorphismMapper *mapper = OBIsomorphismMapper::GetInstance(query);

  // no mask
  OBIsomorphismMapper::Mappings maps;
  mapper->MapUnique(&mol, maps);
  cout << maps.size() << endl;
  OB_ASSERT( maps.size() == 3 );

  // mask first ring
  OBBitVec mask;
  for (int i = 0; i < 6; ++i)
    mask.SetBitOn(i+1);
  mapper->MapUnique(&mol, maps, mask);
  cout << maps.size() << endl;
  OB_ASSERT( maps.size() == 1 );

  // mask second ring also
  for (int i = 6; i < 10; ++i)
    mask.SetBitOn(i+1);
  mapper->MapUnique(&mol, maps, mask);
  cout << maps.size() << endl;
  OB_ASSERT( maps.size() == 2 );

  // just mask last ring (atomIds 7-8, 10-13)
  mask.Clear();
  for (int i = 10; i < 14; ++i)
    mask.SetBitOn(i+1);
  mask.SetBitOn(7 + 1); mask.SetBitOn(8 + 1);
  mapper->MapUnique(&mol, maps, mask);
  cout << maps.size() << endl;
  OB_ASSERT( maps.size() == 1 ); // Should be same result as masking just the first ring

  delete query;
  delete mapper;
}
示例#3
0
void testAutomorphismMask() {
  // read file: 3 6-rings
  //
  //     /\ /\ /\
  //    |  |  |  |
  //     \/ \/ \/
  //
  cout <<  "testAutomorphismMask" << endl;
  OBMol mol;
  OBConversion conv;
  conv.SetInFormat("cml");
  std::ifstream ifs(OBTestUtil::GetFilename("isomorphism1.cml").c_str());
  OB_REQUIRE( ifs );
  conv.Read(&mol, &ifs);

  OBIsomorphismMapper::Mappings maps;

  // First of all, how many automorphisms are there without any mask?
  // This takes about 20 seconds, so you may want to comment this out while debugging
  FindAutomorphisms(&mol, maps);
  cout << maps.size() << endl;
  OB_ASSERT( maps.size() == 4 );

  // Now, let's remove the bridge (atomId 6) of the central ring.
  //
  //     /\ /\ /\
  //    |  |  |  |
  //     \/    \/
  // both rings can be flipped around exocyclic bond, the whole molecule can be mirrored
  // horizontally, this results in 2 x 2 x 2 = 8 automorphisms
  OBBitVec mask;
  mask.SetRangeOn(1, mol.NumAtoms());
  mask.SetBitOff(6+1);
  FindAutomorphisms(&mol, maps, mask);
  cout << maps.size() << endl;
  for (unsigned int i = 0; i < maps.size(); ++i) {
    OBIsomorphismMapper::Mapping::const_iterator j;
    for (j = maps[i].begin(); j != maps[i].end(); ++j)
      cout << j->second << " ";
    cout << endl;
  }
  OB_ASSERT( maps.size() == 8 );

  // Verify that atom Id 6 does not occur anywhere in the mappings
  OBIsomorphismMapper::Mappings::const_iterator a;
  OBIsomorphismMapper::Mapping::const_iterator b;
  for (a = maps.begin(); a != maps.end(); ++a)
    for (b = a->begin(); b!= a->end(); ++b) {
      OB_ASSERT( b->first != 6 );
      OB_ASSERT( b->second != 6 );
    }
}
示例#4
0
void testIsomorphism4()
{
  cout << "testIsomorphism4" << endl;
  OBMol mol;
  OBConversion conv;
  conv.SetInFormat("smi");
  conv.ReadString(&mol, "C12C(C2)C1");

  OBQuery *query = CompileSmilesQuery("C1CC1");
  OBIsomorphismMapper *mapper = OBIsomorphismMapper::GetInstance(query);
  OBIsomorphismMapper::Mappings maps;
  mapper->MapUnique(&mol, maps);

  cout << maps.size() << endl;

  OB_ASSERT( maps.size() == 2 );

  delete query;
  delete mapper;
}
示例#5
0
bool OpNewS::Do(OBBase* pOb, const char* OptionText, OpMap* pmap, OBConversion* pConv)
{
  OBMol* pmol = dynamic_cast<OBMol*>(pOb);
  if(!pmol)
    return false;

  // The SMARTS and any other parameters are extracted on the first molecule
  // and stored in the member variables. The parameter is cleared so that
  // the original -s option in transform.cpp is inactive

  //string txt(pmap->find(GetID())->second); // ID can be "s" or "v"

  vector<OBQuery*>::iterator qiter;
  if(OptionText && *OptionText)//(!pConv || pConv->IsFirstInput())
  {
    //Set up on first call
    queries.clear();
    query=NULL;
    nPatternAtoms=0;
    inv=false;

    tokenize(vec, OptionText);
    inv = GetID()[0]=='v';
    if(vec[0][0]=='~')
    {
      inv = true;
      vec[0].erase(0,1);
    }

    //Do not filter out any molecules if there is a parameter "showall";
    //allows -s option to be used for highlighting substructures (--highlight also does this)
    vector<string>::iterator it = std::remove(vec.begin(), vec.end(),"showall");
    showAll = it != vec.end();
    if(showAll)
      vec.erase(it);

    //Store the number of matches required, if as a number in the second parameter, else 0.
    nmatches = 0;
    comparechar = '\0';
    if(vec.size()>1)
    {
      comparechar = vec[1][0];
      if(comparechar=='>' || comparechar=='<')
        vec[1].erase(0,1);
      else
        comparechar = '\0';
      nmatches = atoi(vec[1].c_str());
      if(nmatches) //remove this parameter to still allow coloring
        vec.erase(vec.begin()+1);
    }

    //Interpret as a filename if possible
    MakeQueriesFromMolInFile(queries, vec[0], &nPatternAtoms, strstr(OptionText,"noH"));
    vec.erase(remove(vec.begin(),vec.end(),"noH"),vec.end());//to prevent "noH2" being seen as a color
    
     
    if(queries.empty())
    {
      //SMARTS supplied
    
      // Explicit H in SMARTS requires explicit H in the molecule.
      // Calling AddHydrogens() on a copy of the molecule  is done in parsmart.cpp
      // only when SMARTS contains [H]. Doing more has complications with atom typing,
      // so AddHydrogens here on the molecule (not a copy) when #1 detected.
      addHydrogens = (vec[0].find("#1]")!=string::npos);

      // If extra target mols have been supplied, make a composite SMARTS
      // to test for any of the targets.
      if(ExtraMols.size()>0)
      {
        for(unsigned i=0;i<ExtraMols.size();++i)
        {
          OBConversion extraConv;
          extraConv.AddOption("h");
          if(!extraConv.SetOutFormat("smi"))
            return false;
          // Add option which avoids implicit H being added to the SMARTS.
          // The parameter must be present but can be anything.
          extraConv.AddOption("h",OBConversion::OUTOPTIONS, "X");
          xsmarts += ",$(" + extraConv.WriteString(ExtraMols[i], true) + ")";
        }
      }

      string ysmarts = xsmarts.empty() ? vec[0] : "[$(" + vec[0] + ")" + xsmarts +"]";
      xsmarts.clear();
      if(!sp.Init(ysmarts))
      {
        string msg = ysmarts + " cannot be interpreted as either valid SMARTS "
          "or the name of a file with an extension known to OpenBabel "
          "that contains one or more pattern molecules.";
        obErrorLog.ThrowError(__FUNCTION__, msg, obError, onceOnly);
        delete pmol;
        pmol = NULL;
        pConv->SetOneObjectOnly(); //stop conversion
        return false;
      }
    }
    else
    {
      // Target is in a file. Add extra targets if any supplied
      for(unsigned i=0;i<ExtraMols.size();++i)
        queries.push_back(CompileMoleculeQuery(static_cast<OBMol*>(ExtraMols[i])));
      ExtraMols.clear();
    }

    if(vec.size()>1 && vec[1]=="exact")
    {
      if(queries.empty())
      {
        //Convert SMARTS to SMILES to count number of atoms
        OBConversion conv;
        OBMol patmol;
        if(!conv.SetInFormat("smi") || !conv.ReadString(&patmol, vec[0]))
        {
          obErrorLog.ThrowError(__FUNCTION__, "Cannot read the parameter of -s option, "
          "which has to be valid SMILES when the exact option is used.", obError, onceOnly);
          delete pmol;
          if(pConv)
            pConv->SetOneObjectOnly(); //stop conversion
          return false;
        }
        nPatternAtoms = patmol.NumHvyAtoms();
      }
    }
    else
      nPatternAtoms = 0;

    //disable old versions
    if(pConv)
      pConv->AddOption(GetID(), OBConversion::GENOPTIONS, "");
  }

  bool match = false;
  //These are a vector of each mapping, each containing atom indxs.
  vector<vector<int> > vecatomvec;
  vector<vector<int> >* pMappedAtoms = NULL;

  if(nPatternAtoms)
    if(pmol->NumHvyAtoms() != nPatternAtoms)
      return false;

  unsigned int imol=0; //index of mol in pattern file
  if(!queries.empty()) //filename supplied
  {
    //match is set true if any of the structures match - OR behaviour
    for(qiter=queries.begin();qiter!=queries.end();++qiter, ++imol)
    {
      OBIsomorphismMapper* mapper = OBIsomorphismMapper::GetInstance(*qiter);
      OBIsomorphismMapper::Mappings mappings;
      mapper->MapUnique(pmol, mappings);
      if( (match = !mappings.empty()) ) // extra parens to indicate truth value
      {
        OBIsomorphismMapper::Mappings::iterator ita;
        OBIsomorphismMapper::Mapping::iterator itb;
        for(ita=mappings.begin(); ita!=mappings.end();++ita)//each mapping
        {
          vector<int> atomvec;
          for(itb=ita->begin(); itb!=ita->end();++itb)//each atom index
            atomvec.push_back(itb->second+1);
          vecatomvec.push_back(atomvec);
          atomvec.clear();
        }
        pMappedAtoms = &vecatomvec;
        break;
      }
    }
  }
  else //SMARTS supplied
  {

    if(addHydrogens)
      pmol->AddHydrogens(false,false);

    if( (match = sp.Match(*pmol)) ) // extra parens to indicate truth value
    {
      pMappedAtoms = &sp.GetMapList();
      if(nmatches!=0)
      {
        int n = sp.GetUMapList().size();
        if(comparechar=='>')      match = (n > nmatches);
        else if(comparechar=='<') match = (n < nmatches);
        else                      match = (n == nmatches);
      }
    }
  }

  if((!showAll && (!match && !inv)) || (match && inv))
  {
    //delete a non-matching mol
    delete pmol;
    pmol = NULL;
    return false;
  }

  if(match)
    //Copy the idxes of the first match to a member variable so that it can be retrieved from outside
    firstmatch.assign(pMappedAtoms->begin()->begin(), pMappedAtoms->begin()->end());
  else
    firstmatch.clear();

  if(match && !inv && vec.size()>=2 && !vec[1].empty() && !nPatternAtoms)
  {
    vector<vector<int> >::iterator iter;

    if (vec[1]=="extract" || (vec.size()>3 && vec[2]=="extract"))
    {
      //Delete all unmatched atoms. Use only the first match
      ExtractSubstruct(pmol, *pMappedAtoms->begin());
      return true;
    }

    // color the substructure if there is a second parameter which is not "exact" or "extract" or "noH"
    // with multiple color parameters use the one corresponding to the query molecule, or the last
    if(imol>vec.size()-2)
      imol = vec.size()-2;
    for(iter=pMappedAtoms->begin();iter!=pMappedAtoms->end();++iter)//each match
       AddDataToSubstruct(pmol, *iter, "color", vec[imol+1]);
    return true;
  }

  if(pConv && pConv->IsLast())
  {
    for(qiter=queries.begin();qiter!=queries.end();++qiter)
      delete *qiter;
    queries.clear();
  }
  return true;
}
示例#6
0
bool OpNewS::Do(OBBase* pOb, const char* OptionText, OpMap* pmap, OBConversion* pConv)
{
  OBMol* pmol = dynamic_cast<OBMol*>(pOb);
  if(!pmol)
    return false;

  // The SMARTS and any other parameters are extracted on the first molecule
  // and stored in the static variables vec, inv. The parameter is cleared so that:
  // (a) the original -s option in transform.cpp is inactive, and
  // (b) the parsing does not have to be done again for multi-molecule files

  string txt(pmap->find(GetID())->second); // ID can be "s" or "v"
  static vector<string> vec;
  static bool inv;
  static int nPatternAtoms; //non-zero for exact matches
  static OBQuery* query;
  static vector<OBQuery*> queries;
  vector<OBQuery*>::iterator qiter;
  if(!txt.empty())
  {
    //Set up on first call
    tokenize(vec, txt);
    inv = GetID()[0]=='v';
    if(vec[0][0]=='~')
    {
      inv = true;
      vec[0].erase(0,1);
    }

    //Interpret as a filename if possible
    MakeQueriesFromMolInFile(queries, vec[0], &nPatternAtoms);

    if(vec.size()>1 && vec[1]=="exact")
    {
      if(queries.empty())
      {
        //Convert SMARTS to SMILES to count number of atoms
        OBConversion conv;
        OBMol patmol;
        if(!conv.SetInFormat("smi") || !conv.ReadString(&patmol, vec[0]))
        {
          obErrorLog.ThrowError(__FUNCTION__, "Cannot read the parameter of -s option, "
          "which has to be valid SMILES when the exact option is used.", obError, onceOnly);
          delete pmol;
          pConv->SetOneObjectOnly(); //stop conversion
          return false;
        }
        nPatternAtoms = patmol.NumHvyAtoms();
      }
    }
    else
      nPatternAtoms = 0;
    
    //disable old versions
    pConv->AddOption(GetID(), OBConversion::GENOPTIONS, "");
  }

  bool match;
  //These are a vector of each mapping, each containing atom indxs.
  vector<vector<int> > vecatomvec;
  vector<vector<int> >* pMappedAtoms = NULL;
  OBSmartsPattern sp;

  if(nPatternAtoms)
    if(pmol->NumHvyAtoms() != nPatternAtoms)
      return false;

  int imol=0; //index of mol in pattern file
  if(!queries.empty()) //filename supplied
  {
    //match is set true if any of the structures match - OR behaviour
    for(qiter=queries.begin();qiter!=queries.end();++qiter, ++imol) 
    {
      OBIsomorphismMapper* mapper = OBIsomorphismMapper::GetInstance(*qiter);
      OBIsomorphismMapper::Mappings mappings;
      mapper->MapUnique(pmol, mappings);
      if( (match = !mappings.empty()) ) // extra parens to indicate truth value
      {
        OBIsomorphismMapper::Mappings::iterator ita;
        OBIsomorphismMapper::Mapping::iterator itb;
        for(ita=mappings.begin(); ita!=mappings.end();++ita)//each mapping
        {
          vector<int> atomvec;
          for(itb=ita->begin(); itb!=ita->end();++itb)//each atom index
            atomvec.push_back(itb->second+1);
          vecatomvec.push_back(atomvec);
          atomvec.clear();
        }
        pMappedAtoms = &vecatomvec;
        break;
      }
    }
  }
  else //SMARTS supplied
  {
    if(!sp.Init(vec[0]))
    {
      string msg = vec[0] + " cannot be interpreted as either valid SMARTS "
        "or the name of a file with an extension known to OpenBabel "
        "that contains one or more pattern molecules.";
      obErrorLog.ThrowError(__FUNCTION__, msg, obError, onceOnly);
      delete pmol;
      pmol = NULL;
      pConv->SetOneObjectOnly(); //stop conversion
      return false;
    }

    if( (match = sp.Match(*pmol)) ) // extra parens to indicate truth value
      pMappedAtoms = &sp.GetMapList();
  }

  if((!match && !inv) || (match && inv))
  {
    //delete a non-matching mol
    delete pmol;
    pmol = NULL;
    return false;
  }

  if(!inv && vec.size()>=2 && !vec[1].empty() && !nPatternAtoms)
  {
    vector<vector<int> >::iterator iter;

    if(vec[1]=="extract")
    {
      //Delete all unmatched atoms. Use only the first match
      ExtractSubstruct(pmol, *pMappedAtoms->begin());
      return true;
    }

    // color the substructure if there is a second parameter which is not "exact" or "extract"
    // with multiple color parameters use the one corresponding to the query molecule, or the last
    if(imol>vec.size()-2)
      imol = vec.size()-2;
    for(iter=pMappedAtoms->begin();iter!=pMappedAtoms->end();++iter)//each match
       AddDataToSubstruct(pmol, *iter, "color", vec[imol+1]);
    return true;
  }

  if(pConv && pConv->IsLast())
  {
    for(qiter=queries.begin();qiter!=queries.end();++qiter)
      delete *qiter;
    queries.clear();
  }
  return true;
}