Ejemplo n.º 1
0
void testSpaceGroupClean()
{
  // See https://github.com/openbabel/openbabel/pull/254
  OBConversion conv;
  OBMol mol;
  conv.SetInFormat("cif");
  conv.SetOutFormat("pdb");
  conv.ReadFile(&mol, GetFilename("test02.cif"));
  OBUnitCell* pUC = (OBUnitCell*)mol.GetData(OBGenericDataType::UnitCell);
  const SpaceGroup* pSG = pUC->GetSpaceGroup();
  SpaceGroup* sg = new SpaceGroup(*pSG);
  pSG = SpaceGroup::Find(sg);
  OB_ASSERT( pSG != NULL );

  // Check also for errors and warnings
  string summary = obErrorLog.GetMessageSummary();
  OB_ASSERT( summary.find("error") == string::npos);
  OB_ASSERT( summary.find("warning") == string::npos);

  OB_ASSERT( pSG->GetId() == 166 );

  string pdb = conv.WriteString(&mol);
  pdb = conv.WriteString(&mol);

  OB_ASSERT(pdb.find("H -3 m") != string::npos);
}
Ejemplo n.º 2
0
void testPdbOccupancies()
{
  // See https://github.com/openbabel/openbabel/pull/1558
  OBConversion conv;
  OBMol mol;
  conv.SetInFormat("cif");
  conv.SetOutFormat("pdb");
  conv.ReadFile(&mol, GetFilename("test08.cif"));

  string pdb = conv.WriteString(&mol);
  conv.AddOption("o", OBConversion::OUTOPTIONS);
  pdb = conv.WriteString(&mol);

  OB_ASSERT(pdb.find("HETATM    1 NA   UNL     1       0.325   0.000   4.425  0.36") != string::npos);
  OB_ASSERT(pdb.find("HETATM   17  O   UNL     8       1.954   8.956   3.035  1.00") != string::npos);

  OBMol mol_pdb;
  conv.SetInFormat("pdb");
  conv.ReadFile(&mol_pdb, GetFilename("test09.pdb"));

  pdb = conv.WriteString(&mol_pdb);
  OB_ASSERT(pdb.find("HETATM    1 NA   UNL     1       0.325   0.000   4.425  0.36") != string::npos);
  OB_ASSERT(pdb.find("HETATM    2 NA   UNL     1       0.002   8.956   1.393  0.10") != string::npos);
  OB_ASSERT(pdb.find("HETATM   17  O   UNL     8       1.954   8.956   3.035  1.00") != string::npos);
}
Ejemplo n.º 3
0
// A segfault was occuring when a Universal SMILES was output after an InChIfied SMILES.
// This was due to short-circuit caching of InChIs on reading. The fix was to limit
// the situations when the cached value was used, but also to delete the cached value
// in this particular instance.
void test_Issue135_UniversalSmiles()
{
  // Test writing U smiles after I smiles
  OBConversion conv;
  conv.SetInFormat("smi");
  OBMol mol;
  conv.ReadString(&mol, "C(=O)([O-])C(=O)O");
  conv.SetOutFormat("smi");
  conv.SetOptions("I", OBConversion::OUTOPTIONS);
  std::string res = conv.WriteString(&mol, true);
  OB_COMPARE(res, "C(=O)(C(=O)O)[O-]");
  conv.SetOptions("U", OBConversion::OUTOPTIONS);
  res = conv.WriteString(&mol, true);
  OB_COMPARE(res, "C(=O)(C(=O)[O-])O");
}
Ejemplo n.º 4
0
void testPdbRemSpacesHMName()
{
  // See https://github.com/openbabel/openbabel/pull/1558
  OBConversion conv;
  OBMol mol;
  conv.SetInFormat("cif");
  conv.SetOutFormat("pdb");
  conv.ReadFile(&mol, GetFilename("test07.cif"));

  string pdb = conv.WriteString(&mol);
  conv.AddOption("o", OBConversion::OUTOPTIONS);
  pdb = conv.WriteString(&mol);

  OB_ASSERT(pdb.find("I41/amd:2") != string::npos);
}
Ejemplo n.º 5
0
void testPdbOutHexagonalAlternativeOrigin2()
{
  // See https://github.com/openbabel/openbabel/pull/1558
  OBConversion conv;
  OBMol mol;
  conv.SetInFormat("cif");
  conv.SetOutFormat("pdb");
  conv.ReadFile(&mol, GetFilename("test06.cif"));

  string pdb = conv.WriteString(&mol);
  conv.AddOption("o", OBConversion::OUTOPTIONS);
  pdb = conv.WriteString(&mol);

  OB_ASSERT(pdb.find("H -3 m") != string::npos);
}
Ejemplo n.º 6
0
int main() {  
   
     OBAtom a, b, c;
      a.SetAtomicNum(8);
      b.SetAtomicNum(6);
      c.SetAtomicNum(8);

     OBMol mol;
     mol.AddAtom(a);
     mol.AddAtom(b);
     mol.AddAtom(c);
     
     mol.AddBond(1,2,2);
     mol.AddBond(2,3,2);

      OBConversion conv;
      conv.SetOutFormat("SMI");
      cout << conv.WriteString(&mol,1) << endl;
      
     OBSmartsPattern sp;
     
     sp.Init ("C~*");
     
     sp.Match (mol,false);
     
       cout << sp.NumMatches() << endl;
       
        cout << sp.GetUMapList().size() << endl;
      
      return EXIT_SUCCESS;
  }    
Ejemplo n.º 7
0
void testPdbOutAlternativeOrigin()
{
  // See https://github.com/openbabel/openbabel/pull/1558
  OBConversion conv;
  OBMol mol;
  conv.SetInFormat("cif");
  conv.SetOutFormat("pdb");
  conv.ReadFile(&mol, GetFilename("test04.cif"));

  string pdb = conv.WriteString(&mol);
  // ending space is needed to check that there is no origin set
  OB_ASSERT(pdb.find("P 4/n b m ") != string::npos);

  conv.AddOption("o", OBConversion::OUTOPTIONS);
  pdb = conv.WriteString(&mol);

  OB_ASSERT(pdb.find("P 4/n b m:1") != string::npos);
}
Ejemplo n.º 8
0
// Reading an InChI and then adding hydrogens messed up the structure
void test_Issue134_InChI_addH()
{
  OBConversion conv;
  conv.SetInFormat("inchi");
  OBMol mol;
  conv.ReadString(&mol, "InChI=1S/C2H7NO/c1-2(3)4/h2,4H,3H2,1H3/t2-/m0/s1");
  OB_ASSERT(!mol.HasData(OBGenericDataType::VirtualBondData));
  mol.AddHydrogens();
  conv.SetOutFormat("smi");
  std::string res = conv.WriteString(&mol, true);
  OB_COMPARE(res, "C[C@@H](N)O");
}
Ejemplo n.º 9
0
void testCIFMolecules()
{
  // See https://github.com/openbabel/openbabel/pull/1558
  OBConversion conv;
  OBMol mol;
  conv.SetInFormat("cif");
  conv.SetOutFormat("smi"); // check for disconnected fragments
  conv.ReadFile(&mol, GetFilename("1519159.cif"));

  string smi = conv.WriteString(&mol);
  // never, never disconnected fragments from a molecule
  OB_ASSERT(smi.find(".") == string::npos);
}
Ejemplo n.º 10
0
int main(int argc, char **argv)
{
  // Define location of file formats for testing
#ifdef FORMATDIR
    char env[BUFF_SIZE];
    snprintf(env, BUFF_SIZE, "BABEL_LIBDIR=%s", FORMATDIR);
    putenv(env);
#endif  

  std::ifstream ifs(GetFilename("canonstable.can").c_str());
  OB_REQUIRE( ifs );


  OBMol mol;
  OBConversion conv;
  conv.SetInFormat("smi");
  conv.SetOutFormat("can");

  std::string line;
  while (std::getline(ifs, line)) {
    OB_REQUIRE( conv.ReadString(&mol, line.c_str()) );

    std::vector<OBAtom*> atoms;
    FOR_ATOMS_OF_MOL(atom, mol)
      atoms.push_back(&*atom);

    for (int i = 0; i < 5; ++i) {
      // shuffle the atoms
      std::random_shuffle(atoms.begin(), atoms.end());
      mol.RenumberAtoms(atoms);

      // get can smiles
      mol.SetTitle("");
      std::string cansmi = conv.WriteString(&mol, true);
      // comapare with ref
      if (cansmi != line) {
        cout << "ref = " << line << endl;
        cout << "can = " << cansmi << endl;
        OB_ASSERT( cansmi == line );
      }
    }
  }
 
  return 0;
}
bool OpReadConformers::ProcessVec(std::vector<OBBase*>& vec)
{
  // DeferredFormat collects all the molecules, they are processed here, and Deferred Format outputs them
  OBConversion smconv;
  smconv.AddOption("n");
  if(!smconv.SetOutFormat("smi"))
  {
    obErrorLog.ThrowError(__FUNCTION__, "SmilesFormat is not loaded" , obError, onceOnly);
    return false;
  }

  std::string smiles, stored_smiles;
  OBMol* stored_pmol=NULL;
  std::vector<OBBase*>::iterator iter;
  for(iter= vec.begin();iter!=vec.end();++iter)
  {
    OBMol* pmol = dynamic_cast<OBMol*>(*iter);
    if(!pmol)
      continue;
    smiles = smconv.WriteString(pmol);
    Trim(smiles);

    if(stored_smiles==smiles)
    {
      //add the coordinates of the current mol to the stored one as a conformer, and delete current mol
      double *confCoord = new double [pmol->NumAtoms() * 3];
      memcpy((char*)confCoord,(char*)pmol->GetCoordinates(),sizeof(double)*3*pmol->NumAtoms());
      stored_pmol->AddConformer(confCoord);
      delete pmol;
      *iter = NULL;
    }
    else
    {
      stored_pmol = pmol;
      stored_smiles = smiles;
    }
  }

  //erase the NULLS
  vec.erase(std::remove(vec.begin(),vec.end(), (void*)NULL), vec.end());
  return true;
}
Ejemplo n.º 12
0
  bool FastSearchFormat::ObtainTarget(OBConversion* pConv, vector<OBMol>& patternMols, const string& indexname)
  {
    //Obtains an OBMol from:
    // the filename in the -s option or
    // the SMARTS string in the -s option or
    // by converting the file in the -S or -aS options (deprecated).
    // If there is no -s -S or -aS option, information on the index file is displayed.

    OBMol patternMol;
    patternMol.SetIsPatternStructure();

    const char* p = pConv->IsOption("s",OBConversion::GENOPTIONS);

    bool OldSOption=false;
    //If no -s option, make OBMol from file in -S option or -aS option (both deprecated)
    if(!p)
    {
      p = pConv->IsOption("S",OBConversion::GENOPTIONS);
      if(!p)
        p = pConv->IsOption("S",OBConversion::INOPTIONS);//for GUI mainly
      OldSOption = true;
    }
    if(p)
    {
      vector<string> vec;
      tokenize(vec, p);

      //ignore leading ~ (not relevant to fastsearch)
      if(vec[0][0]=='~')
        vec[0].erase(0,1);

      if(vec.size()>1 && vec[1]=="exact")
        pConv->AddOption("e", OBConversion::INOPTIONS);

      OBConversion patternConv;
      OBFormat* pFormat;
      //Interpret as a filename if possible
      string& txt =vec [0];
      if( txt.empty() ||
          txt.find('.')==string::npos ||
          !(pFormat = patternConv.FormatFromExt(txt.c_str())) ||
          !patternConv.SetInFormat(pFormat) ||
          !patternConv.ReadFile(&patternMol, txt) ||
          patternMol.NumAtoms()==0)
        //if false, have a valid patternMol from a file
      {
        //is SMARTS/SMILES
        //Replace e.g. [#6] in SMARTS by C so that it can be converted as SMILES
        //for the fingerprint phase, but allow more generality in the SMARTS phase.
        for(;;)
        {
          string::size_type pos1, pos2;
          pos1 = txt.find("[#");
          if(pos1==string::npos)
            break;
          pos2 = txt.find(']');
          int atno;
          if(pos2!=string::npos &&  (atno = atoi(txt.substr(pos1+2, pos2-pos1-2).c_str())) && atno>0)
            txt.replace(pos1, pos2-pos1+1, etab.GetSymbol(atno));
          else
          {
            obErrorLog.ThrowError(__FUNCTION__,"Ill-formed [#n] atom in SMARTS", obError);
            return false;
          }
        }

        bool hasTildeBond;
        if( (hasTildeBond = (txt.find('~')!=string::npos)) ) // extra parens to indicate truth value
        {
          //Find ~ bonds and make versions of query molecule with a single and aromatic bonds
          //To avoid having to parse the SMILES here, replace ~ by $ (quadruple bond)
          //and then replace this in patternMol. Check first that there are no $ already
          //Sadly, isocynanides may have $ bonds.
          if(txt.find('$')!=string::npos)
          {
            obErrorLog.ThrowError(__FUNCTION__,
              "Cannot use ~ bonds in patterns with $ (quadruple) bonds.)", obError);
            return false;
          }
          replace(txt.begin(),txt.end(), '~' , '$');
        }

        //read as standard SMILES
        patternConv.SetInFormat("smi");
        if(!patternConv.ReadString(&patternMol, vec[0]))
        {
          obErrorLog.ThrowError(__FUNCTION__,"Cannot read the SMILES string",obError);
          return false;
        }
        if(hasTildeBond)
        {
          AddPattern(patternMols, patternMol, 0); //recursively add all combinations of tilde bond values
          return true;
        }
      }
      else
      {
        // target(s) are in a file
        patternMols.push_back(patternMol);
        while(patternConv.Read(&patternMol))
          patternMols.push_back(patternMol);
        return true;
      }
    }

    if(OldSOption) //only when using deprecated -S and -aS options
    {
      //make -s option for later SMARTS test
      OBConversion conv;
      if(conv.SetOutFormat("smi"))
      {
        string optiontext = conv.WriteString(&patternMol, true);
        pConv->AddOption("s", OBConversion::GENOPTIONS, optiontext.c_str());
      }
    }

    if(!p)
    {
      //neither -s or -S options provided. Output info rather than doing search
      const FptIndexHeader& header = fs.GetIndexHeader();
      string id(header.fpid);
      if(id.empty())
        id = "default";
      clog << indexname << " is an index of\n " << header.datafilename
           << ".\n It contains " << header.nEntries
           << " molecules. The fingerprint type is " << id << " with "
           << OBFingerprint::Getbitsperint() * header.words << " bits.\n"
           << "Typical usage for a substructure search:\n"
           << "obabel indexfile.fs -osmi -sSMILES\n"
           << "(-s option in GUI is 'Convert only if match SMARTS or mols in file')" << endl;
      return false;
    }

    patternMols.push_back(patternMol);
    return true;
  }
Ejemplo n.º 13
0
int main(int argc,char *argv[])
{
  // turn off slow sync with C-style output (we don't use it anyway).
  std::ios::sync_with_stdio(false);

  if (argc != 1)
    {
      cout << "Usage: conversion" << endl;
      cout << " Unit tests for OBConversion " << endl;
      return(-1);
    }

  cout << "# Unit tests for OBConversion \n";

  // the number of tests for "prove"
  cout << "1..9\n";

  cout << "ok 1\n"; // for loading tests

  OBMol obMol;
  OBConversion obConversion;
  obConversion.SetInAndOutFormats("smi", "mdl");
  cout << "ok 2\n";

  obConversion.ReadString(&obMol, "C1=CC=CS1");
  cout << "ok 3\n";

  if (obMol.NumAtoms() == 5) {
    cout << "ok 4\n";
  } else {
    cout << "not ok 4\n";
  }

  obMol.AddHydrogens();
  if (obMol.NumAtoms() == 9) {
    cout << "ok 5\n";
  } else {
    cout << "not ok 5\n";
  }

  if ( (obConversion.WriteString(&obMol)).length() > 0)
    cout << "ok 6\n";
  else
    cout << "not ok 6\n";

  // PR#1474265
  obConversion.WriteFile(&obMol, "test.mdl");
  ifstream ifs("test.mdl");
  if (ifs.good())
    cout << "ok 7\n";
  else
    cout << "not ok 7\n";

  // PR#143577
  obConversion.SetInFormat("mdl");
  obConversion.ReadFile(&obMol, "test.mdl");
  if ( remove("test.mdl") != -1)
    cout << "ok 8\n";
  else
    cout << "not ok 8\n";
  
  // gzip input
  // gzip output

  // multi-molecule reading
  // PR#1465586
  // aromatics.smi
  // attype.00.smi

  //ReadFile()
  //Read()
  //WriteString()
  // GetOutputIndex()
  // IsLast

  //ReadString()
  //IsFirstInput
  //Read()

  // splitting
  
  // splitting using gzip-input
  // PR#1357705
  
  // size 0 input
  // PR#1250900
  
  // RegisterFormat
  // FindFormat
  // FormatFromExt
  // FormatFromMIME
  // GetNextFormat
  // GetDefaultFormat

  // BatchFileName
  // IncrementedFileName

  // option handling
  // AddOption
  // IsOption
  // RemoveOption
  // IsOption

  // SetOptions
  // IsOption

  // RegisterOptionParam
  // GetOptionParams

  // GetInStream
  // GetOutStream
  // SetInStream
  // SetOutStream

  // nasty tests
  obConversion.ReadString(&obMol, "");
  obConversion.Read(&obMol);
  cout << "ok 9\n";

  return(0);
}
Ejemplo n.º 14
0
bool OpNewS::Do(OBBase* pOb, const char* OptionText, OpMap* pmap, OBConversion* pConv)
{
  OBMol* pmol = dynamic_cast<OBMol*>(pOb);
  if(!pmol)
    return false;

  // The SMARTS and any other parameters are extracted on the first molecule
  // and stored in the member variables. The parameter is cleared so that
  // the original -s option in transform.cpp is inactive

  //string txt(pmap->find(GetID())->second); // ID can be "s" or "v"

  vector<OBQuery*>::iterator qiter;
  if(OptionText && *OptionText)//(!pConv || pConv->IsFirstInput())
  {
    //Set up on first call
    queries.clear();
    query=NULL;
    nPatternAtoms=0;
    inv=false;

    tokenize(vec, OptionText);
    inv = GetID()[0]=='v';
    if(vec[0][0]=='~')
    {
      inv = true;
      vec[0].erase(0,1);
    }

    //Do not filter out any molecules if there is a parameter "showall";
    //allows -s option to be used for highlighting substructures (--highlight also does this)
    vector<string>::iterator it = std::remove(vec.begin(), vec.end(),"showall");
    showAll = it != vec.end();
    if(showAll)
      vec.erase(it);

    //Store the number of matches required, if as a number in the second parameter, else 0.
    nmatches = 0;
    comparechar = '\0';
    if(vec.size()>1)
    {
      comparechar = vec[1][0];
      if(comparechar=='>' || comparechar=='<')
        vec[1].erase(0,1);
      else
        comparechar = '\0';
      nmatches = atoi(vec[1].c_str());
      if(nmatches) //remove this parameter to still allow coloring
        vec.erase(vec.begin()+1);
    }

    //Interpret as a filename if possible
    MakeQueriesFromMolInFile(queries, vec[0], &nPatternAtoms, strstr(OptionText,"noH"));
    vec.erase(remove(vec.begin(),vec.end(),"noH"),vec.end());//to prevent "noH2" being seen as a color
    
     
    if(queries.empty())
    {
      //SMARTS supplied
    
      // Explicit H in SMARTS requires explicit H in the molecule.
      // Calling AddHydrogens() on a copy of the molecule  is done in parsmart.cpp
      // only when SMARTS contains [H]. Doing more has complications with atom typing,
      // so AddHydrogens here on the molecule (not a copy) when #1 detected.
      addHydrogens = (vec[0].find("#1]")!=string::npos);

      // If extra target mols have been supplied, make a composite SMARTS
      // to test for any of the targets.
      if(ExtraMols.size()>0)
      {
        for(unsigned i=0;i<ExtraMols.size();++i)
        {
          OBConversion extraConv;
          extraConv.AddOption("h");
          if(!extraConv.SetOutFormat("smi"))
            return false;
          // Add option which avoids implicit H being added to the SMARTS.
          // The parameter must be present but can be anything.
          extraConv.AddOption("h",OBConversion::OUTOPTIONS, "X");
          xsmarts += ",$(" + extraConv.WriteString(ExtraMols[i], true) + ")";
        }
      }

      string ysmarts = xsmarts.empty() ? vec[0] : "[$(" + vec[0] + ")" + xsmarts +"]";
      xsmarts.clear();
      if(!sp.Init(ysmarts))
      {
        string msg = ysmarts + " cannot be interpreted as either valid SMARTS "
          "or the name of a file with an extension known to OpenBabel "
          "that contains one or more pattern molecules.";
        obErrorLog.ThrowError(__FUNCTION__, msg, obError, onceOnly);
        delete pmol;
        pmol = NULL;
        pConv->SetOneObjectOnly(); //stop conversion
        return false;
      }
    }
    else
    {
      // Target is in a file. Add extra targets if any supplied
      for(unsigned i=0;i<ExtraMols.size();++i)
        queries.push_back(CompileMoleculeQuery(static_cast<OBMol*>(ExtraMols[i])));
      ExtraMols.clear();
    }

    if(vec.size()>1 && vec[1]=="exact")
    {
      if(queries.empty())
      {
        //Convert SMARTS to SMILES to count number of atoms
        OBConversion conv;
        OBMol patmol;
        if(!conv.SetInFormat("smi") || !conv.ReadString(&patmol, vec[0]))
        {
          obErrorLog.ThrowError(__FUNCTION__, "Cannot read the parameter of -s option, "
          "which has to be valid SMILES when the exact option is used.", obError, onceOnly);
          delete pmol;
          if(pConv)
            pConv->SetOneObjectOnly(); //stop conversion
          return false;
        }
        nPatternAtoms = patmol.NumHvyAtoms();
      }
    }
    else
      nPatternAtoms = 0;

    //disable old versions
    if(pConv)
      pConv->AddOption(GetID(), OBConversion::GENOPTIONS, "");
  }

  bool match = false;
  //These are a vector of each mapping, each containing atom indxs.
  vector<vector<int> > vecatomvec;
  vector<vector<int> >* pMappedAtoms = NULL;

  if(nPatternAtoms)
    if(pmol->NumHvyAtoms() != nPatternAtoms)
      return false;

  unsigned int imol=0; //index of mol in pattern file
  if(!queries.empty()) //filename supplied
  {
    //match is set true if any of the structures match - OR behaviour
    for(qiter=queries.begin();qiter!=queries.end();++qiter, ++imol)
    {
      OBIsomorphismMapper* mapper = OBIsomorphismMapper::GetInstance(*qiter);
      OBIsomorphismMapper::Mappings mappings;
      mapper->MapUnique(pmol, mappings);
      if( (match = !mappings.empty()) ) // extra parens to indicate truth value
      {
        OBIsomorphismMapper::Mappings::iterator ita;
        OBIsomorphismMapper::Mapping::iterator itb;
        for(ita=mappings.begin(); ita!=mappings.end();++ita)//each mapping
        {
          vector<int> atomvec;
          for(itb=ita->begin(); itb!=ita->end();++itb)//each atom index
            atomvec.push_back(itb->second+1);
          vecatomvec.push_back(atomvec);
          atomvec.clear();
        }
        pMappedAtoms = &vecatomvec;
        break;
      }
    }
  }
  else //SMARTS supplied
  {

    if(addHydrogens)
      pmol->AddHydrogens(false,false);

    if( (match = sp.Match(*pmol)) ) // extra parens to indicate truth value
    {
      pMappedAtoms = &sp.GetMapList();
      if(nmatches!=0)
      {
        int n = sp.GetUMapList().size();
        if(comparechar=='>')      match = (n > nmatches);
        else if(comparechar=='<') match = (n < nmatches);
        else                      match = (n == nmatches);
      }
    }
  }

  if((!showAll && (!match && !inv)) || (match && inv))
  {
    //delete a non-matching mol
    delete pmol;
    pmol = NULL;
    return false;
  }

  if(match)
    //Copy the idxes of the first match to a member variable so that it can be retrieved from outside
    firstmatch.assign(pMappedAtoms->begin()->begin(), pMappedAtoms->begin()->end());
  else
    firstmatch.clear();

  if(match && !inv && vec.size()>=2 && !vec[1].empty() && !nPatternAtoms)
  {
    vector<vector<int> >::iterator iter;

    if (vec[1]=="extract" || (vec.size()>3 && vec[2]=="extract"))
    {
      //Delete all unmatched atoms. Use only the first match
      ExtractSubstruct(pmol, *pMappedAtoms->begin());
      return true;
    }

    // color the substructure if there is a second parameter which is not "exact" or "extract" or "noH"
    // with multiple color parameters use the one corresponding to the query molecule, or the last
    if(imol>vec.size()-2)
      imol = vec.size()-2;
    for(iter=pMappedAtoms->begin();iter!=pMappedAtoms->end();++iter)//each match
       AddDataToSubstruct(pmol, *iter, "color", vec[imol+1]);
    return true;
  }

  if(pConv && pConv->IsLast())
  {
    for(qiter=queries.begin();qiter!=queries.end();++qiter)
      delete *qiter;
    queries.clear();
  }
  return true;
}
Ejemplo n.º 15
0
int main(int argc,char *argv[])
{
  // turn off slow sync with C-style output (we don't use it anyway).
  std::ios::sync_with_stdio(false);

  OBConversion conv;
  OBFormat *inFormat, *canFormat;
  OBMol mol;
  ifstream ifs;
  vector<OBMol> fragments;
  unsigned int fragmentCount = 0; // track how many in library -- give a running count
  map<string, int> index; // index of cansmi
  string currentCAN;
  unsigned int size;
  OBAtom *atom;
  OBBond *bond;
  bool nonRingAtoms, nonRingBonds;
  char buffer[BUFF_SIZE];

  canFormat = conv.FindFormat("can");
  conv.SetOutFormat(canFormat);

  if (argc < 2)
    {
      cout << "Usage: obfragment <file>" << endl;
      return(-1);
    }

  for (int i = 1; i < argc; i++) {
    cerr << " Reading file " << argv[i] << endl;

    inFormat = conv.FormatFromExt(argv[i]);
    if(inFormat==NULL || !conv.SetInFormat(inFormat))
      {
        cerr << " Cannot read file format for " << argv[i] << endl;
        continue; // try next file
      }
    
    ifs.open(argv[i]);
    
    if (!ifs)
      {
        cerr << "Cannot read input file: " << argv[i] << endl;
        continue;
      }
    
    
    while(ifs.peek() != EOF && ifs.good())
      {
        conv.Read(&mol, &ifs);
        if (!mol.Has3D()) continue; // invalid coordinates!
        mol.DeleteHydrogens(); // remove these before we do anything else
        
        do {
          nonRingAtoms = false;
          size = mol.NumAtoms();
          for (unsigned int i = 1; i <= size; ++i)
            {
              atom = mol.GetAtom(i);
              if (!atom->IsInRing()) {
                mol.DeleteAtom(atom);
                nonRingAtoms = true;
                break; // don't know how many atoms there are
              } 
              // Previously, we changed atoms to carbon here.
              // Now we perform this alchemy in terms of string-rewriting
              // once the canonical SMILES is generated
            }
        } while (nonRingAtoms);
        
        if (mol.NumAtoms() < 3)
          continue;
        
        if (mol.NumBonds() == 0)
          continue;
        
        do {
          nonRingBonds = false;
          size = mol.NumBonds();
          for (unsigned int i = 0; i < size; ++i)
            {
              bond = mol.GetBond(i);
              if (!bond->IsInRing()) {
                mol.DeleteBond(bond);
                nonRingBonds = true;
                break; // don't know how many bonds there are
              }
            }        
        } while (nonRingBonds);

        fragments = mol.Separate();
        for (unsigned int i = 0; i < fragments.size(); ++i)
          {
            if (fragments[i].NumAtoms() < 3) // too small to care
              continue;
              
            currentCAN = conv.WriteString(&fragments[i], true);
            currentCAN = RewriteSMILES(currentCAN); // change elements to "a/A" for compression
            if (index.find(currentCAN) != index.end()) { // already got this
              index[currentCAN] += 1; // add to the count for bookkeeping
              continue;
            }

            index[currentCAN] = 1; // don't ever write this ring fragment again

            // OK, now retrieve the canonical ordering for the fragment
            vector<string> canonical_order;
            if (fragments[i].HasData("Canonical Atom Order")) {
              OBPairData *data = (OBPairData*)fragments[i].GetData("Canonical Atom Order");
              tokenize(canonical_order, data->GetValue().c_str());
            }

            // Write out an XYZ-style file with the CANSMI as the title
            cout << fragments[i].NumAtoms() << '\n';
            cout << currentCAN << '\n'; // endl causes a flush

            vector<string>::iterator can_iter;
            unsigned int order;
            OBAtom *atom;

            fragments[i].Center();
            fragments[i].ToInertialFrame();

            for (unsigned int index = 0; index < canonical_order.size(); 
                 ++index) {
              order = atoi(canonical_order[index].c_str());
              atom = fragments[i].GetAtom(order);
              
              snprintf(buffer, BUFF_SIZE, "C%8.3f%8.3f%8.3f\n",
                       atom->x(), atom->y(), atom->z());
              cout << buffer;
            }

          }
        fragments.clear();
        if (index.size() > fragmentCount) {
          fragmentCount = index.size();
          cerr << " Fragments: " << fragmentCount << endl;
        }

      } // while reading molecules (in this file)
    ifs.close();
    ifs.clear();
  } // while reading files

  // loop through the map and output frequencies
  map<string, int>::const_iterator indexItr;
  for (indexItr = index.begin(); indexItr != index.end(); ++indexItr) {
    cerr << (*indexItr).second << " INDEX " << (*indexItr).first << "\n";
  }
    
  return(0);
}
Ejemplo n.º 16
0
void genericSmilesCanonicalTest(const std::string &smiles)
{
  cout << "Testing generic smiles <-> canonical smiles" << endl;
  // read a smiles string
  OBMol mol;
  OBConversion conv;
  OB_REQUIRE( conv.SetInFormat("smi") );
  OB_REQUIRE( conv.SetOutFormat("can") );
  cout << "smiles: " << smiles << endl;
  // read a smiles string
  OB_REQUIRE( conv.ReadString(&mol, smiles) );

  // store the stereo data for the smiles string using unique symmetry ids
  std::vector<OBTetrahedralStereo::Config> tetrahedral1;
  std::vector<OBCisTransStereo::Config> cistrans1;
  std::vector<OBSquarePlanarStereo::Config> squareplanar1;

  // get the stereo data
  OB_ASSERT( mol.HasData(OBGenericDataType::StereoData) );
  std::vector<OBGenericData *> stereoData = mol.GetAllData(OBGenericDataType::StereoData);

  std::vector<unsigned int> canlbls;
  std::vector<unsigned int> symclasses;
  OBGraphSym gs1(&mol);
  gs1.GetSymmetry(symclasses);
  CanonicalLabels(&mol, symclasses, canlbls);
  cout << "mol.NumAtoms = " << mol.NumAtoms() << endl;
  for (std::vector<OBGenericData*>::iterator data = stereoData.begin(); data != stereoData.end(); ++data) {
    if (((OBStereoBase*)*data)->GetType() == OBStereo::Tetrahedral) {
      // convert to tetrahedral data
      OBTetrahedralStereo *ts = dynamic_cast<OBTetrahedralStereo*>(*data);
      OB_REQUIRE( ts );
      OB_ASSERT( ts->IsValid() );
      if (!ts->IsValid())
        continue;

      OBTetrahedralStereo::Config config = ts->GetConfig();
      // convert atom ids to symmetry ids
     if (mol.GetAtomById(config.center))
        config.center = canlbls.at( mol.GetAtomById(config.center)->GetIdx() - 1 );
      if (mol.GetAtomById(config.from))
        config.from = canlbls.at( mol.GetAtomById(config.from)->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[0]))
        config.refs[0] = canlbls.at( mol.GetAtomById(config.refs[0])->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[1]))
        config.refs[1] = canlbls.at( mol.GetAtomById(config.refs[1])->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[2]))
        config.refs[2] = canlbls.at( mol.GetAtomById(config.refs[2])->GetIdx() - 1 );
      cout << "Config with symmetry ids: " << config << endl;
      tetrahedral1.push_back(config);
    } else
    if (((OBStereoBase*)*data)->GetType() == OBStereo::CisTrans) {
      // convert to tetrahedral data
      OBCisTransStereo *ct = dynamic_cast<OBCisTransStereo*>(*data);
      OB_REQUIRE( ct );
      OB_ASSERT( ct->IsValid() );

      OBCisTransStereo::Config config = ct->GetConfig();
      // convert atom ids to symmetry ids
      config.begin = canlbls.at( mol.GetAtomById(config.begin)->GetIdx() - 1 );
      config.end = canlbls.at( mol.GetAtomById(config.end)->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[0]))
        config.refs[0] = canlbls.at( mol.GetAtomById(config.refs[0])->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[1]))
        config.refs[1] = canlbls.at( mol.GetAtomById(config.refs[1])->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[2]))
        config.refs[2] = canlbls.at( mol.GetAtomById(config.refs[2])->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[3]))
        config.refs[3] = canlbls.at( mol.GetAtomById(config.refs[3])->GetIdx() - 1 );
      cout << "Config with symmetry ids: " << config << endl;
      cistrans1.push_back(config);
    } else
    if (((OBStereoBase*)*data)->GetType() == OBStereo::SquarePlanar) {
      // convert to tetrahedral data
      OBSquarePlanarStereo *sp = dynamic_cast<OBSquarePlanarStereo*>(*data);
      OB_REQUIRE( sp );
      OB_ASSERT( sp->IsValid() );
      if (!sp->IsValid())
        continue;

      OBSquarePlanarStereo::Config config = sp->GetConfig();
      // convert atom ids to symmetry ids
     if (mol.GetAtomById(config.center))
        config.center = canlbls.at( mol.GetAtomById(config.center)->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[0]))
        config.refs[0] = canlbls.at( mol.GetAtomById(config.refs[0])->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[1]))
        config.refs[1] = canlbls.at( mol.GetAtomById(config.refs[1])->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[2]))
        config.refs[2] = canlbls.at( mol.GetAtomById(config.refs[2])->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[3]))
        config.refs[3] = canlbls.at( mol.GetAtomById(config.refs[3])->GetIdx() - 1 );
      cout << "Config with symmetry ids: " << config << endl;
      squareplanar1.push_back(config);
    }


  }

  // write to can smiles
  std::string canSmiles = conv.WriteString(&mol);
  cout << "canSmiles: " << canSmiles;
  // read can smiles in again
  OB_REQUIRE( conv.ReadString(&mol, canSmiles) );

  // store the stereo data for the smiles string using unique symmetry ids
  std::vector<OBTetrahedralStereo::Config> tetrahedral2;
  std::vector<OBCisTransStereo::Config> cistrans2;
  std::vector<OBSquarePlanarStereo::Config> squareplanar2;

  // get the stereo data
  OB_ASSERT( mol.HasData(OBGenericDataType::StereoData) );
  stereoData = mol.GetAllData(OBGenericDataType::StereoData);

  OBGraphSym gs2(&mol);
  gs2.GetSymmetry(symclasses);
  CanonicalLabels(&mol, symclasses, canlbls);
  cout << "mol.NumAtoms = " << mol.NumAtoms() << endl;
  for (std::vector<OBGenericData*>::iterator data = stereoData.begin(); data != stereoData.end(); ++data) {
    if (((OBStereoBase*)*data)->GetType() == OBStereo::Tetrahedral) {
      // convert to tetrahedral data
      OBTetrahedralStereo *ts = dynamic_cast<OBTetrahedralStereo*>(*data);
      OB_REQUIRE( ts );
      OB_ASSERT( ts->IsValid() );

      OBTetrahedralStereo::Config config = ts->GetConfig();
      // convert atom ids to symmetry ids
      if (mol.GetAtomById(config.center))
        config.center = canlbls.at( mol.GetAtomById(config.center)->GetIdx() - 1 );
      if (mol.GetAtomById(config.from))
        config.from = canlbls.at( mol.GetAtomById(config.from)->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[0]))
        config.refs[0] = canlbls.at( mol.GetAtomById(config.refs[0])->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[1]))
        config.refs[1] = canlbls.at( mol.GetAtomById(config.refs[1])->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[2]))
        config.refs[2] = canlbls.at( mol.GetAtomById(config.refs[2])->GetIdx() - 1 );
      cout << "Config with symmetry ids: " << config << endl;
      tetrahedral2.push_back(config);
    }
    if (((OBStereoBase*)*data)->GetType() == OBStereo::CisTrans) {
      // convert to tetrahedral data
      OBCisTransStereo *ct = dynamic_cast<OBCisTransStereo*>(*data);
      OB_REQUIRE( ct );
      OB_ASSERT( ct->IsValid() );

      OBCisTransStereo::Config config = ct->GetConfig();
      // convert atom ids to symmetry ids
      config.begin = canlbls.at( mol.GetAtomById(config.begin)->GetIdx() - 1 );
      config.end = canlbls.at( mol.GetAtomById(config.end)->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[0]))
        config.refs[0] = canlbls.at( mol.GetAtomById(config.refs[0])->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[1]))
        config.refs[1] = canlbls.at( mol.GetAtomById(config.refs[1])->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[2]))
        config.refs[2] = canlbls.at( mol.GetAtomById(config.refs[2])->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[3]))
        config.refs[3] = canlbls.at( mol.GetAtomById(config.refs[3])->GetIdx() - 1 );
      cout << "Config with symmetry ids: " << config << endl;
      cistrans2.push_back(config);
    } else
    if (((OBStereoBase*)*data)->GetType() == OBStereo::SquarePlanar) {
      // convert to tetrahedral data
      OBSquarePlanarStereo *sp = dynamic_cast<OBSquarePlanarStereo*>(*data);
      OB_REQUIRE( sp );
      OB_ASSERT( sp->IsValid() );

      OBSquarePlanarStereo::Config config = sp->GetConfig();
      // convert atom ids to symmetry ids
      if (mol.GetAtomById(config.center))
        config.center = canlbls.at( mol.GetAtomById(config.center)->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[0]))
        config.refs[0] = canlbls.at( mol.GetAtomById(config.refs[0])->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[1]))
        config.refs[1] = canlbls.at( mol.GetAtomById(config.refs[1])->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[2]))
        config.refs[2] = canlbls.at( mol.GetAtomById(config.refs[2])->GetIdx() - 1 );
      if (mol.GetAtomById(config.refs[3]))
        config.refs[3] = canlbls.at( mol.GetAtomById(config.refs[3])->GetIdx() - 1 );
      cout << "Config with symmetry ids: " << config << endl;
      squareplanar2.push_back(config);
    }

  }

  // compare the tetrahedral structs
  OB_ASSERT( tetrahedral1.size() == tetrahedral2.size() );
  for (unsigned int i = 0; i < tetrahedral1.size(); ++i) {
    for (unsigned int j = 0; j < tetrahedral2.size(); ++j) {
      if (tetrahedral1[i].center == tetrahedral2[j].center)
        OB_ASSERT( tetrahedral1[i] == tetrahedral2[j] );
        if ( tetrahedral1[i] != tetrahedral2[j] ) {
          cout << "1 = " << tetrahedral1[i] << endl;
          cout << "2 = " << tetrahedral2[j] << endl;
        }
    }
  }
  // compare the cistrans structs
  OB_ASSERT( cistrans1.size() == cistrans2.size() );
  for (unsigned int i = 0; i < cistrans1.size(); ++i) {
    for (unsigned int j = 0; j < cistrans2.size(); ++j) {
      if ((cistrans1[i].begin == cistrans2[j].begin) && (cistrans1[i].end == cistrans2[j].end))
        OB_ASSERT( cistrans1[i] == cistrans2[j] );
      if ((cistrans1[i].begin == cistrans2[j].end) && (cistrans1[i].end == cistrans2[j].begin))
        OB_ASSERT( cistrans1[i] == cistrans2[j] );
    }
  }
  // compare the square-planar structs
  OB_ASSERT( squareplanar1.size() == squareplanar2.size() );
  for (unsigned int i = 0; i < squareplanar1.size(); ++i) {
    for (unsigned int j = 0; j < squareplanar2.size(); ++j) {
      if (squareplanar1[i].center == squareplanar2[j].center)
        OB_ASSERT( squareplanar1[i] == squareplanar2[j] );
        if ( squareplanar1[i] != squareplanar2[j] ) {
          cout << "1 = " << squareplanar1[i] << endl;
          cout << "2 = " << squareplanar2[j] << endl;
        }
    }
  }

  cout << "." << endl << endl;
}
Ejemplo n.º 17
0
int main(int argc,char **argv)
{
  char *program_name= argv[0];
  int c;
  char *FileIn = NULL;

  if (argc != 2)
    {
      string err = "Usage: ";
      err += program_name;
      err += " <filename>\n"
      "Output format:\n"
        "name NAME\n"
        "formula  FORMULA\n"
        "mol_weight MOLECULAR_WEIGHT\n"
        "exact_mass ISOTOPIC MASS\n"
        "canonical_SMILES STRING\n"
        "InChI  STRING\n"
        "num_atoms  NUM\n"
        "num_bonds  NUM\n"
        "num_residues  NUM\n"
	"num_rotors NUM\n"
        "sequence RESIDUE_SEQUENCE\n"
        "num_rings NUMBER_OF_RING_(SSSR)\n"
        "logP   NUM\n"
        "PSA    POLAR_SURFACE_AREA\n"
        "MR     MOLAR REFRACTIVITY";
      err += "$$$$";
//      ThrowError(err); wasn't being output because error level too low
      cerr << err; //Why not do directly
      exit(-1);
    }
  else
    {
      FileIn  = argv[1];
    }

  // Find Input filetype
  OBConversion conv;
  OBFormat *format = conv.FormatFromExt(FileIn);
    
  if (!format || !conv.SetInFormat(format))
    {
      cerr << program_name << ": cannot read input format!" << endl;
      exit (-1);
    }

  ifstream ifs;

  // Read the file
  ifs.open(FileIn);
  if (!ifs)
    {
      cerr << program_name << ": cannot read input file!" << endl;
      exit (-1);
    }
  
  OBMol mol;
  OBFormat *canSMIFormat = conv.FindFormat("can");
  OBFormat *inchiFormat = conv.FindFormat("inchi");


  ////////////////////////////////////////////////////////////////////////////
  // List of properties
  // Name
  // Molecular weight (Standard molar mass given by IUPAC atomic masses)
  // Number of rings : the size of the smallest set of smallest rings (SSSR)
  
  //.....ADD YOURS HERE.....
  
  for (c = 1;; ++c)
    {
      mol.Clear();
      conv.Read(&mol, &ifs);
      if (mol.Empty())
        break;
      
      if (!mol.HasHydrogensAdded())
        mol.AddHydrogens();
      // Print the properties
      if (strlen(mol.GetTitle()) != 0)
        cout << "name             " << mol.GetTitle() << endl;
      else 
        cout << "name             " << FileIn << " " << c << endl;

      cout << "formula          " << mol.GetFormula() << endl;
      cout << "mol_weight       " << mol.GetMolWt() << endl;
      cout << "exact_mass       " << mol.GetExactMass() << endl;

      string smilesString = "-";
      if (canSMIFormat) {
        conv.SetOutFormat(canSMIFormat);
        smilesString = conv.WriteString(&mol);
        if ( smilesString.length() == 0 )
        {
          smilesString = "-";
        }
      }
      cout << "canonical_SMILES " << smilesString << endl;

      string inchiString = "-";
      if (inchiFormat) {
        conv.SetOutFormat(inchiFormat);
        inchiString = conv.WriteString(&mol);
        if ( inchiString.length() == 0 )
        {
          inchiString = "-";
        }
      }
      cout << "InChI            " << inchiString << endl;

      cout << "num_atoms        " << mol.NumAtoms() << endl;
      cout << "num_bonds        " << mol.NumBonds() << endl;
      cout << "num_residues     " << mol.NumResidues() << endl;
      cout << "num_rotors       " << mol.NumRotors() << endl;
      if (mol.NumResidues() > 0)
        cout << "sequence         " << sequence(mol) << endl;
      else
        cout << "sequence         " << "-" << endl;

      cout << "num_rings        " << nrings(mol) << endl;

      OBDescriptor* pDesc;
      pDesc= OBDescriptor::FindType("logP");
      if(pDesc)
        cout << "logP             " << pDesc->Predict(&mol) << endl;

      pDesc = OBDescriptor::FindType("TPSA");
      if(pDesc)
        cout << "PSA              " << pDesc->Predict(&mol) << endl;

      pDesc = OBDescriptor::FindType("MR");
      if(pDesc)
        cout << "MR               " << pDesc->Predict(&mol) << endl;

      cout << "$$$$" << endl; // SDF like end of compound descriptor list
      
      //Other OBDescriptors could be output here, even ones that were rarely
      // used. Since these are plugin classes, they may not be loaded, but
      // then with code like the above they are just ignored.
    } // end for loop
  
  return(0);
}