void testSpaceGroupClean() { // See https://github.com/openbabel/openbabel/pull/254 OBConversion conv; OBMol mol; conv.SetInFormat("cif"); conv.SetOutFormat("pdb"); conv.ReadFile(&mol, GetFilename("test02.cif")); OBUnitCell* pUC = (OBUnitCell*)mol.GetData(OBGenericDataType::UnitCell); const SpaceGroup* pSG = pUC->GetSpaceGroup(); SpaceGroup* sg = new SpaceGroup(*pSG); pSG = SpaceGroup::Find(sg); OB_ASSERT( pSG != NULL ); // Check also for errors and warnings string summary = obErrorLog.GetMessageSummary(); OB_ASSERT( summary.find("error") == string::npos); OB_ASSERT( summary.find("warning") == string::npos); OB_ASSERT( pSG->GetId() == 166 ); string pdb = conv.WriteString(&mol); pdb = conv.WriteString(&mol); OB_ASSERT(pdb.find("H -3 m") != string::npos); }
void testPdbOccupancies() { // See https://github.com/openbabel/openbabel/pull/1558 OBConversion conv; OBMol mol; conv.SetInFormat("cif"); conv.SetOutFormat("pdb"); conv.ReadFile(&mol, GetFilename("test08.cif")); string pdb = conv.WriteString(&mol); conv.AddOption("o", OBConversion::OUTOPTIONS); pdb = conv.WriteString(&mol); OB_ASSERT(pdb.find("HETATM 1 NA UNL 1 0.325 0.000 4.425 0.36") != string::npos); OB_ASSERT(pdb.find("HETATM 17 O UNL 8 1.954 8.956 3.035 1.00") != string::npos); OBMol mol_pdb; conv.SetInFormat("pdb"); conv.ReadFile(&mol_pdb, GetFilename("test09.pdb")); pdb = conv.WriteString(&mol_pdb); OB_ASSERT(pdb.find("HETATM 1 NA UNL 1 0.325 0.000 4.425 0.36") != string::npos); OB_ASSERT(pdb.find("HETATM 2 NA UNL 1 0.002 8.956 1.393 0.10") != string::npos); OB_ASSERT(pdb.find("HETATM 17 O UNL 8 1.954 8.956 3.035 1.00") != string::npos); }
// A segfault was occuring when a Universal SMILES was output after an InChIfied SMILES. // This was due to short-circuit caching of InChIs on reading. The fix was to limit // the situations when the cached value was used, but also to delete the cached value // in this particular instance. void test_Issue135_UniversalSmiles() { // Test writing U smiles after I smiles OBConversion conv; conv.SetInFormat("smi"); OBMol mol; conv.ReadString(&mol, "C(=O)([O-])C(=O)O"); conv.SetOutFormat("smi"); conv.SetOptions("I", OBConversion::OUTOPTIONS); std::string res = conv.WriteString(&mol, true); OB_COMPARE(res, "C(=O)(C(=O)O)[O-]"); conv.SetOptions("U", OBConversion::OUTOPTIONS); res = conv.WriteString(&mol, true); OB_COMPARE(res, "C(=O)(C(=O)[O-])O"); }
void testPdbRemSpacesHMName() { // See https://github.com/openbabel/openbabel/pull/1558 OBConversion conv; OBMol mol; conv.SetInFormat("cif"); conv.SetOutFormat("pdb"); conv.ReadFile(&mol, GetFilename("test07.cif")); string pdb = conv.WriteString(&mol); conv.AddOption("o", OBConversion::OUTOPTIONS); pdb = conv.WriteString(&mol); OB_ASSERT(pdb.find("I41/amd:2") != string::npos); }
void testPdbOutHexagonalAlternativeOrigin2() { // See https://github.com/openbabel/openbabel/pull/1558 OBConversion conv; OBMol mol; conv.SetInFormat("cif"); conv.SetOutFormat("pdb"); conv.ReadFile(&mol, GetFilename("test06.cif")); string pdb = conv.WriteString(&mol); conv.AddOption("o", OBConversion::OUTOPTIONS); pdb = conv.WriteString(&mol); OB_ASSERT(pdb.find("H -3 m") != string::npos); }
int main() { OBAtom a, b, c; a.SetAtomicNum(8); b.SetAtomicNum(6); c.SetAtomicNum(8); OBMol mol; mol.AddAtom(a); mol.AddAtom(b); mol.AddAtom(c); mol.AddBond(1,2,2); mol.AddBond(2,3,2); OBConversion conv; conv.SetOutFormat("SMI"); cout << conv.WriteString(&mol,1) << endl; OBSmartsPattern sp; sp.Init ("C~*"); sp.Match (mol,false); cout << sp.NumMatches() << endl; cout << sp.GetUMapList().size() << endl; return EXIT_SUCCESS; }
void testPdbOutAlternativeOrigin() { // See https://github.com/openbabel/openbabel/pull/1558 OBConversion conv; OBMol mol; conv.SetInFormat("cif"); conv.SetOutFormat("pdb"); conv.ReadFile(&mol, GetFilename("test04.cif")); string pdb = conv.WriteString(&mol); // ending space is needed to check that there is no origin set OB_ASSERT(pdb.find("P 4/n b m ") != string::npos); conv.AddOption("o", OBConversion::OUTOPTIONS); pdb = conv.WriteString(&mol); OB_ASSERT(pdb.find("P 4/n b m:1") != string::npos); }
// Reading an InChI and then adding hydrogens messed up the structure void test_Issue134_InChI_addH() { OBConversion conv; conv.SetInFormat("inchi"); OBMol mol; conv.ReadString(&mol, "InChI=1S/C2H7NO/c1-2(3)4/h2,4H,3H2,1H3/t2-/m0/s1"); OB_ASSERT(!mol.HasData(OBGenericDataType::VirtualBondData)); mol.AddHydrogens(); conv.SetOutFormat("smi"); std::string res = conv.WriteString(&mol, true); OB_COMPARE(res, "C[C@@H](N)O"); }
void testCIFMolecules() { // See https://github.com/openbabel/openbabel/pull/1558 OBConversion conv; OBMol mol; conv.SetInFormat("cif"); conv.SetOutFormat("smi"); // check for disconnected fragments conv.ReadFile(&mol, GetFilename("1519159.cif")); string smi = conv.WriteString(&mol); // never, never disconnected fragments from a molecule OB_ASSERT(smi.find(".") == string::npos); }
int main(int argc, char **argv) { // Define location of file formats for testing #ifdef FORMATDIR char env[BUFF_SIZE]; snprintf(env, BUFF_SIZE, "BABEL_LIBDIR=%s", FORMATDIR); putenv(env); #endif std::ifstream ifs(GetFilename("canonstable.can").c_str()); OB_REQUIRE( ifs ); OBMol mol; OBConversion conv; conv.SetInFormat("smi"); conv.SetOutFormat("can"); std::string line; while (std::getline(ifs, line)) { OB_REQUIRE( conv.ReadString(&mol, line.c_str()) ); std::vector<OBAtom*> atoms; FOR_ATOMS_OF_MOL(atom, mol) atoms.push_back(&*atom); for (int i = 0; i < 5; ++i) { // shuffle the atoms std::random_shuffle(atoms.begin(), atoms.end()); mol.RenumberAtoms(atoms); // get can smiles mol.SetTitle(""); std::string cansmi = conv.WriteString(&mol, true); // comapare with ref if (cansmi != line) { cout << "ref = " << line << endl; cout << "can = " << cansmi << endl; OB_ASSERT( cansmi == line ); } } } return 0; }
bool OpReadConformers::ProcessVec(std::vector<OBBase*>& vec) { // DeferredFormat collects all the molecules, they are processed here, and Deferred Format outputs them OBConversion smconv; smconv.AddOption("n"); if(!smconv.SetOutFormat("smi")) { obErrorLog.ThrowError(__FUNCTION__, "SmilesFormat is not loaded" , obError, onceOnly); return false; } std::string smiles, stored_smiles; OBMol* stored_pmol=NULL; std::vector<OBBase*>::iterator iter; for(iter= vec.begin();iter!=vec.end();++iter) { OBMol* pmol = dynamic_cast<OBMol*>(*iter); if(!pmol) continue; smiles = smconv.WriteString(pmol); Trim(smiles); if(stored_smiles==smiles) { //add the coordinates of the current mol to the stored one as a conformer, and delete current mol double *confCoord = new double [pmol->NumAtoms() * 3]; memcpy((char*)confCoord,(char*)pmol->GetCoordinates(),sizeof(double)*3*pmol->NumAtoms()); stored_pmol->AddConformer(confCoord); delete pmol; *iter = NULL; } else { stored_pmol = pmol; stored_smiles = smiles; } } //erase the NULLS vec.erase(std::remove(vec.begin(),vec.end(), (void*)NULL), vec.end()); return true; }
bool FastSearchFormat::ObtainTarget(OBConversion* pConv, vector<OBMol>& patternMols, const string& indexname) { //Obtains an OBMol from: // the filename in the -s option or // the SMARTS string in the -s option or // by converting the file in the -S or -aS options (deprecated). // If there is no -s -S or -aS option, information on the index file is displayed. OBMol patternMol; patternMol.SetIsPatternStructure(); const char* p = pConv->IsOption("s",OBConversion::GENOPTIONS); bool OldSOption=false; //If no -s option, make OBMol from file in -S option or -aS option (both deprecated) if(!p) { p = pConv->IsOption("S",OBConversion::GENOPTIONS); if(!p) p = pConv->IsOption("S",OBConversion::INOPTIONS);//for GUI mainly OldSOption = true; } if(p) { vector<string> vec; tokenize(vec, p); //ignore leading ~ (not relevant to fastsearch) if(vec[0][0]=='~') vec[0].erase(0,1); if(vec.size()>1 && vec[1]=="exact") pConv->AddOption("e", OBConversion::INOPTIONS); OBConversion patternConv; OBFormat* pFormat; //Interpret as a filename if possible string& txt =vec [0]; if( txt.empty() || txt.find('.')==string::npos || !(pFormat = patternConv.FormatFromExt(txt.c_str())) || !patternConv.SetInFormat(pFormat) || !patternConv.ReadFile(&patternMol, txt) || patternMol.NumAtoms()==0) //if false, have a valid patternMol from a file { //is SMARTS/SMILES //Replace e.g. [#6] in SMARTS by C so that it can be converted as SMILES //for the fingerprint phase, but allow more generality in the SMARTS phase. for(;;) { string::size_type pos1, pos2; pos1 = txt.find("[#"); if(pos1==string::npos) break; pos2 = txt.find(']'); int atno; if(pos2!=string::npos && (atno = atoi(txt.substr(pos1+2, pos2-pos1-2).c_str())) && atno>0) txt.replace(pos1, pos2-pos1+1, etab.GetSymbol(atno)); else { obErrorLog.ThrowError(__FUNCTION__,"Ill-formed [#n] atom in SMARTS", obError); return false; } } bool hasTildeBond; if( (hasTildeBond = (txt.find('~')!=string::npos)) ) // extra parens to indicate truth value { //Find ~ bonds and make versions of query molecule with a single and aromatic bonds //To avoid having to parse the SMILES here, replace ~ by $ (quadruple bond) //and then replace this in patternMol. Check first that there are no $ already //Sadly, isocynanides may have $ bonds. if(txt.find('$')!=string::npos) { obErrorLog.ThrowError(__FUNCTION__, "Cannot use ~ bonds in patterns with $ (quadruple) bonds.)", obError); return false; } replace(txt.begin(),txt.end(), '~' , '$'); } //read as standard SMILES patternConv.SetInFormat("smi"); if(!patternConv.ReadString(&patternMol, vec[0])) { obErrorLog.ThrowError(__FUNCTION__,"Cannot read the SMILES string",obError); return false; } if(hasTildeBond) { AddPattern(patternMols, patternMol, 0); //recursively add all combinations of tilde bond values return true; } } else { // target(s) are in a file patternMols.push_back(patternMol); while(patternConv.Read(&patternMol)) patternMols.push_back(patternMol); return true; } } if(OldSOption) //only when using deprecated -S and -aS options { //make -s option for later SMARTS test OBConversion conv; if(conv.SetOutFormat("smi")) { string optiontext = conv.WriteString(&patternMol, true); pConv->AddOption("s", OBConversion::GENOPTIONS, optiontext.c_str()); } } if(!p) { //neither -s or -S options provided. Output info rather than doing search const FptIndexHeader& header = fs.GetIndexHeader(); string id(header.fpid); if(id.empty()) id = "default"; clog << indexname << " is an index of\n " << header.datafilename << ".\n It contains " << header.nEntries << " molecules. The fingerprint type is " << id << " with " << OBFingerprint::Getbitsperint() * header.words << " bits.\n" << "Typical usage for a substructure search:\n" << "obabel indexfile.fs -osmi -sSMILES\n" << "(-s option in GUI is 'Convert only if match SMARTS or mols in file')" << endl; return false; } patternMols.push_back(patternMol); return true; }
int main(int argc,char *argv[]) { // turn off slow sync with C-style output (we don't use it anyway). std::ios::sync_with_stdio(false); if (argc != 1) { cout << "Usage: conversion" << endl; cout << " Unit tests for OBConversion " << endl; return(-1); } cout << "# Unit tests for OBConversion \n"; // the number of tests for "prove" cout << "1..9\n"; cout << "ok 1\n"; // for loading tests OBMol obMol; OBConversion obConversion; obConversion.SetInAndOutFormats("smi", "mdl"); cout << "ok 2\n"; obConversion.ReadString(&obMol, "C1=CC=CS1"); cout << "ok 3\n"; if (obMol.NumAtoms() == 5) { cout << "ok 4\n"; } else { cout << "not ok 4\n"; } obMol.AddHydrogens(); if (obMol.NumAtoms() == 9) { cout << "ok 5\n"; } else { cout << "not ok 5\n"; } if ( (obConversion.WriteString(&obMol)).length() > 0) cout << "ok 6\n"; else cout << "not ok 6\n"; // PR#1474265 obConversion.WriteFile(&obMol, "test.mdl"); ifstream ifs("test.mdl"); if (ifs.good()) cout << "ok 7\n"; else cout << "not ok 7\n"; // PR#143577 obConversion.SetInFormat("mdl"); obConversion.ReadFile(&obMol, "test.mdl"); if ( remove("test.mdl") != -1) cout << "ok 8\n"; else cout << "not ok 8\n"; // gzip input // gzip output // multi-molecule reading // PR#1465586 // aromatics.smi // attype.00.smi //ReadFile() //Read() //WriteString() // GetOutputIndex() // IsLast //ReadString() //IsFirstInput //Read() // splitting // splitting using gzip-input // PR#1357705 // size 0 input // PR#1250900 // RegisterFormat // FindFormat // FormatFromExt // FormatFromMIME // GetNextFormat // GetDefaultFormat // BatchFileName // IncrementedFileName // option handling // AddOption // IsOption // RemoveOption // IsOption // SetOptions // IsOption // RegisterOptionParam // GetOptionParams // GetInStream // GetOutStream // SetInStream // SetOutStream // nasty tests obConversion.ReadString(&obMol, ""); obConversion.Read(&obMol); cout << "ok 9\n"; return(0); }
bool OpNewS::Do(OBBase* pOb, const char* OptionText, OpMap* pmap, OBConversion* pConv) { OBMol* pmol = dynamic_cast<OBMol*>(pOb); if(!pmol) return false; // The SMARTS and any other parameters are extracted on the first molecule // and stored in the member variables. The parameter is cleared so that // the original -s option in transform.cpp is inactive //string txt(pmap->find(GetID())->second); // ID can be "s" or "v" vector<OBQuery*>::iterator qiter; if(OptionText && *OptionText)//(!pConv || pConv->IsFirstInput()) { //Set up on first call queries.clear(); query=NULL; nPatternAtoms=0; inv=false; tokenize(vec, OptionText); inv = GetID()[0]=='v'; if(vec[0][0]=='~') { inv = true; vec[0].erase(0,1); } //Do not filter out any molecules if there is a parameter "showall"; //allows -s option to be used for highlighting substructures (--highlight also does this) vector<string>::iterator it = std::remove(vec.begin(), vec.end(),"showall"); showAll = it != vec.end(); if(showAll) vec.erase(it); //Store the number of matches required, if as a number in the second parameter, else 0. nmatches = 0; comparechar = '\0'; if(vec.size()>1) { comparechar = vec[1][0]; if(comparechar=='>' || comparechar=='<') vec[1].erase(0,1); else comparechar = '\0'; nmatches = atoi(vec[1].c_str()); if(nmatches) //remove this parameter to still allow coloring vec.erase(vec.begin()+1); } //Interpret as a filename if possible MakeQueriesFromMolInFile(queries, vec[0], &nPatternAtoms, strstr(OptionText,"noH")); vec.erase(remove(vec.begin(),vec.end(),"noH"),vec.end());//to prevent "noH2" being seen as a color if(queries.empty()) { //SMARTS supplied // Explicit H in SMARTS requires explicit H in the molecule. // Calling AddHydrogens() on a copy of the molecule is done in parsmart.cpp // only when SMARTS contains [H]. Doing more has complications with atom typing, // so AddHydrogens here on the molecule (not a copy) when #1 detected. addHydrogens = (vec[0].find("#1]")!=string::npos); // If extra target mols have been supplied, make a composite SMARTS // to test for any of the targets. if(ExtraMols.size()>0) { for(unsigned i=0;i<ExtraMols.size();++i) { OBConversion extraConv; extraConv.AddOption("h"); if(!extraConv.SetOutFormat("smi")) return false; // Add option which avoids implicit H being added to the SMARTS. // The parameter must be present but can be anything. extraConv.AddOption("h",OBConversion::OUTOPTIONS, "X"); xsmarts += ",$(" + extraConv.WriteString(ExtraMols[i], true) + ")"; } } string ysmarts = xsmarts.empty() ? vec[0] : "[$(" + vec[0] + ")" + xsmarts +"]"; xsmarts.clear(); if(!sp.Init(ysmarts)) { string msg = ysmarts + " cannot be interpreted as either valid SMARTS " "or the name of a file with an extension known to OpenBabel " "that contains one or more pattern molecules."; obErrorLog.ThrowError(__FUNCTION__, msg, obError, onceOnly); delete pmol; pmol = NULL; pConv->SetOneObjectOnly(); //stop conversion return false; } } else { // Target is in a file. Add extra targets if any supplied for(unsigned i=0;i<ExtraMols.size();++i) queries.push_back(CompileMoleculeQuery(static_cast<OBMol*>(ExtraMols[i]))); ExtraMols.clear(); } if(vec.size()>1 && vec[1]=="exact") { if(queries.empty()) { //Convert SMARTS to SMILES to count number of atoms OBConversion conv; OBMol patmol; if(!conv.SetInFormat("smi") || !conv.ReadString(&patmol, vec[0])) { obErrorLog.ThrowError(__FUNCTION__, "Cannot read the parameter of -s option, " "which has to be valid SMILES when the exact option is used.", obError, onceOnly); delete pmol; if(pConv) pConv->SetOneObjectOnly(); //stop conversion return false; } nPatternAtoms = patmol.NumHvyAtoms(); } } else nPatternAtoms = 0; //disable old versions if(pConv) pConv->AddOption(GetID(), OBConversion::GENOPTIONS, ""); } bool match = false; //These are a vector of each mapping, each containing atom indxs. vector<vector<int> > vecatomvec; vector<vector<int> >* pMappedAtoms = NULL; if(nPatternAtoms) if(pmol->NumHvyAtoms() != nPatternAtoms) return false; unsigned int imol=0; //index of mol in pattern file if(!queries.empty()) //filename supplied { //match is set true if any of the structures match - OR behaviour for(qiter=queries.begin();qiter!=queries.end();++qiter, ++imol) { OBIsomorphismMapper* mapper = OBIsomorphismMapper::GetInstance(*qiter); OBIsomorphismMapper::Mappings mappings; mapper->MapUnique(pmol, mappings); if( (match = !mappings.empty()) ) // extra parens to indicate truth value { OBIsomorphismMapper::Mappings::iterator ita; OBIsomorphismMapper::Mapping::iterator itb; for(ita=mappings.begin(); ita!=mappings.end();++ita)//each mapping { vector<int> atomvec; for(itb=ita->begin(); itb!=ita->end();++itb)//each atom index atomvec.push_back(itb->second+1); vecatomvec.push_back(atomvec); atomvec.clear(); } pMappedAtoms = &vecatomvec; break; } } } else //SMARTS supplied { if(addHydrogens) pmol->AddHydrogens(false,false); if( (match = sp.Match(*pmol)) ) // extra parens to indicate truth value { pMappedAtoms = &sp.GetMapList(); if(nmatches!=0) { int n = sp.GetUMapList().size(); if(comparechar=='>') match = (n > nmatches); else if(comparechar=='<') match = (n < nmatches); else match = (n == nmatches); } } } if((!showAll && (!match && !inv)) || (match && inv)) { //delete a non-matching mol delete pmol; pmol = NULL; return false; } if(match) //Copy the idxes of the first match to a member variable so that it can be retrieved from outside firstmatch.assign(pMappedAtoms->begin()->begin(), pMappedAtoms->begin()->end()); else firstmatch.clear(); if(match && !inv && vec.size()>=2 && !vec[1].empty() && !nPatternAtoms) { vector<vector<int> >::iterator iter; if (vec[1]=="extract" || (vec.size()>3 && vec[2]=="extract")) { //Delete all unmatched atoms. Use only the first match ExtractSubstruct(pmol, *pMappedAtoms->begin()); return true; } // color the substructure if there is a second parameter which is not "exact" or "extract" or "noH" // with multiple color parameters use the one corresponding to the query molecule, or the last if(imol>vec.size()-2) imol = vec.size()-2; for(iter=pMappedAtoms->begin();iter!=pMappedAtoms->end();++iter)//each match AddDataToSubstruct(pmol, *iter, "color", vec[imol+1]); return true; } if(pConv && pConv->IsLast()) { for(qiter=queries.begin();qiter!=queries.end();++qiter) delete *qiter; queries.clear(); } return true; }
int main(int argc,char *argv[]) { // turn off slow sync with C-style output (we don't use it anyway). std::ios::sync_with_stdio(false); OBConversion conv; OBFormat *inFormat, *canFormat; OBMol mol; ifstream ifs; vector<OBMol> fragments; unsigned int fragmentCount = 0; // track how many in library -- give a running count map<string, int> index; // index of cansmi string currentCAN; unsigned int size; OBAtom *atom; OBBond *bond; bool nonRingAtoms, nonRingBonds; char buffer[BUFF_SIZE]; canFormat = conv.FindFormat("can"); conv.SetOutFormat(canFormat); if (argc < 2) { cout << "Usage: obfragment <file>" << endl; return(-1); } for (int i = 1; i < argc; i++) { cerr << " Reading file " << argv[i] << endl; inFormat = conv.FormatFromExt(argv[i]); if(inFormat==NULL || !conv.SetInFormat(inFormat)) { cerr << " Cannot read file format for " << argv[i] << endl; continue; // try next file } ifs.open(argv[i]); if (!ifs) { cerr << "Cannot read input file: " << argv[i] << endl; continue; } while(ifs.peek() != EOF && ifs.good()) { conv.Read(&mol, &ifs); if (!mol.Has3D()) continue; // invalid coordinates! mol.DeleteHydrogens(); // remove these before we do anything else do { nonRingAtoms = false; size = mol.NumAtoms(); for (unsigned int i = 1; i <= size; ++i) { atom = mol.GetAtom(i); if (!atom->IsInRing()) { mol.DeleteAtom(atom); nonRingAtoms = true; break; // don't know how many atoms there are } // Previously, we changed atoms to carbon here. // Now we perform this alchemy in terms of string-rewriting // once the canonical SMILES is generated } } while (nonRingAtoms); if (mol.NumAtoms() < 3) continue; if (mol.NumBonds() == 0) continue; do { nonRingBonds = false; size = mol.NumBonds(); for (unsigned int i = 0; i < size; ++i) { bond = mol.GetBond(i); if (!bond->IsInRing()) { mol.DeleteBond(bond); nonRingBonds = true; break; // don't know how many bonds there are } } } while (nonRingBonds); fragments = mol.Separate(); for (unsigned int i = 0; i < fragments.size(); ++i) { if (fragments[i].NumAtoms() < 3) // too small to care continue; currentCAN = conv.WriteString(&fragments[i], true); currentCAN = RewriteSMILES(currentCAN); // change elements to "a/A" for compression if (index.find(currentCAN) != index.end()) { // already got this index[currentCAN] += 1; // add to the count for bookkeeping continue; } index[currentCAN] = 1; // don't ever write this ring fragment again // OK, now retrieve the canonical ordering for the fragment vector<string> canonical_order; if (fragments[i].HasData("Canonical Atom Order")) { OBPairData *data = (OBPairData*)fragments[i].GetData("Canonical Atom Order"); tokenize(canonical_order, data->GetValue().c_str()); } // Write out an XYZ-style file with the CANSMI as the title cout << fragments[i].NumAtoms() << '\n'; cout << currentCAN << '\n'; // endl causes a flush vector<string>::iterator can_iter; unsigned int order; OBAtom *atom; fragments[i].Center(); fragments[i].ToInertialFrame(); for (unsigned int index = 0; index < canonical_order.size(); ++index) { order = atoi(canonical_order[index].c_str()); atom = fragments[i].GetAtom(order); snprintf(buffer, BUFF_SIZE, "C%8.3f%8.3f%8.3f\n", atom->x(), atom->y(), atom->z()); cout << buffer; } } fragments.clear(); if (index.size() > fragmentCount) { fragmentCount = index.size(); cerr << " Fragments: " << fragmentCount << endl; } } // while reading molecules (in this file) ifs.close(); ifs.clear(); } // while reading files // loop through the map and output frequencies map<string, int>::const_iterator indexItr; for (indexItr = index.begin(); indexItr != index.end(); ++indexItr) { cerr << (*indexItr).second << " INDEX " << (*indexItr).first << "\n"; } return(0); }
void genericSmilesCanonicalTest(const std::string &smiles) { cout << "Testing generic smiles <-> canonical smiles" << endl; // read a smiles string OBMol mol; OBConversion conv; OB_REQUIRE( conv.SetInFormat("smi") ); OB_REQUIRE( conv.SetOutFormat("can") ); cout << "smiles: " << smiles << endl; // read a smiles string OB_REQUIRE( conv.ReadString(&mol, smiles) ); // store the stereo data for the smiles string using unique symmetry ids std::vector<OBTetrahedralStereo::Config> tetrahedral1; std::vector<OBCisTransStereo::Config> cistrans1; std::vector<OBSquarePlanarStereo::Config> squareplanar1; // get the stereo data OB_ASSERT( mol.HasData(OBGenericDataType::StereoData) ); std::vector<OBGenericData *> stereoData = mol.GetAllData(OBGenericDataType::StereoData); std::vector<unsigned int> canlbls; std::vector<unsigned int> symclasses; OBGraphSym gs1(&mol); gs1.GetSymmetry(symclasses); CanonicalLabels(&mol, symclasses, canlbls); cout << "mol.NumAtoms = " << mol.NumAtoms() << endl; for (std::vector<OBGenericData*>::iterator data = stereoData.begin(); data != stereoData.end(); ++data) { if (((OBStereoBase*)*data)->GetType() == OBStereo::Tetrahedral) { // convert to tetrahedral data OBTetrahedralStereo *ts = dynamic_cast<OBTetrahedralStereo*>(*data); OB_REQUIRE( ts ); OB_ASSERT( ts->IsValid() ); if (!ts->IsValid()) continue; OBTetrahedralStereo::Config config = ts->GetConfig(); // convert atom ids to symmetry ids if (mol.GetAtomById(config.center)) config.center = canlbls.at( mol.GetAtomById(config.center)->GetIdx() - 1 ); if (mol.GetAtomById(config.from)) config.from = canlbls.at( mol.GetAtomById(config.from)->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[0])) config.refs[0] = canlbls.at( mol.GetAtomById(config.refs[0])->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[1])) config.refs[1] = canlbls.at( mol.GetAtomById(config.refs[1])->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[2])) config.refs[2] = canlbls.at( mol.GetAtomById(config.refs[2])->GetIdx() - 1 ); cout << "Config with symmetry ids: " << config << endl; tetrahedral1.push_back(config); } else if (((OBStereoBase*)*data)->GetType() == OBStereo::CisTrans) { // convert to tetrahedral data OBCisTransStereo *ct = dynamic_cast<OBCisTransStereo*>(*data); OB_REQUIRE( ct ); OB_ASSERT( ct->IsValid() ); OBCisTransStereo::Config config = ct->GetConfig(); // convert atom ids to symmetry ids config.begin = canlbls.at( mol.GetAtomById(config.begin)->GetIdx() - 1 ); config.end = canlbls.at( mol.GetAtomById(config.end)->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[0])) config.refs[0] = canlbls.at( mol.GetAtomById(config.refs[0])->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[1])) config.refs[1] = canlbls.at( mol.GetAtomById(config.refs[1])->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[2])) config.refs[2] = canlbls.at( mol.GetAtomById(config.refs[2])->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[3])) config.refs[3] = canlbls.at( mol.GetAtomById(config.refs[3])->GetIdx() - 1 ); cout << "Config with symmetry ids: " << config << endl; cistrans1.push_back(config); } else if (((OBStereoBase*)*data)->GetType() == OBStereo::SquarePlanar) { // convert to tetrahedral data OBSquarePlanarStereo *sp = dynamic_cast<OBSquarePlanarStereo*>(*data); OB_REQUIRE( sp ); OB_ASSERT( sp->IsValid() ); if (!sp->IsValid()) continue; OBSquarePlanarStereo::Config config = sp->GetConfig(); // convert atom ids to symmetry ids if (mol.GetAtomById(config.center)) config.center = canlbls.at( mol.GetAtomById(config.center)->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[0])) config.refs[0] = canlbls.at( mol.GetAtomById(config.refs[0])->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[1])) config.refs[1] = canlbls.at( mol.GetAtomById(config.refs[1])->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[2])) config.refs[2] = canlbls.at( mol.GetAtomById(config.refs[2])->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[3])) config.refs[3] = canlbls.at( mol.GetAtomById(config.refs[3])->GetIdx() - 1 ); cout << "Config with symmetry ids: " << config << endl; squareplanar1.push_back(config); } } // write to can smiles std::string canSmiles = conv.WriteString(&mol); cout << "canSmiles: " << canSmiles; // read can smiles in again OB_REQUIRE( conv.ReadString(&mol, canSmiles) ); // store the stereo data for the smiles string using unique symmetry ids std::vector<OBTetrahedralStereo::Config> tetrahedral2; std::vector<OBCisTransStereo::Config> cistrans2; std::vector<OBSquarePlanarStereo::Config> squareplanar2; // get the stereo data OB_ASSERT( mol.HasData(OBGenericDataType::StereoData) ); stereoData = mol.GetAllData(OBGenericDataType::StereoData); OBGraphSym gs2(&mol); gs2.GetSymmetry(symclasses); CanonicalLabels(&mol, symclasses, canlbls); cout << "mol.NumAtoms = " << mol.NumAtoms() << endl; for (std::vector<OBGenericData*>::iterator data = stereoData.begin(); data != stereoData.end(); ++data) { if (((OBStereoBase*)*data)->GetType() == OBStereo::Tetrahedral) { // convert to tetrahedral data OBTetrahedralStereo *ts = dynamic_cast<OBTetrahedralStereo*>(*data); OB_REQUIRE( ts ); OB_ASSERT( ts->IsValid() ); OBTetrahedralStereo::Config config = ts->GetConfig(); // convert atom ids to symmetry ids if (mol.GetAtomById(config.center)) config.center = canlbls.at( mol.GetAtomById(config.center)->GetIdx() - 1 ); if (mol.GetAtomById(config.from)) config.from = canlbls.at( mol.GetAtomById(config.from)->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[0])) config.refs[0] = canlbls.at( mol.GetAtomById(config.refs[0])->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[1])) config.refs[1] = canlbls.at( mol.GetAtomById(config.refs[1])->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[2])) config.refs[2] = canlbls.at( mol.GetAtomById(config.refs[2])->GetIdx() - 1 ); cout << "Config with symmetry ids: " << config << endl; tetrahedral2.push_back(config); } if (((OBStereoBase*)*data)->GetType() == OBStereo::CisTrans) { // convert to tetrahedral data OBCisTransStereo *ct = dynamic_cast<OBCisTransStereo*>(*data); OB_REQUIRE( ct ); OB_ASSERT( ct->IsValid() ); OBCisTransStereo::Config config = ct->GetConfig(); // convert atom ids to symmetry ids config.begin = canlbls.at( mol.GetAtomById(config.begin)->GetIdx() - 1 ); config.end = canlbls.at( mol.GetAtomById(config.end)->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[0])) config.refs[0] = canlbls.at( mol.GetAtomById(config.refs[0])->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[1])) config.refs[1] = canlbls.at( mol.GetAtomById(config.refs[1])->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[2])) config.refs[2] = canlbls.at( mol.GetAtomById(config.refs[2])->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[3])) config.refs[3] = canlbls.at( mol.GetAtomById(config.refs[3])->GetIdx() - 1 ); cout << "Config with symmetry ids: " << config << endl; cistrans2.push_back(config); } else if (((OBStereoBase*)*data)->GetType() == OBStereo::SquarePlanar) { // convert to tetrahedral data OBSquarePlanarStereo *sp = dynamic_cast<OBSquarePlanarStereo*>(*data); OB_REQUIRE( sp ); OB_ASSERT( sp->IsValid() ); OBSquarePlanarStereo::Config config = sp->GetConfig(); // convert atom ids to symmetry ids if (mol.GetAtomById(config.center)) config.center = canlbls.at( mol.GetAtomById(config.center)->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[0])) config.refs[0] = canlbls.at( mol.GetAtomById(config.refs[0])->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[1])) config.refs[1] = canlbls.at( mol.GetAtomById(config.refs[1])->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[2])) config.refs[2] = canlbls.at( mol.GetAtomById(config.refs[2])->GetIdx() - 1 ); if (mol.GetAtomById(config.refs[3])) config.refs[3] = canlbls.at( mol.GetAtomById(config.refs[3])->GetIdx() - 1 ); cout << "Config with symmetry ids: " << config << endl; squareplanar2.push_back(config); } } // compare the tetrahedral structs OB_ASSERT( tetrahedral1.size() == tetrahedral2.size() ); for (unsigned int i = 0; i < tetrahedral1.size(); ++i) { for (unsigned int j = 0; j < tetrahedral2.size(); ++j) { if (tetrahedral1[i].center == tetrahedral2[j].center) OB_ASSERT( tetrahedral1[i] == tetrahedral2[j] ); if ( tetrahedral1[i] != tetrahedral2[j] ) { cout << "1 = " << tetrahedral1[i] << endl; cout << "2 = " << tetrahedral2[j] << endl; } } } // compare the cistrans structs OB_ASSERT( cistrans1.size() == cistrans2.size() ); for (unsigned int i = 0; i < cistrans1.size(); ++i) { for (unsigned int j = 0; j < cistrans2.size(); ++j) { if ((cistrans1[i].begin == cistrans2[j].begin) && (cistrans1[i].end == cistrans2[j].end)) OB_ASSERT( cistrans1[i] == cistrans2[j] ); if ((cistrans1[i].begin == cistrans2[j].end) && (cistrans1[i].end == cistrans2[j].begin)) OB_ASSERT( cistrans1[i] == cistrans2[j] ); } } // compare the square-planar structs OB_ASSERT( squareplanar1.size() == squareplanar2.size() ); for (unsigned int i = 0; i < squareplanar1.size(); ++i) { for (unsigned int j = 0; j < squareplanar2.size(); ++j) { if (squareplanar1[i].center == squareplanar2[j].center) OB_ASSERT( squareplanar1[i] == squareplanar2[j] ); if ( squareplanar1[i] != squareplanar2[j] ) { cout << "1 = " << squareplanar1[i] << endl; cout << "2 = " << squareplanar2[j] << endl; } } } cout << "." << endl << endl; }
int main(int argc,char **argv) { char *program_name= argv[0]; int c; char *FileIn = NULL; if (argc != 2) { string err = "Usage: "; err += program_name; err += " <filename>\n" "Output format:\n" "name NAME\n" "formula FORMULA\n" "mol_weight MOLECULAR_WEIGHT\n" "exact_mass ISOTOPIC MASS\n" "canonical_SMILES STRING\n" "InChI STRING\n" "num_atoms NUM\n" "num_bonds NUM\n" "num_residues NUM\n" "num_rotors NUM\n" "sequence RESIDUE_SEQUENCE\n" "num_rings NUMBER_OF_RING_(SSSR)\n" "logP NUM\n" "PSA POLAR_SURFACE_AREA\n" "MR MOLAR REFRACTIVITY"; err += "$$$$"; // ThrowError(err); wasn't being output because error level too low cerr << err; //Why not do directly exit(-1); } else { FileIn = argv[1]; } // Find Input filetype OBConversion conv; OBFormat *format = conv.FormatFromExt(FileIn); if (!format || !conv.SetInFormat(format)) { cerr << program_name << ": cannot read input format!" << endl; exit (-1); } ifstream ifs; // Read the file ifs.open(FileIn); if (!ifs) { cerr << program_name << ": cannot read input file!" << endl; exit (-1); } OBMol mol; OBFormat *canSMIFormat = conv.FindFormat("can"); OBFormat *inchiFormat = conv.FindFormat("inchi"); //////////////////////////////////////////////////////////////////////////// // List of properties // Name // Molecular weight (Standard molar mass given by IUPAC atomic masses) // Number of rings : the size of the smallest set of smallest rings (SSSR) //.....ADD YOURS HERE..... for (c = 1;; ++c) { mol.Clear(); conv.Read(&mol, &ifs); if (mol.Empty()) break; if (!mol.HasHydrogensAdded()) mol.AddHydrogens(); // Print the properties if (strlen(mol.GetTitle()) != 0) cout << "name " << mol.GetTitle() << endl; else cout << "name " << FileIn << " " << c << endl; cout << "formula " << mol.GetFormula() << endl; cout << "mol_weight " << mol.GetMolWt() << endl; cout << "exact_mass " << mol.GetExactMass() << endl; string smilesString = "-"; if (canSMIFormat) { conv.SetOutFormat(canSMIFormat); smilesString = conv.WriteString(&mol); if ( smilesString.length() == 0 ) { smilesString = "-"; } } cout << "canonical_SMILES " << smilesString << endl; string inchiString = "-"; if (inchiFormat) { conv.SetOutFormat(inchiFormat); inchiString = conv.WriteString(&mol); if ( inchiString.length() == 0 ) { inchiString = "-"; } } cout << "InChI " << inchiString << endl; cout << "num_atoms " << mol.NumAtoms() << endl; cout << "num_bonds " << mol.NumBonds() << endl; cout << "num_residues " << mol.NumResidues() << endl; cout << "num_rotors " << mol.NumRotors() << endl; if (mol.NumResidues() > 0) cout << "sequence " << sequence(mol) << endl; else cout << "sequence " << "-" << endl; cout << "num_rings " << nrings(mol) << endl; OBDescriptor* pDesc; pDesc= OBDescriptor::FindType("logP"); if(pDesc) cout << "logP " << pDesc->Predict(&mol) << endl; pDesc = OBDescriptor::FindType("TPSA"); if(pDesc) cout << "PSA " << pDesc->Predict(&mol) << endl; pDesc = OBDescriptor::FindType("MR"); if(pDesc) cout << "MR " << pDesc->Predict(&mol) << endl; cout << "$$$$" << endl; // SDF like end of compound descriptor list //Other OBDescriptors could be output here, even ones that were rarely // used. Since these are plugin classes, they may not be loaded, but // then with code like the above they are just ignored. } // end for loop return(0); }