bool FastSearchFormat::ObtainTarget(OBConversion* pConv, OBMol& patternMol, const string& indexname) { //Obtains an OBMol // either from the SMARTS string in the -s option // or by converting the file in the -S option //or, if neither option is provided, displays information on the index file. stringstream smiles(stringstream::out); ifstream patternstream; OBConversion PatternConv(&patternstream,&smiles); const char* p = pConv->IsOption("s",OBConversion::GENOPTIONS); string txt; if(p) { // Use the -s option txt=p; stringstream smarts(txt, stringstream::in); OBConversion Convsm(&smarts); if(!Convsm.SetInFormat("smi")) return false; Convsm.Read(&patternMol); //erase -s option in GeneralOptions since it will be rewritten pConv->RemoveOption("s",OBConversion::GENOPTIONS); if(patternMol.Empty()) { obErrorLog.ThrowError(__FUNCTION__, "Could not make a molecule from " + smarts.str() + "\nThis needs to be valid SMILES when using fastsearch." "You can use the more versatile SMARTS in a normal substructure search." , obError); return false; } } else { // or Make OBMol from file in -S option or -aS option p = pConv->IsOption("S",OBConversion::GENOPTIONS); if(!p) p = pConv->IsOption("S",OBConversion::INOPTIONS);//for GUI mainly } if(!p) { //neither -s or -S options provided. Output info rather than doing search const FptIndexHeader& header = fs.GetIndexHeader(); string id(header.fpid); if(id.empty()) id = "default"; clog << indexname << " is an index of\n " << header.datafilename << ".\n It contains " << header.nEntries << " molecules. The fingerprint type is " << id << " with " << OBFingerprint::Getbitsperint() * header.words << " bits.\n" << "Typical usage for a substructure search:\n" << "babel indexfile.fs -osmi -sSMILES" << endl; return false; } if(p && patternMol.Empty()) { txt=p; string::size_type pos = txt.find_last_of('.'); if(pos==string::npos) { obErrorLog.ThrowError(__FUNCTION__, "Filename of pattern molecule in -S option must have an extension", obError); return false; } patternstream.open(txt.c_str()); if(!patternstream) { stringstream errorMsg; errorMsg << "Cannot open " << txt << endl; obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError); return false; } PatternConv.SetOneObjectOnly(); if(PatternConv.SetInFormat(txt.substr(pos+1).c_str())) PatternConv.Read(&patternMol); } if(patternMol.Empty()) { obErrorLog.ThrowError(__FUNCTION__, "Cannot derive a molecule from the -s or -S options", obWarning); return false; } patternMol.ConvertDativeBonds();//use standard form for dative bonds //Convert to SMILES and generate a -s option for use in the final filtering if(!PatternConv.SetOutFormat("smi")) return false; PatternConv.Write(&patternMol); //remove name to leave smiles string string smilesstr(smiles.str()); string::size_type pos = smilesstr.find_first_of(" \t\r\n"); if(pos!=string::npos) smilesstr = smilesstr.substr(0,pos); pConv->AddOption("s", OBConversion::GENOPTIONS, smilesstr.c_str()); return true; }
bool fragmentMol(const ROMol& mol, std::vector<std::pair<ROMOL_SPTR, ROMOL_SPTR> >& res, unsigned int minCuts, unsigned int maxCuts, unsigned int maxCutBonds, const std::string& pattern) { #ifdef _DEBUG for (size_t i = 0; i < mol.getNumAtoms(); i++) { std::string symbol = mol.getAtomWithIdx(i)->getSymbol(); int label = 0; mol.getAtomWithIdx(i)->getPropIfPresent(common_properties::molAtomMapNumber, label); char a1[32]; if (0 == label) sprintf(a1, "\'%s\'", symbol.c_str(), label); else sprintf(a1, "\'%s:%u\'", symbol.c_str(), label); std::cout << "Atom " << i << ": " << a1; //<<" Bonds:"; std::cout << "\n"; } #endif res.clear(); std::auto_ptr<const ROMol> smarts((const ROMol*)SmartsToMol(pattern)); std::vector<MatchVectType> matching_atoms; // one bond per match ! with default pattern unsigned int total = SubstructMatch(mol, *smarts, matching_atoms); #ifdef _DEBUG std::cout << "total substructs =" << total << "\nmatching bonds (atom1, atom2):\n"; #endif if (0 == total) // Not found. Return empty set of molecules return false; #ifdef _DEBUG for (size_t i = 0; i < matching_atoms.size(); i++) { std::string symbol = mol.getAtomWithIdx(matching_atoms[i][0].second)->getSymbol(); int label = 0; mol.getAtomWithIdx(matching_atoms[i][0].second) ->getPropIfPresent(common_properties::molAtomMapNumber, label); char a1[32]; if (0 == label) sprintf(a1, "\'%s\'", symbol.c_str(), label); else sprintf(a1, "\'%s:%u\'", symbol.c_str(), label); symbol = mol.getAtomWithIdx(matching_atoms[i][1].second)->getSymbol(); label = 0; mol.getAtomWithIdx(matching_atoms[i][1].second) ->getPropIfPresent(common_properties::molAtomMapNumber, label); char a2[32]; if (0 == label) sprintf(a2, "\'%s\'", symbol.c_str(), label); else sprintf(a2, "\'%s:%u\'", symbol.c_str(), label); std::cout << i << ": (" << matching_atoms[i][0].second << a1 << "," << matching_atoms[i][1].second << a2 << ") \n"; } #endif std::vector<BondVector_t> matching_bonds; // List of matched query's bonds convertMatchingToBondVect(matching_bonds, matching_atoms, mol); if (matching_bonds.size() > maxCutBonds) return false; #ifdef _DEBUG std::cout << "total matching_bonds = " << matching_bonds.size() << "\n"; #endif // loop to generate every cut in the molecule BondVector_t bonds_selected; processCuts(0, minCuts, maxCuts, bonds_selected, matching_bonds, mol, res); return true; }