// Helper function -- handle SMARTS selections // Called by performAction() void SelectExtension::selectSMARTS(GLWidget *widget) { bool ok; QString pattern = QInputDialog::getText(qobject_cast<QWidget*>(parent()), tr("SMARTS Selection"), tr("SMARTS pattern to select"), QLineEdit::Normal, "", &ok); if (ok && !pattern.isEmpty()) { OBSmartsPattern smarts; smarts.Init(pattern.toStdString()); OpenBabel::OBMol obmol = m_molecule->OBMol(); smarts.Match(obmol); // if we have matches, select them if(smarts.NumMatches() != 0) { QList<Primitive *> matchedAtoms; vector< vector <int> > mapList = smarts.GetUMapList(); vector< vector <int> >::iterator i; // a set of matching atoms vector<int>::iterator j; // atom ids in each match for (i = mapList.begin(); i != mapList.end(); ++i) { for (j = i->begin(); j != i->end(); ++j) { matchedAtoms.append(m_molecule->atom(obmol.GetAtom(*j)->GetIdx()-1)); } } widget->clearSelected(); widget->setSelected(matchedAtoms, true); widget->update(); } // end matches } return; }
int main() { OBAtom a, b, c; a.SetAtomicNum(8); b.SetAtomicNum(6); c.SetAtomicNum(8); OBMol mol; mol.AddAtom(a); mol.AddAtom(b); mol.AddAtom(c); mol.AddBond(1,2,2); mol.AddBond(2,3,2); OBConversion conv; conv.SetOutFormat("SMI"); cout << conv.WriteString(&mol,1) << endl; OBSmartsPattern sp; sp.Init ("C~*"); sp.Match (mol,false); cout << sp.NumMatches() << endl; cout << sp.GetUMapList().size() << endl; return EXIT_SUCCESS; }
// Counts the unique matches of the stored SMARTS (_smarts) in a molecule.
//
// \param pOb    object to examine; must be an OBMol for a non-zero result
// \param param  unused
// \return number of unique matches, or 0 when pOb is not an OBMol, the
//         pattern cannot be initialised, or there is no match
double Predict(OBBase* pOb, string* param=NULL)
{
  OBMol* pmol = dynamic_cast<OBMol*> (pOb);
  if(!pmol)
    return 0;

  OBSmartsPattern sp;
  if(!sp.Init(_smarts))
    return 0.0;
  if(!sp.Match(*pmol))
    return 0.0;

  return sp.GetUMapList().size();
}
/**
   The descriptor name can be s or smarts and is case independent
   The operator to return true for a match can be:
   one or more spaces, =, ==,  or nothing if the SMARTS string
   starts with a letter.
   To return true for a mismatch the operator is !=
   A space or tab should follow the SMARTS string.

   \param pOb         object to test; anything that is not an OBMol fails
   \param optionText  stream positioned at the operator / SMARTS text
   \param noEval      when true the text is consumed but nothing is evaluated
   \return true when the molecule passes the filter; false when it does not,
           when noEval is set, or when the SMARTS cannot be parsed
**/
bool SmartsFilter::Compare(OBBase* pOb, istream& optionText, bool noEval)
{
  OBMol* pmol = dynamic_cast<OBMol*> (pOb);
  if(!pmol)
    return false;

  // Always consume the filter text, even when evaluation is suppressed
  string smarts;
  bool matchornegate = ReadStringFromFilter(optionText, smarts);
  if(noEval)
    return false;

  OBSmartsPattern sp;
  // An unparsable SMARTS must not be reported as a successful "!=" filter,
  // so fail outright instead of negating a meaningless non-match.
  if(!sp.Init(smarts))
    return false;

  bool ret = sp.Match(*pmol,true);//single match
  if(!matchornegate)
    ret = !ret;
  return ret;
}
// Builds a pattern-count fingerprint for a molecule.
//
// Each SMARTS string in smartsStrings owns a group of 8 consecutive bits; for
// pattern n with m unique matches, the first min(m, 8) bits of the group
// starting at bit n*8 are set. The fingerprint is always resized to 16 words
// (the power-of-two sizing that once lived here was disabled).
//
// \param pOb    object to fingerprint; must be an OBMol
// \param fp     receives the fingerprint words
// \param nbits  when non-zero, the fingerprint is folded with Fold(fp, nbits)
// \return false when pOb is not an OBMol, true otherwise
bool GetFingerprint(OBBase* pOb, vector<unsigned int>&fp, int nbits)
{
  OBMol* pmol = dynamic_cast<OBMol*>(pOb);
  if(!pmol)
    return false;

  //Read patterns file if it has not been done already
  if(smartsStrings.empty())
    ReadPatternFile(_patternsfile, smartsStrings);

  fp.resize(16);

  const unsigned int bitsPerPattern = 8;
  // Number of bits fp can hold; assumes SetBit addresses bits as
  // word * (bits per unsigned int) + offset -- TODO confirm against SetBit.
  const unsigned int capacity =
    static_cast<unsigned int>(fp.size() * sizeof(unsigned int) * 8);

  for(unsigned int n=0; n<smartsStrings.size(); ++n)
    {
      const unsigned int offset = n*bitsPerPattern;
      // The vector has a fixed size, so patterns beyond its capacity cannot
      // be encoded; stop rather than set bits past the end of fp.
      if(offset + bitsPerPattern > capacity)
        break;

      OBSmartsPattern sp;
      if(!sp.Init(smartsStrings[n]) || !sp.Match(*pmol))
        continue;

      const unsigned int matches = sp.GetUMapList().size();
      for(unsigned int i=0; i<bitsPerPattern && i<matches; ++i)
        SetBit(fp, offset+i);
    }

  if(nbits)
    Fold(fp, nbits);
  return true;
}
// For every SMARTS held by `data`, prints one line of the form
//   <smarts>\t[ id id ... ]
// listing the ids of all molecules that contain at least one match.
void CheckSmarts::checkSmarts()
{
  OBSmartsPattern smartsPattern;
  for (unsigned int i = 0; i < data->num_smarts(); i++) {
    cout << *data->get_smarts(i) << "\t[ ";

    // Parse the pattern once per SMARTS instead of once per molecule; a
    // pattern that fails to parse simply yields an empty id list.
    const bool parsed = smartsPattern.Init(*data->get_smarts(i));

    for (unsigned int j = 0; parsed && j < data->num_smiles(); j++) {
      // single-match mode: only presence matters here
      if (smartsPattern.Match(*data->get_mol(j), true))
        cout << data->get_id(j) << " ";
    }
    cout << "]\n";
  }
}
/** The descriptor name can be s or smarts and is case independent The operator to return true for a match can be: one or more spaces, =, ==, or nothing if the SMARTS string starts with a letter. To return true for a mismatch the operator is != A space or tab should follow the SMARTS string. **/ bool SmartsFilter::Compare(OBBase *pOb, istream &optionText, bool noEval, std::string *) { OBMol *pmol = dynamic_cast<OBMol *>(pOb); if (!pmol) return false; string smarts; bool matchornegate = ReadStringFromFilter(optionText, smarts); if (noEval) return false; OBSmartsPattern sp; if (!sp.Init(smarts)) return false; // can't initialize the SMARTS, so fail gracefully bool ret = sp.Match(*pmol, true); // single match if (!matchornegate) ret = !ret; return ret; }
/////////////////////////////////////////////////////////////////////////////// //! \brief Find the molecule(s) with or without a given SMART pattern int main(int argc,char **argv) { char c; unsigned int ntimes=0; // number of times SMARTS matches in a molecule unsigned int numMatching = 0; // number of matching molecules (for -c flag) bool pattern_matched=false, ntimes_matched=true; bool count=false, invert=false, full=false, name_only=false; char *FileIn = NULL, *Pattern = NULL; char *program_name = argv[0]; char *iext; bool useInFile = true; OBConversion conv(&cin,&cout); OBFormat *pFormat = conv.FindFormat("smi"); // default format is SMILES // Parse options while ((c = getopt(argc, argv, "t:nvcfi:-")) != -1) { #ifdef _WIN32 char optopt = c; #endif switch (c) { case 't': // request ntimes unique matches c = sscanf(optarg, "%d", &ntimes); if (c != 1 ) { cerr << program_name << ": unable to parse -t option" << endl; exit (-1); } break; case 'i': iext = optarg; //The ID provided by the OBFormat class is used as // the identifying file extension. This is a slight // reduction in flexibility (which is not currently used) pFormat = conv.FindFormat(iext); if(pFormat==NULL) { cerr << program_name << ": cannot read input format!" 
<< endl; exit(-1); } break; case 'n': // print the molecule name only name_only = true; break; case 'c': // count the number of match count = true; break; case 'v': // match only the molecules without the pattern invert = true; break; case 'f': full = true; break; case '-': useInFile = false; break; case '?': if (isprint (optopt)) fprintf (stderr, "Unknown option `-%c'.\n", optopt); else fprintf (stderr, "Unknown option character `\\x%x'.\n", optopt); return 1; } } int index = optind; if (argc-index != 2 && argc-index != 1) { string err = "Usage: "; err += program_name; err += " [options] \"PATTERN\" <filename>\n"; err += "If no filename is supplied, then obgrep will use stdin instead.\n"; err += "Options:\n"; err += " -v Invert the matching, print non-matching molecules\n"; err += " -c Print the number of matched molecules\n"; err += " -i <format> Specify the input and output format\n"; err += " -f Full match, print matching-molecules when the number\n"; err += " of heavy atoms is equal to the number of PATTERN atoms\n"; err += " -n Only print the name of the molecules\n"; err += " -t NUM Print a molecule only if the PATTERN occurs NUM times inside the molecule\n"; cerr << err << ends; exit(-1); } else { Pattern = argv[index++]; if (argc - index == 1) FileIn = argv[index]; } ifstream ifs; if (useInFile && FileIn != NULL) { // Read the file ifs.open(FileIn); if (!ifs) { cerr << program_name << ": cannot read input file!" << endl; exit (-1); } conv.SetInStream(&ifs); // Find Input filetype if (pFormat == NULL) { pFormat = conv.FormatFromExt(FileIn); if (pFormat == NULL) { cerr << program_name << ": cannot read input format!" << endl; return (-1); } } } if (! 
conv.SetInAndOutFormats(pFormat, pFormat)) { cerr << program_name << ": cannot read or write to this file format" << endl; return (-1); } // Match the SMART OBSmartsPattern sp; vector< vector <int> > maplist; // list of matched atoms sp.Init(Pattern); OBMol mol; bool impossible_match; // Search for pattern for (c=0;;) { mol.Clear(); conv.Read(&mol); if (mol.Empty()) break; //////////////////////////////////////////////////////////////// // Do not loose time trying to match the pattern if the matching // is impossible. // It is impossible to make a full match if the number of atoms is // different if (full ) impossible_match = (sp.NumAtoms() == mol.NumHvyAtoms()) ? false : true; else impossible_match = false; if (impossible_match) { // -> avoid useless SMART matching attempt if (invert) { if (!count) { if ( name_only ) cout << mol.GetTitle() << endl; else conv.Write(&mol, &cout); } numMatching++; } continue; } //////////////////////////////////////////////////////////////// // perform SMART matching pattern_matched = sp.Match(mol); // the number of times the match occured may matter if ( ntimes ) { // ntimes is a positive integer of requested matches // Here, a match mean a unique match (same set of atoms) // so we need to get the unique match list size maplist = sp.GetUMapList(); if( maplist.size() == ntimes ) ntimes_matched = true; else ntimes_matched = false; } else { // ntimes == 0, we don't care about the number of matches ntimes_matched = true; } //////////////////////////////////////////////////////////////// // perform a set of tests to guess what to print out if ( pattern_matched == true && ntimes_matched == true) { if (!invert) { // do something only when invert flag is off if (!count) { if ( name_only ) cout << mol.GetTitle() << endl; else conv.Write(&mol, &cout); } numMatching++; } } else { // The SMART pattern do not occur as many times as requested if (invert) { // do something only if invert flag is on if (!count) { if ( name_only ) cout << 
mol.GetTitle() << endl; else conv.Write(&mol, &cout); } numMatching++; } } } // end for loop //////////////////////////////////////////////////////////////// // Only print the number of matched molecules as requested if (count) { cout << numMatching << endl; } return(1); }
int main(int argc, char* argv[]) { // Check the required number of command line arguments. if (argc != 5) { cout << "usr host user pwd jobs_path" << endl; return 0; } // Fetch command line arguments. const auto host = argv[1]; const auto user = argv[2]; const auto pwd = argv[3]; const path jobs_path = argv[4]; // Connect to host and authenticate user. DBClientConnection conn; { cout << local_time() << "Connecting to " << host << " and authenticating " << user << endl; string errmsg; if ((!conn.connect(host, errmsg)) || (!conn.auth("istar", user, pwd, errmsg))) { cerr << local_time() << errmsg << endl; return 1; } } // Initialize constants. cout << local_time() << "Initializing" << endl; const auto collection = "istar.usr"; const auto epoch = date(1970, 1, 1); const size_t num_usrs = 2; constexpr array<size_t, num_usrs> qn{{ 12, 60 }}; constexpr array<double, num_usrs> qv{{ 1.0 / qn[0], 1.0 / qn[1] }}; const size_t num_references = 4; const size_t num_subsets = 5; const array<string, num_subsets> SubsetSMARTS {{ "[!#1]", // heavy "[#6+0!$(*~[#7,#8,F]),SH0+0v2,s+0,S^3,Cl+0,Br+0,I+0]", // hydrophobic "[a]", // aromatic "[$([O,S;H1;v2]-[!$(*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$([N&v3;H1,H2]-[!$(*=[O,N,P,S])]),$([N;v3;H0]),$([n,o,s;+0]),F]", // acceptor "[N!H0v3,N!H0+v4,OH+0,SH+0,nH+0]", // donor }}; // Initialize variables. array<array<double, qn.back()>, 1> qw; array<array<double, qn.back()>, 1> lw; auto q = qw[0]; auto l = lw[0]; // Read ZINC ID file. const string_array<size_t> zincids("16_zincid.txt"); const auto num_ligands = zincids.size(); // Read SMILES file. const string_array<size_t> smileses("16_smiles.txt"); assert(smileses.size() == num_ligands); // Read supplier file. const string_array<size_t> suppliers("16_supplier.txt"); assert(suppliers.size() == num_ligands); // Read property files of floating point types and integer types. 
const auto zfproperties = read<array<float, 4>>("16_zfprop.f32"); assert(zfproperties.size() == num_ligands); const auto ziproperties = read<array<int16_t, 5>>("16_ziprop.i16"); assert(ziproperties.size() == num_ligands); // Open files for subsequent reading. std::ifstream usrcat_bin("16_usrcat.f64"); stream_array<size_t> ligands("16_ligand.pdbqt"); assert(ligands.size() == num_ligands); array<vector<double>, 2> scores {{ vector<double>(num_ligands, 0), vector<double>(num_ligands, 0) }}; const auto& u0scores = scores[0]; const auto& u1scores = scores[1]; vector<size_t> scase(num_ligands); // Enter event loop. cout << local_time() << "Entering event loop" << endl; bool sleeping = false; while (true) { // Fetch an incompleted job in a first-come-first-served manner. if (!sleeping) cout << local_time() << "Fetching an incompleted job" << endl; BSONObj info; conn.runCommand("istar", BSON("findandmodify" << "usr" << "query" << BSON("done" << BSON("$exists" << false) << "started" << BSON("$exists" << false)) << "sort" << BSON("submitted" << 1) << "update" << BSON("$set" << BSON("started" << Date_t(duration_cast<std::chrono::milliseconds>(system_clock::now().time_since_epoch()).count())))), info); // conn.findAndModify() is available since MongoDB C++ Driver legacy-1.0.0 const auto value = info["value"]; if (value.isNull()) { // No incompleted jobs. Sleep for a while. if (!sleeping) cout << local_time() << "Sleeping" << endl; sleeping = true; this_thread::sleep_for(chrono::seconds(10)); continue; } sleeping = false; const auto job = value.Obj(); // Obtain job properties. const auto _id = job["_id"].OID(); cout << local_time() << "Executing job " << _id.str() << endl; const auto job_path = jobs_path / _id.str(); const auto format = job["format"].String(); const auto email = job["email"].String(); // Parse the user-supplied ligand. OBMol obMol; OBConversion obConversion; obConversion.SetInFormat(format.c_str()); obConversion.ReadFile(&obMol, (job_path / ("ligand." 
+ format)).string()); const auto num_atoms = obMol.NumAtoms(); // obMol.AddHydrogens(); // Adding hydrogens does not seem to affect SMARTS matching. // Classify subset atoms. array<vector<int>, num_subsets> subsets; for (size_t k = 0; k < num_subsets; ++k) { auto& subset = subsets[k]; subset.reserve(num_atoms); OBSmartsPattern smarts; smarts.Init(SubsetSMARTS[k]); smarts.Match(obMol); for (const auto& map : smarts.GetMapList()) { subset.push_back(map.front()); } } const auto& subset0 = subsets.front(); // Check user-provided ligand validity. if (subset0.empty()) { // Record job completion time stamp. const auto millis_since_epoch = duration_cast<std::chrono::milliseconds>(system_clock::now().time_since_epoch()).count(); conn.update(collection, BSON("_id" << _id), BSON("$set" << BSON("done" << Date_t(millis_since_epoch)))); // Send error notification email. cout << local_time() << "Sending an error notification email to " << email << endl; MailMessage message; message.setSender("usr <*****@*****.**>"); message.setSubject("Your usr job has failed"); message.setContent("Description: " + job["description"].String() + "\nSubmitted: " + to_simple_string(ptime(epoch, boost::posix_time::milliseconds(job["submitted"].Date().millis))) + " UTC\nFailed: " + to_simple_string(ptime(epoch, boost::posix_time::milliseconds(millis_since_epoch))) + " UTC\nReason: failed to parse the provided ligand."); message.addRecipient(MailRecipient(MailRecipient::PRIMARY_RECIPIENT, email)); SMTPClientSession session("137.189.91.190"); session.login(); session.sendMessage(message); session.close(); continue; } // Calculate the four reference points. 
const auto n = subset0.size(); const auto v = 1.0 / n; array<vector3, num_references> references{}; auto& ctd = references[0]; auto& cst = references[1]; auto& fct = references[2]; auto& ftf = references[3]; for (const auto i : subset0) { ctd += obMol.GetAtom(i)->GetVector(); } ctd *= v; double cst_dist = numeric_limits<double>::max(); double fct_dist = numeric_limits<double>::lowest(); double ftf_dist = numeric_limits<double>::lowest(); for (const auto i : subset0) { const auto& a = obMol.GetAtom(i)->GetVector(); const auto this_dist = a.distSq(ctd); if (this_dist < cst_dist) { cst = a; cst_dist = this_dist; } if (this_dist > fct_dist) { fct = a; fct_dist = this_dist; } } for (const auto i : subset0) { const auto& a = obMol.GetAtom(i)->GetVector(); const auto this_dist = a.distSq(fct); if (this_dist > ftf_dist) { ftf = a; ftf_dist = this_dist; } } // Precalculate the distances between each atom and each reference point. array<vector<double>, num_references> dista; for (size_t k = 0; k < num_references; ++k) { const auto& reference = references[k]; auto& dists = dista[k]; dists.resize(1 + num_atoms); // OpenBabel atom index starts from 1. dists[0] is dummy. for (size_t i = 0; i < n; ++i) { dists[subset0[i]] = sqrt(obMol.GetAtom(subset0[i])->GetVector().distSq(reference)); } } // Calculate USR and USRCAT features of the input ligand. 
size_t qo = 0; for (const auto& subset : subsets) { const auto n = subset.size(); for (size_t k = 0; k < num_references; ++k) { const auto& distp = dista[k]; vector<double> dists(n); for (size_t i = 0; i < n; ++i) { dists[i] = distp[subset[i]]; } array<double, 3> m{}; if (n > 2) { const auto v = 1.0 / n; for (size_t i = 0; i < n; ++i) { const auto d = dists[i]; m[0] += d; } m[0] *= v; for (size_t i = 0; i < n; ++i) { const auto d = dists[i] - m[0]; m[1] += d * d; } m[1] = sqrt(m[1] * v); for (size_t i = 0; i < n; ++i) { const auto d = dists[i] - m[0]; m[2] += d * d * d; } m[2] = cbrt(m[2] * v); } else if (n == 2) { m[0] = 0.5 * (dists[0] + dists[1]); m[1] = 0.5 * fabs(dists[0] - dists[1]); } else if (n == 1) { m[0] = dists[0]; } #pragma unroll for (const auto e : m) { q[qo++] = e; } } } assert(qo == qn.back()); // Compute USR and USRCAT scores. usrcat_bin.seekg(0); for (size_t k = 0; k < num_ligands; ++k) { usrcat_bin.read(reinterpret_cast<char*>(l.data()), sizeof(l)); double s = 0; #pragma unroll for (size_t i = 0, u = 0; u < num_usrs; ++u) { #pragma unroll for (const auto qnu = qn[u]; i < qnu; ++i) { s += fabs(q[i] - l[i]); } scores[u][k] = s; } } assert(usrcat_bin.tellg() == sizeof(l) * num_ligands); // Sort ligands by USRCAT score and then by USR score and then by ZINC ID. iota(scase.begin(), scase.end(), 0); sort(scase.begin(), scase.end(), [&](const size_t val0, const size_t val1) { const auto u1score0 = u1scores[val0]; const auto u1score1 = u1scores[val1]; if (u1score0 == u1score1) { const auto u0score0 = u0scores[val0]; const auto u0score1 = u0scores[val1]; if (u0score0 == u0score1) { return zincids[val0] < zincids[val1]; } return u0score0 < u0score1; } return u1score0 < u1score1; }); // Write results. 
filtering_ostream log_csv_gz; log_csv_gz.push(gzip_compressor()); log_csv_gz.push(file_sink((job_path / "log.csv.gz").string())); log_csv_gz.setf(ios::fixed, ios::floatfield); log_csv_gz << "ZINC ID,USR score,USRCAT score\n" << setprecision(8); filtering_ostream ligands_pdbqt_gz; ligands_pdbqt_gz.push(gzip_compressor()); ligands_pdbqt_gz.push(file_sink((job_path / "ligands.pdbqt.gz").string())); ligands_pdbqt_gz.setf(ios::fixed, ios::floatfield); for (size_t t = 0; t < 10000; ++t) { const size_t k = scase[t]; const auto zincid = zincids[k].substr(0, 8); // Take another substr() to get rid of the trailing newline. const auto u0score = 1 / (1 + scores[0][k] * qv[0]); const auto u1score = 1 / (1 + scores[1][k] * qv[1]); log_csv_gz << zincid << ',' << u0score << ',' << u1score << '\n'; // Only write conformations of the top ligands to ligands.pdbqt.gz. if (t >= 1000) continue; const auto zfp = zfproperties[k]; const auto zip = ziproperties[k]; ligands_pdbqt_gz << "MODEL " << '\n' << "REMARK 911 " << zincid << setprecision(3) << ' ' << setw(8) << zfp[0] << ' ' << setw(8) << zfp[1] << ' ' << setw(8) << zfp[2] << ' ' << setw(8) << zfp[3] << ' ' << setw(3) << zip[0] << ' ' << setw(3) << zip[1] << ' ' << setw(3) << zip[2] << ' ' << setw(3) << zip[3] << ' ' << setw(3) << zip[4] << '\n' << "REMARK 912 " << smileses[k] // A newline is already included in smileses[k]. << "REMARK 913 " << suppliers[k] // A newline is already included in suppliers[k]. << setprecision(8) << "REMARK 951 USR SCORE: " << setw(10) << u0score << '\n' << "REMARK 952 USRCAT SCORE: " << setw(10) << u1score << '\n' ; const auto lig = ligands[k]; ligands_pdbqt_gz.write(lig.data(), lig.size()); ligands_pdbqt_gz << "ENDMDL\n"; } // Update progress. 
cout << local_time() << "Setting done time" << endl; const auto millis_since_epoch = duration_cast<std::chrono::milliseconds>(system_clock::now().time_since_epoch()).count(); conn.update(collection, BSON("_id" << _id), BSON("$set" << BSON("done" << Date_t(millis_since_epoch)))); // Send completion notification email. cout << local_time() << "Sending a completion notification email to " << email << endl; MailMessage message; message.setSender("istar <*****@*****.**>"); message.setSubject("Your usr job has completed"); message.setContent("Description: " + job["description"].String() + "\nSubmitted: " + to_simple_string(ptime(epoch, boost::posix_time::milliseconds(job["submitted"].Date().millis))) + " UTC\nCompleted: " + to_simple_string(ptime(epoch, boost::posix_time::milliseconds(millis_since_epoch))) + " UTC\nResult: http://istar.cse.cuhk.edu.hk/usr/iview/?" + _id.str()); message.addRecipient(MailRecipient(MailRecipient::PRIMARY_RECIPIENT, email)); SMTPClientSession session("137.189.91.190"); session.login(); session.sendMessage(message); session.close(); } }
/////////////////////////////////////////////////////////////////////////////// //! \brief Set a tortional bond to a given angle int main(int argc,char **argv) { const char *Pattern=NULL; unsigned int i, t, errflg = 0; int c; char flags[255]; string err; bool graphOutput=false; // parse the command line -- optional -a flag to change all matching torsions if (argc < 3 || argc > 4) { errflg++; } else { FileIn = argv[1]; Pattern = "[!$(*#*)&!D1]-!@[!$(*#*)&!D1]"; // Read the atom position c = sscanf(argv[2], "%d", &angleSum); angle = 360./angleSum; if (argc == 4) { c = sscanf(argv[3], "%s", flags); int flagid=1; while (flags[flagid]!=0) switch (flags[flagid++]) { case 'g': graphOutput=true; case 'e': forceField=OBForceField::FindForceField("MMFF94"); isEnergyCalcing=true; break; } } } if (errflg) { cerr << "Usage: rkrotate <filename> <angle> [options]" << endl; exit(-1); } // create pattern OBSmartsPattern sp; sp.Init(Pattern); OBFormat* format = conv.FormatFromExt(FileIn); if(!(format && conv.SetInAndOutFormats(format, format))) { //in and out formats same cerr << "obrotate: cannot read and/or write this file format!" << endl; exit (-1); } //...NF //Open the molecule file ifstream ifs; // Read the file ifs.open(FileIn); if (!ifs) { cerr << "obrotate: cannot read input file!" << endl; exit (-1); } OBMol mol; vector< vector <int> > maplist; // list of matched atoms // vector< vector <int> >::iterator m; // and its iterators // int tindex; // Set the angles for (;;) { mol.Clear(); //NF ifs >> mol; // Read molecule conv.Read(&mol,&ifs); //NF if (mol.Empty()) break; if (sp.Match(mol)) { // if match perform rotation maplist = sp.GetUMapList(); // get unique matches if (maplist.size() > 1) cerr << "obrotate: Found " << maplist.size() << " matches." 
<< endl; energySheet=new MultiVector<double>(degrees=maplist.size(),angleSum); indexSheet=new int[maplist.size()]; for (int EXO=0;EXO<maplist.size();++EXO) totalSum*=angleSum+EXO; // look at all the mapping atom but save only the last one. turnMol(mol,maplist,maplist.size()-1); if (graphOutput) { ofstream ofs("energyGraph.mlog"); int ind[degrees]; for (int i=0;i<degrees;++i) ind[i]=0; do { for (int i=0;i<degrees;++i) ofs<<ind[i]<<'\t'; ofs<<energySheet->getVectorValue(ind)<<endl; } while(energySheet->incressIndex(ind)); } if (isEnergyCalcing) { std::vector<int*> lowEnergySheet; totalSum=energySheet->getMinValues(lowEnergySheet); if (totalSum) outputMol(lowEnergySheet,mol,maplist,maplist.size()-1); else cerr << "rkrotate: No low energy conformation found." << endl; } cout << sum; } else { cerr << "obrotate: Found 0 matches for the SMARTS pattern." << endl; exit(-1); } //NF cout << mol; } return(0); }
bool OpNewS::Do(OBBase* pOb, const char* OptionText, OpMap* pmap, OBConversion* pConv) { OBMol* pmol = dynamic_cast<OBMol*>(pOb); if(!pmol) return false; // The SMARTS and any other parameters are extracted on the first molecule // and stored in the static variables vec, inv. The parameter is cleared so that: // (a) the original -s option in transform.cpp is inactive, and // (b) the parsing does not have to be done again for multi-molecule files string txt(pmap->find(GetID())->second); // ID can be "s" or "v" static vector<string> vec; static bool inv; static int nPatternAtoms; //non-zero for exact matches static OBQuery* query; static vector<OBQuery*> queries; vector<OBQuery*>::iterator qiter; if(!txt.empty()) { //Set up on first call tokenize(vec, txt); inv = GetID()[0]=='v'; if(vec[0][0]=='~') { inv = true; vec[0].erase(0,1); } //Interpret as a filename if possible MakeQueriesFromMolInFile(queries, vec[0], &nPatternAtoms); if(vec.size()>1 && vec[1]=="exact") { if(queries.empty()) { //Convert SMARTS to SMILES to count number of atoms OBConversion conv; OBMol patmol; if(!conv.SetInFormat("smi") || !conv.ReadString(&patmol, vec[0])) { obErrorLog.ThrowError(__FUNCTION__, "Cannot read the parameter of -s option, " "which has to be valid SMILES when the exact option is used.", obError, onceOnly); delete pmol; pConv->SetOneObjectOnly(); //stop conversion return false; } nPatternAtoms = patmol.NumHvyAtoms(); } } else nPatternAtoms = 0; //disable old versions pConv->AddOption(GetID(), OBConversion::GENOPTIONS, ""); } bool match; //These are a vector of each mapping, each containing atom indxs. 
vector<vector<int> > vecatomvec; vector<vector<int> >* pMappedAtoms = NULL; OBSmartsPattern sp; if(nPatternAtoms) if(pmol->NumHvyAtoms() != nPatternAtoms) return false; int imol=0; //index of mol in pattern file if(!queries.empty()) //filename supplied { //match is set true if any of the structures match - OR behaviour for(qiter=queries.begin();qiter!=queries.end();++qiter, ++imol) { OBIsomorphismMapper* mapper = OBIsomorphismMapper::GetInstance(*qiter); OBIsomorphismMapper::Mappings mappings; mapper->MapUnique(pmol, mappings); if( (match = !mappings.empty()) ) // extra parens to indicate truth value { OBIsomorphismMapper::Mappings::iterator ita; OBIsomorphismMapper::Mapping::iterator itb; for(ita=mappings.begin(); ita!=mappings.end();++ita)//each mapping { vector<int> atomvec; for(itb=ita->begin(); itb!=ita->end();++itb)//each atom index atomvec.push_back(itb->second+1); vecatomvec.push_back(atomvec); atomvec.clear(); } pMappedAtoms = &vecatomvec; break; } } } else //SMARTS supplied { if(!sp.Init(vec[0])) { string msg = vec[0] + " cannot be interpreted as either valid SMARTS " "or the name of a file with an extension known to OpenBabel " "that contains one or more pattern molecules."; obErrorLog.ThrowError(__FUNCTION__, msg, obError, onceOnly); delete pmol; pmol = NULL; pConv->SetOneObjectOnly(); //stop conversion return false; } if( (match = sp.Match(*pmol)) ) // extra parens to indicate truth value pMappedAtoms = &sp.GetMapList(); } if((!match && !inv) || (match && inv)) { //delete a non-matching mol delete pmol; pmol = NULL; return false; } if(!inv && vec.size()>=2 && !vec[1].empty() && !nPatternAtoms) { vector<vector<int> >::iterator iter; if(vec[1]=="extract") { //Delete all unmatched atoms. 
Use only the first match ExtractSubstruct(pmol, *pMappedAtoms->begin()); return true; } // color the substructure if there is a second parameter which is not "exact" or "extract" // with multiple color parameters use the one corresponding to the query molecule, or the last if(imol>vec.size()-2) imol = vec.size()-2; for(iter=pMappedAtoms->begin();iter!=pMappedAtoms->end();++iter)//each match AddDataToSubstruct(pmol, *iter, "color", vec[imol+1]); return true; } if(pConv && pConv->IsLast()) { for(qiter=queries.begin();qiter!=queries.end();++qiter) delete *qiter; queries.clear(); } return true; }
// Perform any requested transformations on this OBMol.
//
// The input map has option letters or names as the key and any associated
// text as the value.
// For normal (non-filter) transforms: returns a pointer to the OBMol (this)
// if ok or NULL if not.
// For filters: returns a pointer to the OBMol (this) if there is a match and
// NULL when not; in addition the OBMol object is deleted.
// Whenever NULL is returned because a filter or an option failed in this
// function, `this` has been deleted and must not be used again.
//
// The filter options s and v allow an obgrep facility. Used together they
// must both be true to allow a molecule through.
//
// \param pOptions  option map (must not be NULL)
// \param pConv     conversion in progress, forwarded to the plugin ops
OBBase* OBMol::DoTransformations(const std::map<std::string, std::string>* pOptions, OBConversion* pConv)
{
  //Parse GeneralOptions
  if(pOptions->empty())
    return this;

  // DoOps calls Do() for each of the plugin options in the map
  // It normally returns true, even if there are no options but
  // can return false if one of the options decides that the
  // molecule should not be output. If it is a filtering op, it
  // should delete the molecule itself (unlike the -s, --filter options,
  // which delete it in this function).
  if(!OBOp::DoOps(this, pOptions, pConv))
    return (OBBase *)NULL;

  bool ret=true;

  map<string,string>::const_iterator itr, itr2;

  if(pOptions->find("b")!=pOptions->end())
    if(!ConvertDativeBonds())
      ret=false;

  if(pOptions->find("d")!=pOptions->end())
    if(!DeleteHydrogens())
      ret=false;

  if(pOptions->find("h")!=pOptions->end())
    if(!AddHydrogens(false, false))
      ret=false;

  // Stripping salts must not reset ret: a failure recorded by one of the
  // options above still has to be reported at the end.
  if(pOptions->find("r")!=pOptions->end())
    StripSalts();

  itr = pOptions->find("p");
  if(itr!=pOptions->end())
    {
      double pH = strtod(itr->second.c_str(), 0);
      if(!AddHydrogens(false, true, pH))
        ret=false;
    }

  if(pOptions->find("c")!=pOptions->end())
    Center();

  itr = pOptions->find("title"); //Replaces title
  if(itr!=pOptions->end())
    SetTitle(itr->second.c_str());

  itr = pOptions->find("addtotitle"); //Appends text to title
  if(itr!=pOptions->end())
    {
      string title(GetTitle());
      title += itr->second;
      SetTitle(title.c_str());
    }

  /*
  itr = pOptions->find("addformula"); //Appends tab + formula to title
  if(itr!=pOptions->end())
  {
    string title(GetTitle());
    title += '\t' + GetSpacedFormula(1,"");//actually unspaced
    SetTitle(title.c_str());
  }
  */

  //Add an extra property to the molecule.
  //Parameter has attribute and value separated by a space
  itr = pOptions->find("property");
  if(itr!=pOptions->end())
    {
      string txt(itr->second);
      string::size_type pos = txt.find(' ');
      if(pos==string::npos)
        {
          obErrorLog.ThrowError(__FUNCTION__, "Missing property value", obError);
          ret=false;
        }
      else
        {
          string attr(txt.substr(0,pos)), val(txt.substr(pos+1));
          //Update value if it already exists
          OBPairData* dp = dynamic_cast<OBPairData*>(GetData(attr));
          if(dp)
            {
              dp->SetValue(val);
              dp->SetOrigin(userInput);
            }
          else
            {
              // Pair did not exist; make new one
              dp = new OBPairData;
              dp->SetAttribute(attr);
              dp->SetValue(val);
              dp->SetOrigin(userInput);
              SetData(dp);
            }
        }
    }

  itr = pOptions->find("add"); //adds new properties from descriptors in list
  if(itr!=pOptions->end())
    OBDescriptor::AddProperties(this, itr->second);

  itr = pOptions->find("delete"); //deletes the specified properties
  if(itr!=pOptions->end())
    OBDescriptor::DeleteProperties(this, itr->second);

  itr = pOptions->find("append"); //Appends values of descriptors or properties to title
  if(itr!=pOptions->end())
    {
      string title(GetTitle());
      title += OBDescriptor::GetValues(this, itr->second);
      // A leading punct char is used only as a separator, not at start.
      // The empty() test avoids indexing an empty title, and the cast keeps
      // ispunct() defined for characters with the high bit set.
      if(!title.empty() && ispunct(static_cast<unsigned char>(title[0])))
        title[0]=' ';
      SetTitle(Trim(title).c_str());
    }

  //Filter using OBDescriptor comparison and (older) SMARTS tests
  //Continue only if previous test was true.
  bool fmatch = true;
  itr = pOptions->find("filter");
  if(itr!=pOptions->end())
    {
      std::istringstream optionText(itr->second);
      fmatch = OBDescriptor::FilterCompare(this, optionText, false);
    }

  if(fmatch)
    {
      itr = pOptions->find("v");
      if(itr!=pOptions->end() && !itr->second.empty())
        {
          //inverse match quoted SMARTS string which follows
          OBSmartsPattern sp;
          sp.Init(itr->second);
          fmatch = !sp.Match(*this);
        }
    }

  if(fmatch)
    {
      itr = pOptions->find("s");
      if(itr!=pOptions->end() && !itr->second.empty())
        {
          //SMARTS filter
          //If exactmatch option set (probably in fastsearchformat) the
          //number of atoms in the pattern (passed as a string in the option text)
          //has to be the same as in the molecule.
          itr2 = pOptions->find("exactmatch");
          if(itr2!=pOptions->end() && NumHvyAtoms()!=atoi(itr2->second.c_str()))
            fmatch=false;
          else
            {
              //match quoted SMARTS string which follows
              OBSmartsPattern sp;
              sp.Init(itr->second.c_str());
              fmatch = sp.Match(*this);
            }
        }
    }

  if(!fmatch)
    {
      //filter failed: delete OBMol and return NULL
      delete this;
      return NULL;
    }

  if(ret==false)
    {
      obErrorLog.ThrowError(__FUNCTION__, "Error executing an option", obError);
      delete this; //added 9March2006
      return NULL;
    }

  return this;
}
void OBBondTyper::AssignFunctionalGroupBonds(OBMol &mol) { if (!_init) Init(); OBSmartsPattern *currentPattern; OBBond *b1, *b2; OBAtom *a1,*a2, *a3; double angle, dist1, dist2; vector<int> assignments; vector<vector<int> > mlist; vector<vector<int> >::iterator matches, l; vector<pair<OBSmartsPattern*, vector<int> > >::iterator i; unsigned int j; // Loop through for all the functional groups and assign bond orders for (i = _fgbonds.begin();i != _fgbonds.end();++i) { currentPattern = i->first; assignments = i->second; if (currentPattern && currentPattern->Match(mol)) { mlist = currentPattern->GetUMapList(); for (matches = mlist.begin(); matches != mlist.end(); ++matches) { // Now loop through the bonds to assign from _fgbonds for (j = 0; j < assignments.size(); j += 3) { // along the assignments vector: atomID1 atomID2 bondOrder a1 = mol.GetAtom((*matches)[ assignments[j] ]); a2 = mol.GetAtom((*matches)[ assignments[j+1 ] ]); if (!a1 || !a2) continue; b1 = a1->GetBond(a2); if (!b1) continue; b1->SetBO(assignments[j+2]); } // bond order assignments } // each match } // current pattern matches } // for(functional groups) // FG with distance and/or bond criteria // Carbonyl oxygen C=O OBSmartsPattern carbo; carbo.Init("[#8D1][#6](*)(*)"); if (carbo.Match(mol)) { mlist = carbo.GetUMapList(); for (l = mlist.begin(); l != mlist.end(); ++l) { a1 = mol.GetAtom((*l)[0]); a2 = mol.GetAtom((*l)[1]); angle = a2->AverageBondAngle(); dist1 = a1->GetDistance(a2); // carbonyl geometries ? 
if (angle > 115 && angle < 150 && dist1 < 1.28) { if ( !a1->HasDoubleBond() ) {// no double bond already assigned b1 = a1->GetBond(a2); if (!b1 ) continue; b1->SetBO(2); } } } } // Carbonyl oxygen // thione C=S OBSmartsPattern thione; thione.Init("[#16D1][#6](*)(*)"); if (thione.Match(mol)) { mlist = thione.GetUMapList(); for (l = mlist.begin(); l != mlist.end(); ++l) { a1 = mol.GetAtom((*l)[0]); a2 = mol.GetAtom((*l)[1]); angle = a2->AverageBondAngle(); dist1 = a1->GetDistance(a2); // thione geometries ? if (angle > 115 && angle < 150 && dist1 < 1.72) { if ( !a1->HasDoubleBond() ) {// no double bond already assigned b1 = a1->GetBond(a2); if (!b1 ) continue; b1->SetBO(2); } } } } // thione // Isocyanate N=C=O or Isothiocyanate bool dist1OK; OBSmartsPattern isocyanate; isocyanate.Init("[#8,#16;D1][#6D2][#7D2]"); if (isocyanate.Match(mol)) { mlist = isocyanate.GetUMapList(); for (l = mlist.begin(); l != mlist.end(); ++l) { a1 = mol.GetAtom((*l)[0]); a2 = mol.GetAtom((*l)[1]); a3 = mol.GetAtom((*l)[2]); angle = a2->AverageBondAngle(); dist1 = a1->GetDistance(a2); dist2 = a2->GetDistance(a3); // isocyanate geometry or Isotiocyanate geometry ? if (a1->IsOxygen()) dist1OK = dist1 < 1.28; else dist1OK = dist1 < 1.72; if (angle > 150 && dist1OK && dist2 < 1.34) { b1 = a1->GetBond(a2); b2 = a2->GetBond(a3); if (!b1 || !b2) continue; b1->SetBO(2); b2->SetBO(2); } } } // Isocyanate // oxime C=S OBSmartsPattern oxime; oxime.Init("[#6D3][#7D2][#8D2]"); if (oxime.Match(mol)) { mlist = oxime.GetUMapList(); for (l = mlist.begin(); l != mlist.end(); ++l) { a1 = mol.GetAtom((*l)[0]); a2 = mol.GetAtom((*l)[1]); angle = a2->AverageBondAngle(); dist1 = a1->GetDistance(a2); // thione geometries ? if (angle > 110 && angle < 150 && dist1 < 1.4) { if ( !a1->HasDoubleBond() ) {// no double bond already assigned b1 = a1->GetBond(a2); if (!b1 ) continue; b1->SetBO(2); } } } } // oxime }
int main(int argc, char* argv[]) { const array<string, 5> SmartsPatterns = { "[!#1]", // heavy "[#6+0!$(*~[#7,#8,F]),SH0+0v2,s+0,S^3,Cl+0,Br+0,I+0]", // hydrophobic "[a]", // aromatic "[$([O,S;H1;v2]-[!$(*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$([N&v3;H1,H2]-[!$(*=[O,N,P,S])]),$([N;v3;H0]),$([n,o,s;+0]),F]", // acceptor "[N!H0v3,N!H0+v4,OH+0,SH+0,nH+0]", // donor }; OBConversion obConversion; obConversion.SetInFormat("pdbqt"); while (true) { vector<array<double, 3>> atoms; atoms.reserve(80); stringstream ss; for (string line; getline(cin, line);) { ss << line << endl; const auto record = line.substr(0, 6); if (record == "TORSDO") break; if (record != "ATOM " && record != "HETATM") continue; atoms.push_back({ stod(line.substr(30, 8)), stod(line.substr(38, 8)), stod(line.substr(46, 8)) }); } if (atoms.empty()) break; OBMol obMol; obConversion.Read(&obMol, &ss); array<vector<int>, 5> subsets; for (size_t k = 0; k < 5; ++k) { auto& subset = subsets[k]; subset.reserve(atoms.size()); OBSmartsPattern smarts; smarts.Init(SmartsPatterns[k]); smarts.Match(obMol); for (const auto& map : smarts.GetMapList()) { subset.push_back(map.front() - 1); } } const auto& subset0 = subsets.front(); const auto n = subset0.size(); const auto v = 1.0 / n; array<double, 3> ctd{}; array<double, 3> cst{}; array<double, 3> fct{}; array<double, 3> ftf{}; for (size_t k = 0; k < 3; ++k) { for (const auto i : subset0) { const auto& a = atoms[i]; ctd[k] += a[k]; } ctd[k] *= v; } double cst_dist = numeric_limits<double>::max(); double fct_dist = numeric_limits<double>::lowest(); double ftf_dist = numeric_limits<double>::lowest(); for (const auto i : subset0) { const auto& a = atoms[i]; const auto this_dist = dist2(a, ctd); if (this_dist < cst_dist) { cst = a; cst_dist = this_dist; } if (this_dist > fct_dist) { fct = a; fct_dist = this_dist; } } for (const auto i : subset0) { const auto& a = atoms[i]; const auto this_dist = dist2(a, fct); if (this_dist > ftf_dist) { ftf = a; ftf_dist = this_dist; } } 
for (const auto& subset : subsets) { const auto n = subset.size(); const auto v = 1.0 / n; for (const auto& rpt : { ctd, cst, fct, ftf }) { vector<double> dists(n); for (size_t i = 0; i < n; ++i) { dists[i] = sqrt(dist2(atoms[subset[i]], rpt)); } array<double, 3> m{}; for (size_t i = 0; i < n; ++i) { const auto d = dists[i]; m[0] += d; } m[0] *= v; for (size_t i = 0; i < n; ++i) { const auto d = dists[i] - m[0]; m[1] += d * d; } m[1] = sqrt(m[1] * v); for (size_t i = 0; i < n; ++i) { const auto d = dists[i] - m[0]; m[2] += d * d * d; } m[2] = cbrt(m[2] * v); cout.write(reinterpret_cast<char*>(m.data()), sizeof(m)); } } } }
int main(int argc,char *argv[]) { // turn off slow sync with C-style output (we don't use it anyway). std::ios::sync_with_stdio(false); if (argc != 1) { cout << "Usage: smilesmatch\n"; cout << " Tests Open Babel SMILES/SMARTS pattern matching." << endl; return 0; } cout << endl << "# Testing SMILES self-matching using SMARTS... \n"; std::ifstream mifs; if (!SafeOpen(mifs, smilestypes_file.c_str())) { cout << "Bail out! Cannot read test data " << smilestypes_file << endl; return -1; // test failed } OBConversion conv(&mifs, &cout); if (! conv.SetInAndOutFormats("SMI","SMI")) { cout << "Bail out! SMILES format is not loaded" << endl; return -1; } unsigned int currentMol = 0; OBSmartsPattern smarts; OBMol mol; string buffer; //read in molecules and see if their SMARTS matches themselves while (getline(mifs, buffer)) { mol.Clear(); conv.ReadString(&mol, buffer); if (mol.Empty()) continue; // trim off any title, etc. string::size_type pos = buffer.find_first_of(" \t\n\r"); if (pos != string::npos) buffer.erase(pos); pos = buffer.find_first_of('.'); if (pos != string::npos) continue; smarts.Init(buffer); if (smarts.Match(mol)) cout << "ok " << ++currentMol << " # SMARTS matched the" << " SMILES molecule\n"; else cout << "not ok " << ++currentMol << " # SMARTS did not match" << " for molecule " << buffer << "\n"; } // output the number of tests run cout << "1.." << currentMol << endl; // Passed Test return 0; }