inline void kill_wrapper(pid_t pid, int sig, int port) { #ifdef _WIN32 if (sig == SIGKILL || port == 0) { assert( handles.count(pid) ); TerminateProcess(handles[pid], 1); // returns failure for "zombie" processes. } else { DBClientConnection conn; try { conn.connect("127.0.0.1:" + BSONObjBuilder::numStr(port)); BSONObj info; BSONObjBuilder b; b.append( "shutdown", 1 ); b.append( "force", 1 ); conn.runCommand( "admin", b.done(), info ); } catch (...) { //Do nothing. This command never returns data to the client and the driver doesn't like that. } } #else int x = kill( pid, sig ); if ( x ) { if ( errno == ESRCH ) { } else { cout << "killFailed: " << errnoWithDescription() << endl; assert( x == 0 ); } } #endif }
/**
 * Shard a collection on the given key.
 *
 * Connects to the server at `mongoSHostPort`, runs
 * { shardcollection: fullCollection, key: shardKey } against the "admin"
 * database, and prints both the command and the server's reply to stdout.
 */
void mongoDeploy::shardCollection (string mongoSHostPort, string fullCollection, mongo::BSONObj shardKey) {
    using namespace mongo;

    BSONObj reply;
    DBClientConnection router;
    router.connect (mongoSHostPort);

    const BSONObj command = BSON ("shardcollection" << fullCollection << "key" << shardKey);
    cout << command << " -> " << endl;

    router.runCommand ("admin", command, reply);
    cout << reply << endl;
}
/**
 * Enable sharding on the given database.
 *
 * Connects to the server at `mongoSHostPort`, runs
 * { enablesharding: database } against the "admin" database, and prints both
 * the command and the server's reply to stdout.
 */
void mongoDeploy::shardDatabase (string mongoSHostPort, string database) {
    using namespace mongo;

    BSONObj reply;
    DBClientConnection router;
    router.connect (mongoSHostPort);

    const BSONObj command = BSON ("enablesharding" << database);
    cout << command << " -> " << endl;

    router.runCommand ("admin", command, reply);
    cout << reply << endl;
}
/**
 * Print diagnostic information about the server behind `c` to stdout.
 *
 * Runs, in order, serverStatus, replSetGetStatus and listDatabases against
 * DBNAME and prints each reply. Then, for every entry of the "databases"
 * array in the listDatabases reply that carries a "name" field, runs dbstats
 * against that database and prints the reply.
 *
 * If listDatabases fails or its reply has no "databases" array, a message is
 * printed and the per-database pass is skipped (previously the unconditional
 * Array() access raised a BSON type assertion in that case).
 *
 * @param c  An already connected client.
 */
void info(DBClientConnection &c) {
    BSONObj info, cmd;

    cmd = BSONObjBuilder().append( "serverStatus", 1 ).obj();
    c.runCommand(DBNAME, cmd, info);
    cout << info << endl;

    cmd = BSONObjBuilder().append("replSetGetStatus", 1).obj();
    c.runCommand(DBNAME, cmd, info);
    cout << info << endl;

    cmd = BSONObjBuilder().append("listDatabases", 1).obj();
    const bool listed = c.runCommand(DBNAME, cmd, info);
    cout << info << endl;

    // Array() asserts on the element type, so verify it before touching it.
    if( !listed || info["databases"].type() != mongo::Array ) {
        cout << "listDatabases did not return a databases array; skipping dbstats" << endl;
        return;
    }

    // foreach db in dbases { runCommand(db, "dbstats") }
    cmd = BSONObjBuilder().append("dbstats", 1).obj();
    cout << info["databases"].size() << endl;
    vector<BSONElement> dbases = info["databases"].Array();
    cout << dbases.size() << " -- " << dbases.capacity() << endl;

    for( vector<BSONElement>::iterator iter = dbases.begin(); iter != dbases.end(); ++iter ) {
        if( !iter->ok() )
            continue;

        BSONElement &db = *iter;
        if( !db["name"].ok() )
            continue;

        string dbname;
        db["name"].Val(dbname);
        cout << dbname << endl;

        BSONObj dbinfo;
        c.runCommand(dbname, cmd, dbinfo);
        cout << dbinfo << endl;
    }
}
inline void kill_wrapper( pid_t pid, int sig, int port, const BSONObj& opt ) { #ifdef _WIN32 if (sig == SIGKILL || port == 0) { verify( registry._handles.count(pid) ); TerminateProcess(registry._handles[pid], 1); // returns failure for "zombie" processes. } else { DBClientConnection conn; try { conn.connect("127.0.0.1:" + BSONObjBuilder::numStr(port)); BSONElement authObj = opt["auth"]; if ( !authObj.eoo() ){ string errMsg; conn.auth( "admin", authObj["user"].String(), authObj["pwd"].String(), errMsg ); if ( !errMsg.empty() ) { cout << "Failed to authenticate before shutdown: " << errMsg << endl; } } BSONObj info; BSONObjBuilder b; b.append( "shutdown", 1 ); b.append( "force", 1 ); conn.runCommand( "admin", b.done(), info ); } catch (...) { //Do nothing. This command never returns data to the client and the driver doesn't like that. } } #else int x = kill( pid, sig ); if ( x ) { if ( errno == ESRCH ) { } else { log() << "killFailed: " << errnoWithDescription() << endl; verify( x == 0 ); } } #endif }
int main(int argc, char* argv[]) { // Check the required number of command line arguments. if (argc != 5) { cout << "usr host user pwd jobs_path" << endl; return 0; } // Fetch command line arguments. const auto host = argv[1]; const auto user = argv[2]; const auto pwd = argv[3]; const path jobs_path = argv[4]; // Connect to host and authenticate user. DBClientConnection conn; { cout << local_time() << "Connecting to " << host << " and authenticating " << user << endl; string errmsg; if ((!conn.connect(host, errmsg)) || (!conn.auth("istar", user, pwd, errmsg))) { cerr << local_time() << errmsg << endl; return 1; } } // Initialize constants. cout << local_time() << "Initializing" << endl; const auto collection = "istar.usr"; const auto epoch = date(1970, 1, 1); const size_t num_usrs = 2; constexpr array<size_t, num_usrs> qn{{ 12, 60 }}; constexpr array<double, num_usrs> qv{{ 1.0 / qn[0], 1.0 / qn[1] }}; const size_t num_references = 4; const size_t num_subsets = 5; const array<string, num_subsets> SubsetSMARTS {{ "[!#1]", // heavy "[#6+0!$(*~[#7,#8,F]),SH0+0v2,s+0,S^3,Cl+0,Br+0,I+0]", // hydrophobic "[a]", // aromatic "[$([O,S;H1;v2]-[!$(*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$([N&v3;H1,H2]-[!$(*=[O,N,P,S])]),$([N;v3;H0]),$([n,o,s;+0]),F]", // acceptor "[N!H0v3,N!H0+v4,OH+0,SH+0,nH+0]", // donor }}; // Initialize variables. array<array<double, qn.back()>, 1> qw; array<array<double, qn.back()>, 1> lw; auto q = qw[0]; auto l = lw[0]; // Read ZINC ID file. const string_array<size_t> zincids("16_zincid.txt"); const auto num_ligands = zincids.size(); // Read SMILES file. const string_array<size_t> smileses("16_smiles.txt"); assert(smileses.size() == num_ligands); // Read supplier file. const string_array<size_t> suppliers("16_supplier.txt"); assert(suppliers.size() == num_ligands); // Read property files of floating point types and integer types. 
const auto zfproperties = read<array<float, 4>>("16_zfprop.f32"); assert(zfproperties.size() == num_ligands); const auto ziproperties = read<array<int16_t, 5>>("16_ziprop.i16"); assert(ziproperties.size() == num_ligands); // Open files for subsequent reading. std::ifstream usrcat_bin("16_usrcat.f64"); stream_array<size_t> ligands("16_ligand.pdbqt"); assert(ligands.size() == num_ligands); array<vector<double>, 2> scores {{ vector<double>(num_ligands, 0), vector<double>(num_ligands, 0) }}; const auto& u0scores = scores[0]; const auto& u1scores = scores[1]; vector<size_t> scase(num_ligands); // Enter event loop. cout << local_time() << "Entering event loop" << endl; bool sleeping = false; while (true) { // Fetch an incompleted job in a first-come-first-served manner. if (!sleeping) cout << local_time() << "Fetching an incompleted job" << endl; BSONObj info; conn.runCommand("istar", BSON("findandmodify" << "usr" << "query" << BSON("done" << BSON("$exists" << false) << "started" << BSON("$exists" << false)) << "sort" << BSON("submitted" << 1) << "update" << BSON("$set" << BSON("started" << Date_t(duration_cast<std::chrono::milliseconds>(system_clock::now().time_since_epoch()).count())))), info); // conn.findAndModify() is available since MongoDB C++ Driver legacy-1.0.0 const auto value = info["value"]; if (value.isNull()) { // No incompleted jobs. Sleep for a while. if (!sleeping) cout << local_time() << "Sleeping" << endl; sleeping = true; this_thread::sleep_for(chrono::seconds(10)); continue; } sleeping = false; const auto job = value.Obj(); // Obtain job properties. const auto _id = job["_id"].OID(); cout << local_time() << "Executing job " << _id.str() << endl; const auto job_path = jobs_path / _id.str(); const auto format = job["format"].String(); const auto email = job["email"].String(); // Parse the user-supplied ligand. OBMol obMol; OBConversion obConversion; obConversion.SetInFormat(format.c_str()); obConversion.ReadFile(&obMol, (job_path / ("ligand." 
+ format)).string()); const auto num_atoms = obMol.NumAtoms(); // obMol.AddHydrogens(); // Adding hydrogens does not seem to affect SMARTS matching. // Classify subset atoms. array<vector<int>, num_subsets> subsets; for (size_t k = 0; k < num_subsets; ++k) { auto& subset = subsets[k]; subset.reserve(num_atoms); OBSmartsPattern smarts; smarts.Init(SubsetSMARTS[k]); smarts.Match(obMol); for (const auto& map : smarts.GetMapList()) { subset.push_back(map.front()); } } const auto& subset0 = subsets.front(); // Check user-provided ligand validity. if (subset0.empty()) { // Record job completion time stamp. const auto millis_since_epoch = duration_cast<std::chrono::milliseconds>(system_clock::now().time_since_epoch()).count(); conn.update(collection, BSON("_id" << _id), BSON("$set" << BSON("done" << Date_t(millis_since_epoch)))); // Send error notification email. cout << local_time() << "Sending an error notification email to " << email << endl; MailMessage message; message.setSender("usr <*****@*****.**>"); message.setSubject("Your usr job has failed"); message.setContent("Description: " + job["description"].String() + "\nSubmitted: " + to_simple_string(ptime(epoch, boost::posix_time::milliseconds(job["submitted"].Date().millis))) + " UTC\nFailed: " + to_simple_string(ptime(epoch, boost::posix_time::milliseconds(millis_since_epoch))) + " UTC\nReason: failed to parse the provided ligand."); message.addRecipient(MailRecipient(MailRecipient::PRIMARY_RECIPIENT, email)); SMTPClientSession session("137.189.91.190"); session.login(); session.sendMessage(message); session.close(); continue; } // Calculate the four reference points. 
const auto n = subset0.size(); const auto v = 1.0 / n; array<vector3, num_references> references{}; auto& ctd = references[0]; auto& cst = references[1]; auto& fct = references[2]; auto& ftf = references[3]; for (const auto i : subset0) { ctd += obMol.GetAtom(i)->GetVector(); } ctd *= v; double cst_dist = numeric_limits<double>::max(); double fct_dist = numeric_limits<double>::lowest(); double ftf_dist = numeric_limits<double>::lowest(); for (const auto i : subset0) { const auto& a = obMol.GetAtom(i)->GetVector(); const auto this_dist = a.distSq(ctd); if (this_dist < cst_dist) { cst = a; cst_dist = this_dist; } if (this_dist > fct_dist) { fct = a; fct_dist = this_dist; } } for (const auto i : subset0) { const auto& a = obMol.GetAtom(i)->GetVector(); const auto this_dist = a.distSq(fct); if (this_dist > ftf_dist) { ftf = a; ftf_dist = this_dist; } } // Precalculate the distances between each atom and each reference point. array<vector<double>, num_references> dista; for (size_t k = 0; k < num_references; ++k) { const auto& reference = references[k]; auto& dists = dista[k]; dists.resize(1 + num_atoms); // OpenBabel atom index starts from 1. dists[0] is dummy. for (size_t i = 0; i < n; ++i) { dists[subset0[i]] = sqrt(obMol.GetAtom(subset0[i])->GetVector().distSq(reference)); } } // Calculate USR and USRCAT features of the input ligand. 
size_t qo = 0; for (const auto& subset : subsets) { const auto n = subset.size(); for (size_t k = 0; k < num_references; ++k) { const auto& distp = dista[k]; vector<double> dists(n); for (size_t i = 0; i < n; ++i) { dists[i] = distp[subset[i]]; } array<double, 3> m{}; if (n > 2) { const auto v = 1.0 / n; for (size_t i = 0; i < n; ++i) { const auto d = dists[i]; m[0] += d; } m[0] *= v; for (size_t i = 0; i < n; ++i) { const auto d = dists[i] - m[0]; m[1] += d * d; } m[1] = sqrt(m[1] * v); for (size_t i = 0; i < n; ++i) { const auto d = dists[i] - m[0]; m[2] += d * d * d; } m[2] = cbrt(m[2] * v); } else if (n == 2) { m[0] = 0.5 * (dists[0] + dists[1]); m[1] = 0.5 * fabs(dists[0] - dists[1]); } else if (n == 1) { m[0] = dists[0]; } #pragma unroll for (const auto e : m) { q[qo++] = e; } } } assert(qo == qn.back()); // Compute USR and USRCAT scores. usrcat_bin.seekg(0); for (size_t k = 0; k < num_ligands; ++k) { usrcat_bin.read(reinterpret_cast<char*>(l.data()), sizeof(l)); double s = 0; #pragma unroll for (size_t i = 0, u = 0; u < num_usrs; ++u) { #pragma unroll for (const auto qnu = qn[u]; i < qnu; ++i) { s += fabs(q[i] - l[i]); } scores[u][k] = s; } } assert(usrcat_bin.tellg() == sizeof(l) * num_ligands); // Sort ligands by USRCAT score and then by USR score and then by ZINC ID. iota(scase.begin(), scase.end(), 0); sort(scase.begin(), scase.end(), [&](const size_t val0, const size_t val1) { const auto u1score0 = u1scores[val0]; const auto u1score1 = u1scores[val1]; if (u1score0 == u1score1) { const auto u0score0 = u0scores[val0]; const auto u0score1 = u0scores[val1]; if (u0score0 == u0score1) { return zincids[val0] < zincids[val1]; } return u0score0 < u0score1; } return u1score0 < u1score1; }); // Write results. 
filtering_ostream log_csv_gz; log_csv_gz.push(gzip_compressor()); log_csv_gz.push(file_sink((job_path / "log.csv.gz").string())); log_csv_gz.setf(ios::fixed, ios::floatfield); log_csv_gz << "ZINC ID,USR score,USRCAT score\n" << setprecision(8); filtering_ostream ligands_pdbqt_gz; ligands_pdbqt_gz.push(gzip_compressor()); ligands_pdbqt_gz.push(file_sink((job_path / "ligands.pdbqt.gz").string())); ligands_pdbqt_gz.setf(ios::fixed, ios::floatfield); for (size_t t = 0; t < 10000; ++t) { const size_t k = scase[t]; const auto zincid = zincids[k].substr(0, 8); // Take another substr() to get rid of the trailing newline. const auto u0score = 1 / (1 + scores[0][k] * qv[0]); const auto u1score = 1 / (1 + scores[1][k] * qv[1]); log_csv_gz << zincid << ',' << u0score << ',' << u1score << '\n'; // Only write conformations of the top ligands to ligands.pdbqt.gz. if (t >= 1000) continue; const auto zfp = zfproperties[k]; const auto zip = ziproperties[k]; ligands_pdbqt_gz << "MODEL " << '\n' << "REMARK 911 " << zincid << setprecision(3) << ' ' << setw(8) << zfp[0] << ' ' << setw(8) << zfp[1] << ' ' << setw(8) << zfp[2] << ' ' << setw(8) << zfp[3] << ' ' << setw(3) << zip[0] << ' ' << setw(3) << zip[1] << ' ' << setw(3) << zip[2] << ' ' << setw(3) << zip[3] << ' ' << setw(3) << zip[4] << '\n' << "REMARK 912 " << smileses[k] // A newline is already included in smileses[k]. << "REMARK 913 " << suppliers[k] // A newline is already included in suppliers[k]. << setprecision(8) << "REMARK 951 USR SCORE: " << setw(10) << u0score << '\n' << "REMARK 952 USRCAT SCORE: " << setw(10) << u1score << '\n' ; const auto lig = ligands[k]; ligands_pdbqt_gz.write(lig.data(), lig.size()); ligands_pdbqt_gz << "ENDMDL\n"; } // Update progress. 
cout << local_time() << "Setting done time" << endl; const auto millis_since_epoch = duration_cast<std::chrono::milliseconds>(system_clock::now().time_since_epoch()).count(); conn.update(collection, BSON("_id" << _id), BSON("$set" << BSON("done" << Date_t(millis_since_epoch)))); // Send completion notification email. cout << local_time() << "Sending a completion notification email to " << email << endl; MailMessage message; message.setSender("istar <*****@*****.**>"); message.setSubject("Your usr job has completed"); message.setContent("Description: " + job["description"].String() + "\nSubmitted: " + to_simple_string(ptime(epoch, boost::posix_time::milliseconds(job["submitted"].Date().millis))) + " UTC\nCompleted: " + to_simple_string(ptime(epoch, boost::posix_time::milliseconds(millis_since_epoch))) + " UTC\nResult: http://istar.cse.cuhk.edu.hk/usr/iview/?" + _id.str()); message.addRecipient(MailRecipient(MailRecipient::PRIMARY_RECIPIENT, email)); SMTPClientSession session("137.189.91.190"); session.login(); session.sendMessage(message); session.close(); } }
inline void kill_wrapper( ProcessId pid, int sig, int port, const BSONObj& opt ) { #ifdef _WIN32 if (sig == SIGKILL || port == 0) { verify( registry._handles.count(pid) ); TerminateProcess(registry._handles[pid], 1); // returns failure for "zombie" processes. return; } std::string eventName = getShutdownSignalName(pid.asUInt32()); HANDLE event = OpenEventA(EVENT_MODIFY_STATE, FALSE, eventName.c_str()); if (event == NULL) { int gle = GetLastError(); if (gle != ERROR_FILE_NOT_FOUND) { warning() << "kill_wrapper OpenEvent failed: " << errnoWithDescription(); } else { log() << "kill_wrapper OpenEvent failed to open event to the process " << pid.asUInt32() << ". It has likely died already or server is running an older version." << " Attempting to shutdown through admin command."; // Back-off to the old way of shutting down the server on Windows, in case we // are managing a pre-2.6.0rc0 service, which did not have the event. // try { DBClientConnection conn; conn.connect("127.0.0.1:" + BSONObjBuilder::numStr(port)); BSONElement authObj = opt["auth"]; if (!authObj.eoo()){ string errMsg; conn.auth("admin", authObj["user"].String(), authObj["pwd"].String(), errMsg); if (!errMsg.empty()) { cout << "Failed to authenticate before shutdown: " << errMsg << endl; } } BSONObj info; BSONObjBuilder b; b.append("shutdown", 1); b.append("force", 1); conn.runCommand("admin", b.done(), info); } catch (...) { // Do nothing. This command never returns data to the client and the driver // doesn't like that. // } } return; } ON_BLOCK_EXIT(CloseHandle, event); bool result = SetEvent(event); if (!result) { error() << "kill_wrapper SetEvent failed: " << errnoWithDescription(); return; } #else int x = kill( pid.toNative(), sig ); if ( x ) { if ( errno == ESRCH ) { } else { log() << "killFailed: " << errnoWithDescription() << endl; verify( x == 0 ); } } #endif }
int main(int argc, char* argv[]) { // Check the required number of command line arguments. if (argc != 5) { cout << "usr host user pwd jobs_path" << endl; return 0; } // Fetch command line arguments. const auto host = argv[1]; const auto user = argv[2]; const auto pwd = argv[3]; const path jobs_path = argv[4]; DBClientConnection conn; { // Connect to host and authenticate user. cout << local_time() << "Connecting to " << host << " and authenticating " << user << endl; string errmsg; if ((!conn.connect(host, errmsg)) || (!conn.auth("istar", user, pwd, errmsg))) { cerr << local_time() << errmsg << endl; return 1; } } // Initialize constants. cout << local_time() << "Initializing" << endl; const auto collection = "istar.usr2"; const size_t num_usrs = 2; const array<string, 2> usr_names{{ "USR", "USRCAT" }}; constexpr array<size_t, num_usrs> qn{{ 12, 60 }}; constexpr array<double, num_usrs> qv{{ 1.0 / qn[0], 1.0 / qn[1] }}; const size_t num_refPoints = 4; const size_t num_subsets = 5; const array<string, num_subsets> SubsetSMARTS {{ "[!#1]", // heavy "[#6+0!$(*~[#7,#8,F]),SH0+0v2,s+0,S^3,Cl+0,Br+0,I+0]", // hydrophobic "[a]", // aromatic "[$([O,S;H1;v2]-[!$(*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$([N&v3;H1,H2]-[!$(*=[O,N,P,S])]),$([N;v3;H0]),$([n,o,s;+0]),F]", // acceptor "[N!H0v3,N!H0+v4,OH+0,SH+0,nH+0]", // donor }}; const size_t num_hits = 100; // Wrap SMARTS strings to RWMol objects. array<unique_ptr<ROMol>, num_subsets> SubsetMols; for (size_t k = 0; k < num_subsets; ++k) { SubsetMols[k].reset(reinterpret_cast<ROMol*>(SmartsToMol(SubsetSMARTS[k]))); } // Read ZINC ID file. const string_array<size_t> zincids("16/zincid.txt"); const auto num_ligands = zincids.size(); cout << local_time() << "Found " << num_ligands << " database molecules" << endl; // Read SMILES file. const string_array<size_t> smileses("16/smiles.txt"); assert(smileses.size() == num_ligands); // Read supplier file. 
const string_array<size_t> suppliers("16/supplier.txt"); assert(suppliers.size() == num_ligands); // Read property files of floating point types and integer types. const auto zfproperties = read<array<float, 4>>("16/zfprop.f32"); assert(zfproperties.size() == num_ligands); const auto ziproperties = read<array<int16_t, 5>>("16/ziprop.i16"); assert(ziproperties.size() == num_ligands); // Read cumulative number of conformers file. const auto mconfss = read<size_t>("16/mconfs.u64"); const auto num_conformers = mconfss.back(); assert(mconfss.size() == num_ligands); assert(num_conformers >= num_ligands); cout << local_time() << "Found " << num_conformers << " database conformers" << endl; // Read feature file. const auto features = read<array<double, qn.back()>>("16/usrcat.f64"); assert(features.size() == num_conformers); // Read ligand footer file and open ligand SDF file for seeking and reading. stream_array<size_t> ligands("16/ligand.sdf"); assert(ligands.size() == num_conformers); // Initialize variables. array<vector<int>, num_subsets> subsets; array<vector<double>, num_refPoints> dista; alignas(32) array<double, qn.back()> q; // Initialize vectors to store compounds' primary score and their corresponding conformer. vector<double> scores(num_ligands); // Primary score of molecules. vector<size_t> cnfids(num_ligands); // ID of conformer with the best primary score. const auto compare = [&](const size_t val0, const size_t val1) // Sort by the primary score. { return scores[val0] < scores[val1]; }; // Initialize an io service pool and create worker threads for later use. const size_t num_threads = thread::hardware_concurrency(); cout << local_time() << "Creating an io service pool of " << num_threads << " worker threads" << endl; io_service_pool io(num_threads); safe_counter<size_t> cnt; // Initialize the number of chunks and the number of molecules per chunk. 
const auto num_chunks = num_threads << 4; const auto chunk_size = 1 + (num_ligands - 1) / num_chunks; assert(chunk_size * num_chunks >= num_ligands); assert(chunk_size >= num_hits); cout << local_time() << "Using " << num_chunks << " chunks and a chunk size of " << chunk_size << endl; vector<size_t> scase(num_ligands); vector<size_t> zcase(num_hits * (num_chunks - 1) + min(num_hits, num_ligands - chunk_size * (num_chunks - 1))); // The last chunk might have fewer than num_hits records. // Enter event loop. cout << local_time() << "Entering event loop" << endl; cout.setf(ios::fixed, ios::floatfield); bool sleeping = false; while (true) { // Fetch an incompleted job in a first-come-first-served manner. if (!sleeping) cout << local_time() << "Fetching an incompleted job" << endl; BSONObj info; const auto started = milliseconds_since_epoch(); conn.runCommand("istar", BSON("findandmodify" << "usr2" << "query" << BSON("started" << BSON("$exists" << false)) << "sort" << BSON("submitted" << 1) << "update" << BSON("$set" << BSON("started" << started))), info); // conn.findAndModify() is available since MongoDB C++ Driver legacy-1.0.0 const auto value = info["value"]; if (value.isNull()) { // No incompleted jobs. Sleep for a while. if (!sleeping) cout << local_time() << "Sleeping" << endl; sleeping = true; this_thread::sleep_for(chrono::seconds(2)); continue; } sleeping = false; const auto job = value.Obj(); // Obtain job properties. const auto _id = job["_id"].OID(); cout << local_time() << "Executing job " << _id.str() << endl; const auto job_path = jobs_path / _id.str(); const size_t usr0 = job["usr"].Int(); // Specify the primary sorting score. 0: USR; 1: USRCAT. assert(usr0 == 0 || usr0 == 1); const auto usr1 = usr0 ^ 1; const auto qnu0 = qn[usr0]; const auto qnu1 = qn[usr1]; // Read and validate the user-supplied SDF file. 
cout << local_time() << "Reading and validating the query file" << endl; SDMolSupplier sup((job_path / "query.sdf").string(), true, false, true); // sanitize, removeHs, strictParsing if (!sup.length() || !sup.atEnd()) { const auto error = 1; cout << local_time() << "Failed to parse the query file, error code = " << error << endl; conn.update(collection, BSON("_id" << _id), BSON("$set" << BSON("completed" << milliseconds_since_epoch() << "error" << error))); continue; } // Process each of the query molecules sequentially. const auto num_queries = 1; // Restrict the number of query molecules to 1. Setting num_queries = sup.length() to execute any number of query molecules. for (unsigned int query_number = 0; query_number < num_queries; ++query_number) { cout << local_time() << "Parsing query molecule " << query_number << endl; const unique_ptr<ROMol> qry_ptr(sup.next()); // Calling next() may print "ERROR: Could not sanitize molecule on line XXXX" to stderr. auto& qryMol = *qry_ptr; // Get the number of atoms, including and excluding hydrogens. const auto num_atoms = qryMol.getNumAtoms(); const auto num_heavy_atoms = qryMol.getNumHeavyAtoms(); assert(num_heavy_atoms); cout << local_time() << "Found " << num_atoms << " atoms and " << num_heavy_atoms << " heavy atoms" << endl; // Create an output directory. cout << local_time() << "Creating output directory" << endl; const auto output_dir = job_path / to_string(query_number); create_directory(output_dir); // Draw a SVG. cout << local_time() << "Drawing a SVG" << endl; { const unique_ptr<ROMol> qrz_ptr(removeHs(qryMol)); auto& qrzMol = *qrz_ptr; compute2DCoords(qrzMol); boost::filesystem::ofstream ofs(output_dir / "query.svg"); ofs << DrawingToSVG(MolToDrawing(qrzMol)); } // Calculate Morgan fingerprint. cout << local_time() << "Calculating Morgan fingerprint" << endl; const unique_ptr<SparseIntVect<uint32_t>> qryFp(getFingerprint(qryMol, 2)); // Classify atoms to pharmacophoric subsets. 
cout << local_time() << "Classifying atoms into subsets" << endl; for (size_t k = 0; k < num_subsets; ++k) { vector<vector<pair<int, int>>> matchVect; SubstructMatch(qryMol, *SubsetMols[k], matchVect); const auto num_matches = matchVect.size(); auto& subset = subsets[k]; subset.resize(num_matches); for (size_t i = 0; i < num_matches; ++i) { subset[i] = matchVect[i].front().second; } cout << local_time() << "Found " << num_matches << " atoms for subset " << k << endl; } const auto& subset0 = subsets.front(); assert(subset0.size() == num_heavy_atoms); // Calculate the four reference points. cout << local_time() << "Calculating " << num_refPoints << " reference points" << endl; const auto qryRefPoints = calcRefPoints(qryMol, subset0); const Point3DConstPtrVect qryRefPointv {{ &qryRefPoints[0], &qryRefPoints[1], &qryRefPoints[2], &qryRefPoints[3], }}; // Precalculate the distances of heavy atoms to the reference points, given that subsets[1 to 4] are subsets of subsets[0]. cout << local_time() << "Calculating " << num_heavy_atoms * num_refPoints << " pairwise distances" << endl; const auto& qryCnf = qryMol.getConformer(); for (size_t k = 0; k < num_refPoints; ++k) { const auto& refPoint = qryRefPoints[k]; auto& distp = dista[k]; distp.resize(num_atoms); for (size_t i = 0; i < num_heavy_atoms; ++i) { distp[subset0[i]] = sqrt(dist2(qryCnf.getAtomPos(subset0[i]), refPoint)); } } // Loop over pharmacophoric subsets and reference points. cout << local_time() << "Calculating " << 3 * num_refPoints * num_subsets << " moments of USRCAT feature" << endl; size_t qo = 0; for (const auto& subset : subsets) { const auto n = subset.size(); for (size_t k = 0; k < num_refPoints; ++k) { // Load distances from precalculated ones. const auto& distp = dista[k]; vector<double> dists(n); for (size_t i = 0; i < n; ++i) { dists[i] = distp[subset[i]]; } // Compute moments. 
array<double, 3> m{}; if (n > 2) { const auto v = 1.0 / n; for (size_t i = 0; i < n; ++i) { const auto d = dists[i]; m[0] += d; } m[0] *= v; for (size_t i = 0; i < n; ++i) { const auto d = dists[i] - m[0]; m[1] += d * d; } m[1] = sqrt(m[1] * v); for (size_t i = 0; i < n; ++i) { const auto d = dists[i] - m[0]; m[2] += d * d * d; } m[2] = cbrt(m[2] * v); } else if (n == 2) { m[0] = 0.5 * (dists[0] + dists[1]); m[1] = 0.5 * fabs(dists[0] - dists[1]); } else if (n == 1) { m[0] = dists[0]; } for (const auto e : m) { q[qo++] = e; } } } assert(qo == qn.back()); // Compute USR and USRCAT scores. cout << local_time() << "Calculating " << num_ligands << " " << usr_names[usr0] << " scores" << endl; scores.assign(scores.size(), numeric_limits<double>::max()); iota(scase.begin(), scase.end(), 0); cnt.init(num_chunks); for (size_t l = 0; l < num_chunks; ++l) { io.post([&,l]() { // Loop over molecules of the current chunk. const auto chunk_beg = chunk_size * l; const auto chunk_end = min(chunk_beg + chunk_size, num_ligands); for (size_t k = chunk_beg; k < chunk_end; ++k) { // Loop over conformers of the current molecule and calculate their primary score. auto& scorek = scores[k]; size_t j = k ? mconfss[k - 1] : 0; for (const auto mconfs = mconfss[k]; j < mconfs; ++j) { const auto& d = features[j]; double s = 0; for (size_t i = 0; i < qnu0; ++i) { s += abs(q[i] - d[i]); if (s >= scorek) break; } if (s < scorek) { scorek = s; cnfids[k] = j; } } } // Sort the scores of molecules of the current chunk. sort(scase.begin() + chunk_beg, scase.begin() + chunk_end, compare); // Copy the indexes of top hits of the current chunk to a global vector for final sorting. copy_n(scase.begin() + chunk_beg, min(num_hits, chunk_end - chunk_beg), zcase.begin() + num_hits * l); cnt.increment(); }); } cnt.wait(); // Sort the top hits from chunks. 
cout << local_time() << "Sorting " << zcase.size() << " hits by " << usr_names[usr0] << " score" << endl; sort(zcase.begin(), zcase.end(), compare); // Create output directory and write output files. cout << local_time() << "Writing output files" << endl; SDWriter hits_sdf((output_dir / "hits.sdf").string()); boost::filesystem::ofstream hits_csv(output_dir / "hits.csv"); hits_csv.setf(ios::fixed, ios::floatfield); hits_csv << "ZINC ID,USR score,USRCAT score,2D Tanimoto score,Molecular weight (g/mol),Partition coefficient xlogP,Apolar desolvation (kcal/mol),Polar desolvation (kcal/mol),Hydrogen bond donors,Hydrogen bond acceptors,Polar surface area tPSA (Å^2),Net charge,Rotatable bonds,SMILES,Vendors and annotations\n"; for (size_t l = 0; l < num_hits; ++l) { // Obtain indexes to the hit molecule and the hit conformer. const auto k = zcase[l]; const auto j = cnfids[k]; // Read SDF content of the hit conformer. const auto lig = ligands[j]; // Construct a RDKit ROMol object. istringstream iss(lig); SDMolSupplier sup(&iss, false, true, false, true); assert(sup.length() == 1); assert(sup.atEnd()); const unique_ptr<ROMol> hit_ptr(sup.next()); auto& hitMol = *hit_ptr; // Calculate Morgan fingerprint. const unique_ptr<SparseIntVect<uint32_t>> hitFp(getFingerprint(hitMol, 2)); // Calculate Tanimoto similarity. const auto ts = TanimotoSimilarity(*qryFp, *hitFp); // Find heavy atoms. vector<vector<pair<int, int>>> matchVect; SubstructMatch(hitMol, *SubsetMols[0], matchVect); const auto num_matches = matchVect.size(); assert(num_matches == hitMol.getNumHeavyAtoms()); vector<int> hitHeavyAtoms(num_matches); for (size_t i = 0; i < num_matches; ++i) { hitHeavyAtoms[i] = matchVect[i].front().second; assert(hitHeavyAtoms[i] == i); // hitHeavyAtoms can be constructed using iota(hitHeavyAtoms.begin(), hitHeavyAtoms.end(), 0); because for RDKit-generated SDF molecules, heavy atom are always the first few atoms. } // Calculate the four reference points. 
const auto hitRefPoints = calcRefPoints(hitMol, hitHeavyAtoms); const Point3DConstPtrVect hitRefPointv {{ &hitRefPoints[0], &hitRefPoints[1], &hitRefPoints[2], &hitRefPoints[3], }}; // Calculate a 3D transform from the four reference points of the hit conformer to those of the query molecule. Transform3D trans; AlignPoints(qryRefPointv, hitRefPointv, trans); // Apply the 3D transform to all atoms of the hit conformer. auto& hitCnf = hitMol.getConformer(); transformConformer(hitCnf, trans); // Write the aligned hit conformer. hits_sdf.write(hitMol); // Calculate the secondary score of the saved conformer, which has the best primary score. const auto& d = features[j]; double s = 0; for (size_t i = 0; i < qnu1; ++i) { s += abs(q[i] - d[i]); } const auto u0score = 1 / (1 + scores[k] * qv[usr0]); // Primary score of the current molecule. const auto u1score = 1 / (1 + s * qv[usr1]); // Secondary score of the current molecule. const auto zincid = zincids[k].substr(0, 8); // Take another substr() to get rid of the trailing newline. const auto zfp = zfproperties[k]; const auto zip = ziproperties[k]; const auto smiles = smileses[k]; // A newline is already included in smileses[k]. const auto supplier = suppliers[k]; // A newline is already included in suppliers[k]. hits_csv << zincid << setprecision(8) << ',' << (usr1 ? u0score : u1score) << ',' << (usr1 ? u1score : u0score) << ',' << ts << setprecision(3) << ',' << zfp[0] << ',' << zfp[1] << ',' << zfp[2] << ',' << zfp[3] << ',' << zip[0] << ',' << zip[1] << ',' << zip[2] << ',' << zip[3] << ',' << zip[4] << ',' << smiles.substr(0, smiles.length() - 1) // Get rid of the trailing newline. << ',' << supplier.substr(0, supplier.length() - 1) // Get rid of the trailing newline. << '\n' ; } } // Update job status. 
cout << local_time() << "Setting completed time" << endl; const auto completed = milliseconds_since_epoch(); conn.update(collection, BSON("_id" << _id), BSON("$set" << BSON("completed" << completed << "nqueries" << num_queries))); // Calculate runtime in seconds and screening speed in million conformers per second. const auto runtime = (completed - started) * 0.001; const auto speed = num_conformers * 0.000001 * num_queries / runtime; cout << local_time() << "Completed " << num_queries << " " << (num_queries == 1 ? "query" : "queries") << " in " << setprecision(3) << runtime << " seconds" << endl << local_time() << "Screening speed was " << setprecision(0) << speed << " M conformers per second" << endl ; } }