예제 #1
0
    bool DBClientReplicaSet::auth(const string &dbname, const string &username, const string &pwd, string& errmsg, bool digestPassword ) {
        DBClientConnection * m = checkMaster();

        // first make sure it actually works
        if( ! m->auth(dbname, username, pwd, errmsg, digestPassword ) )
            return false;

        // now that it does, we should save so that for a new node we can auth
        _auths.push_back( AuthInfo( dbname , username , pwd , digestPassword ) );
        return true;
    }
예제 #2
0
      // Stops the process `pid`.
      //
      // Windows: when sig is SIGKILL or port is 0 the process handle stored in the
      // registry is terminated directly; otherwise a forced "shutdown" admin command
      // is sent to 127.0.0.1:<port>, authenticating first with opt["auth"] (fields
      // "user" and "pwd") when that element is present.
      //
      // Elsewhere: delivers `sig` with kill(). A failure with errno ESRCH is ignored;
      // any other failure is logged and then trips verify().
      inline void kill_wrapper( pid_t pid, int sig, int port, const BSONObj& opt ) {
#ifdef _WIN32
            if (sig == SIGKILL || port == 0) {
                verify( registry._handles.count(pid) );
                TerminateProcess(registry._handles[pid], 1); // returns failure for "zombie" processes.
                return;
            }

            DBClientConnection conn;
            try {
                conn.connect("127.0.0.1:" + BSONObjBuilder::numStr(port));

                // Authenticate first when the caller supplied credentials.
                BSONElement authObj = opt["auth"];
                if ( !authObj.eoo() ) {
                    string errMsg;
                    conn.auth( "admin", authObj["user"].String(),
                               authObj["pwd"].String(), errMsg );

                    // A failed authentication is only reported; the shutdown is still attempted.
                    if ( !errMsg.empty() ) {
                        cout << "Failed to authenticate before shutdown: "
                             << errMsg << endl;
                    }
                }

                BSONObjBuilder shutdownCmd;
                shutdownCmd.append( "shutdown", 1 );
                shutdownCmd.append( "force", 1 );

                BSONObj reply;
                conn.runCommand( "admin", shutdownCmd.done(), reply );
            }
            catch (...) {
                // Intentionally ignored: this command never returns data to the client
                // and the driver doesn't like that.
            }
#else
            int x = kill( pid, sig );
            if ( x && errno != ESRCH ) {
                log() << "killFailed: " << errnoWithDescription() << endl;
                verify( x == 0 );
            }

#endif
        }
예제 #3
0
파일: dbclient.cpp 프로젝트: tanfulai/mongo
    /* TODO: unit tests should run this? */
    void testDbEval() {
        DBClientConnection c;
        string err;
        if ( !c.connect("localhost", err) ) {
            out() << "can't connect to server " << err << endl;
            return;
        }

        if( !c.auth("dwight", "u", "p", err) ) { 
            out() << "can't authenticate " << err << endl;
            return;
        }

        BSONObj info;
        BSONElement retValue;
        BSONObjBuilder b;
        b.append("0", 99);
        BSONObj args = b.done();
        bool ok = c.eval("dwight", "function() { return args[0]; }", info, retValue, &args);
        out() << "eval ok=" << ok << endl;
        out() << "retvalue=" << retValue.toString() << endl;
        out() << "info=" << info.toString() << endl;

        out() << endl;

        int x = 3;
        assert( c.eval("dwight", "function() { return 3; }", x) );

        out() << "***\n";

        BSONObj foo = fromjson("{\"x\":7}");
        out() << foo.toString() << endl;
        int res=0;
        ok = c.eval("dwight", "function(parm1) { return parm1.x; }", foo, res);
        out() << ok << " retval:" << res << endl;
    }
예제 #4
0
파일: isself.cpp 프로젝트: tanakh/mongo
bool HostAndPort::isSelf() const {

    if( dyn() ) {
        MONGO_LOG(2) << "isSelf " << _dynName << ' ' << dynHostMyName() << endl;
        return dynHostMyName() == _dynName;
    }

    int _p = port();
    int p = _p == -1 ? CmdLine::DefaultDBPort : _p;

    if( p != cmdLine.port ) {
        // shortcut - ports have to match at the very least
        return false;
    }

    string host = str::stream() << this->host() << ":" << p;

    {
        // check cache for this host
        // debatably something _could_ change, but I'm not sure right now (erh 10/14/2010)
        scoped_lock lk( isSelfCommand._cacheLock );
        map<string,bool>::const_iterator i = isSelfCommand._cache.find( host );
        if ( i != isSelfCommand._cache.end() )
            return i->second;
    }

#if !defined(_WIN32) && !defined(__sunos__)
    // on linux and os x we can do a quick check for an ip match

    const vector<string> myaddrs = getMyAddrs();
    const vector<string> addrs = getAllIPs(_host);

    for (vector<string>::const_iterator i=myaddrs.begin(), iend=myaddrs.end(); i!=iend; ++i) {
        for (vector<string>::const_iterator j=addrs.begin(), jend=addrs.end(); j!=jend; ++j) {
            string a = *i;
            string b = *j;

            if ( a == b ||
                    ( str::startsWith( a , "127." ) && str::startsWith( b , "127." ) )  // 127. is all loopback
               ) {

                // add to cache
                scoped_lock lk( isSelfCommand._cacheLock );
                isSelfCommand._cache[host] = true;
                return true;
            }
        }
    }

#endif

    if ( ! Listener::getTimeTracker() ) {
        // this ensures we are actually running a server
        // this may return true later, so may want to retry
        return false;
    }

    try {
        isSelfCommand.init();
        DBClientConnection conn;
        string errmsg;
        if ( ! conn.connect( host , errmsg ) ) {
            // should this go in the cache?
            return false;
        }

        if (!noauth && cmdLine.keyFile &&
                !conn.auth("local", internalSecurity.user, internalSecurity.pwd, errmsg, false)) {
            return false;
        }

        BSONObj out;
        bool ok = conn.simpleCommand( "admin" , &out , "_isSelf" );
        bool me = ok && out["id"].type() == jstOID && isSelfCommand._id == out["id"].OID();

        // add to cache
        scoped_lock lk( isSelfCommand._cacheLock );
        isSelfCommand._cache[host] = me;

        return me;
    }
    catch ( std::exception& e ) {
        warning() << "could't check isSelf (" << host << ") " << e.what() << endl;
    }

    return false;
}
예제 #5
0
/**
 * Entry point of the usr job daemon.
 *
 * Usage: usr host user pwd jobs_path
 *
 * Connects to the MongoDB server at `host`, authenticates `user`/`pwd` against
 * the "istar" database, loads the ligand database files from the working
 * directory, and then loops forever:
 *   - atomically claims the oldest job of collection istar.usr that has neither
 *     a "started" nor a "done" field;
 *   - parses <jobs_path>/<job id>/ligand.<format> and computes its 60 USRCAT
 *     moments (the first 12 of which are the USR moments);
 *   - scores every database ligand, sorts them, and writes log.csv.gz and
 *     ligands.pdbqt.gz into the job directory;
 *   - stamps the job as done and emails the submitter.
 *
 * @return 0 when the argument count is wrong (after printing the usage),
 *         1 when connecting or authenticating fails; otherwise never returns.
 */
int main(int argc, char* argv[])
{
	// Check the required number of command line arguments.
	if (argc != 5)
	{
		cout << "usr host user pwd jobs_path" << endl;
		return 0;
	}

	// Fetch command line arguments.
	const auto host = argv[1];
	const auto user = argv[2];
	const auto pwd = argv[3];
	const path jobs_path = argv[4];

	// Connect to host and authenticate user.
	DBClientConnection conn;
	{
		cout << local_time() << "Connecting to " << host << " and authenticating " << user << endl;
		string errmsg;
		if ((!conn.connect(host, errmsg)) || (!conn.auth("istar", user, pwd, errmsg)))
		{
			cerr << local_time() << errmsg << endl;
			return 1;
		}
	}

	// Initialize constants.
	cout << local_time() << "Initializing" << endl;
	const auto collection = "istar.usr";
	const auto epoch = date(1970, 1, 1);
	const size_t num_usrs = 2;
	// qn[u] is the number of leading feature elements that contribute to score u; qv[u] is its reciprocal.
	constexpr array<size_t, num_usrs> qn{{ 12, 60 }};
	constexpr array<double, num_usrs> qv{{ 1.0 / qn[0], 1.0 / qn[1] }};
	const size_t num_references = 4;
	const size_t num_subsets = 5;
	const array<string, num_subsets> SubsetSMARTS
	{{
		"[!#1]", // heavy
		"[#6+0!$(*~[#7,#8,F]),SH0+0v2,s+0,S^3,Cl+0,Br+0,I+0]", // hydrophobic
		"[a]", // aromatic
		"[$([O,S;H1;v2]-[!$(*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$([N&v3;H1,H2]-[!$(*=[O,N,P,S])]),$([N;v3;H0]),$([n,o,s;+0]),F]", // acceptor
		"[N!H0v3,N!H0+v4,OH+0,SH+0,nH+0]", // donor
	}};

	// Initialize variables.
	// q holds the features of the query ligand, l those of the database ligand being scored.
	array<array<double, qn.back()>, 1> qw;
	array<array<double, qn.back()>, 1> lw;
	auto q = qw[0];
	auto l = lw[0];

	// Read ZINC ID file.
	const string_array<size_t> zincids("16_zincid.txt");
	const auto num_ligands = zincids.size();

	// Read SMILES file.
	const string_array<size_t> smileses("16_smiles.txt");
	assert(smileses.size() == num_ligands);

	// Read supplier file.
	const string_array<size_t> suppliers("16_supplier.txt");
	assert(suppliers.size() == num_ligands);

	// Read property files of floating point types and integer types.
	const auto zfproperties = read<array<float, 4>>("16_zfprop.f32");
	assert(zfproperties.size() == num_ligands);
	const auto ziproperties = read<array<int16_t, 5>>("16_ziprop.i16");
	assert(ziproperties.size() == num_ligands);

	// Open files for subsequent reading.
	std::ifstream usrcat_bin("16_usrcat.f64");
	stream_array<size_t> ligands("16_ligand.pdbqt");
	assert(ligands.size() == num_ligands);
	// scores[0] accumulates over the first qn[0] features, scores[1] over all qn[1] features.
	array<vector<double>, 2> scores
	{{
		vector<double>(num_ligands, 0),
		vector<double>(num_ligands, 0)
	}};
	const auto& u0scores = scores[0];
	const auto& u1scores = scores[1];
	vector<size_t> scase(num_ligands);

	// Enter event loop.
	cout << local_time() << "Entering event loop" << endl;
	bool sleeping = false;
	while (true)
	{
		// Fetch an incompleted job in a first-come-first-served manner.
		if (!sleeping) cout << local_time() << "Fetching an incompleted job" << endl;
		BSONObj info;
		conn.runCommand("istar", BSON("findandmodify" << "usr" << "query" << BSON("done" << BSON("$exists" << false) << "started" << BSON("$exists" << false)) << "sort" << BSON("submitted" << 1) << "update" << BSON("$set" << BSON("started" << Date_t(duration_cast<std::chrono::milliseconds>(system_clock::now().time_since_epoch()).count())))), info); // conn.findAndModify() is available since MongoDB C++ Driver legacy-1.0.0
		const auto value = info["value"];
		if (value.isNull())
		{
			// No incompleted jobs. Sleep for a while.
			if (!sleeping) cout << local_time() << "Sleeping" << endl;
			sleeping = true;
			this_thread::sleep_for(chrono::seconds(10));
			continue;
		}
		sleeping = false;
		const auto job = value.Obj();

		// Obtain job properties.
		const auto _id = job["_id"].OID();
		cout << local_time() << "Executing job " << _id.str() << endl;
		const auto job_path = jobs_path / _id.str();
		const auto format = job["format"].String();
		const auto email = job["email"].String();

		// Parse the user-supplied ligand.
		OBMol obMol;
		OBConversion obConversion;
		obConversion.SetInFormat(format.c_str());
		obConversion.ReadFile(&obMol, (job_path / ("ligand." + format)).string());
		const auto num_atoms = obMol.NumAtoms();
//		obMol.AddHydrogens(); // Adding hydrogens does not seem to affect SMARTS matching.

		// Classify subset atoms.
		array<vector<int>, num_subsets> subsets;
		for (size_t k = 0; k < num_subsets; ++k)
		{
			auto& subset = subsets[k];
			subset.reserve(num_atoms);
			OBSmartsPattern smarts;
			smarts.Init(SubsetSMARTS[k]);
			smarts.Match(obMol);
			for (const auto& map : smarts.GetMapList())
			{
				subset.push_back(map.front());
			}
		}
		const auto& subset0 = subsets.front();

		// Check user-provided ligand validity.
		// A ligand without any heavy-atom match is treated as a parsing failure.
		if (subset0.empty())
		{
			// Record job completion time stamp.
			const auto millis_since_epoch = duration_cast<std::chrono::milliseconds>(system_clock::now().time_since_epoch()).count();
			conn.update(collection, BSON("_id" << _id), BSON("$set" << BSON("done" << Date_t(millis_since_epoch))));

			// Send error notification email.
			cout << local_time() << "Sending an error notification email to " << email << endl;
			MailMessage message;
			message.setSender("usr <*****@*****.**>");
			message.setSubject("Your usr job has failed");
			message.setContent("Description: " + job["description"].String() + "\nSubmitted: " + to_simple_string(ptime(epoch, boost::posix_time::milliseconds(job["submitted"].Date().millis))) + " UTC\nFailed: " + to_simple_string(ptime(epoch, boost::posix_time::milliseconds(millis_since_epoch))) + " UTC\nReason: failed to parse the provided ligand.");
			message.addRecipient(MailRecipient(MailRecipient::PRIMARY_RECIPIENT, email));
			SMTPClientSession session("137.189.91.190");
			session.login();
			session.sendMessage(message);
			session.close();
			continue;
		}

		// Calculate the four reference points.
		// ctd: mean position of the heavy atoms; cst: heavy atom closest to ctd;
		// fct: heavy atom farthest from ctd; ftf: heavy atom farthest from fct.
		const auto n = subset0.size();
		const auto v = 1.0 / n;
		array<vector3, num_references> references{};
		auto& ctd = references[0];
		auto& cst = references[1];
		auto& fct = references[2];
		auto& ftf = references[3];
		for (const auto i : subset0)
		{
			ctd += obMol.GetAtom(i)->GetVector();
		}
		ctd *= v;
		double cst_dist = numeric_limits<double>::max();
		double fct_dist = numeric_limits<double>::lowest();
		double ftf_dist = numeric_limits<double>::lowest();
		for (const auto i : subset0)
		{
			const auto& a = obMol.GetAtom(i)->GetVector();
			const auto this_dist = a.distSq(ctd);
			if (this_dist < cst_dist)
			{
				cst = a;
				cst_dist = this_dist;
			}
			if (this_dist > fct_dist)
			{
				fct = a;
				fct_dist = this_dist;
			}
		}
		for (const auto i : subset0)
		{
			const auto& a = obMol.GetAtom(i)->GetVector();
			const auto this_dist = a.distSq(fct);
			if (this_dist > ftf_dist)
			{
				ftf = a;
				ftf_dist = this_dist;
			}
		}

		// Precalculate the distances between each atom and each reference point.
		array<vector<double>, num_references> dista;
		for (size_t k = 0; k < num_references; ++k)
		{
			const auto& reference = references[k];
			auto& dists = dista[k];
			dists.resize(1 + num_atoms); // OpenBabel atom index starts from 1. dists[0] is dummy.
			for (size_t i = 0; i < n; ++i)
			{
				dists[subset0[i]] = sqrt(obMol.GetAtom(subset0[i])->GetVector().distSq(reference));
			}
		}

		// Calculate USR and USRCAT features of the input ligand.
		// Three moments are emitted per (subset, reference point) pair, in that nesting order.
		size_t qo = 0;
		for (const auto& subset : subsets)
		{
			const auto n = subset.size();
			for (size_t k = 0; k < num_references; ++k)
			{
				const auto& distp = dista[k];
				vector<double> dists(n);
				for (size_t i = 0; i < n; ++i)
				{
					dists[i] = distp[subset[i]];
				}
				// m[0]: mean; m[1]: square root of the 2nd central moment; m[2]: cube root of the 3rd central moment.
				// Moments that are not computed for small n stay zero.
				array<double, 3> m{};
				if (n > 2)
				{
					const auto v = 1.0 / n;
					for (size_t i = 0; i < n; ++i)
					{
						const auto d = dists[i];
						m[0] += d;
					}
					m[0] *= v;
					for (size_t i = 0; i < n; ++i)
					{
						const auto d = dists[i] - m[0];
						m[1] += d * d;
					}
					m[1] = sqrt(m[1] * v);
					for (size_t i = 0; i < n; ++i)
					{
						const auto d = dists[i] - m[0];
						m[2] += d * d * d;
					}
					m[2] = cbrt(m[2] * v);
				}
				else if (n == 2)
				{
					m[0] = 0.5 *     (dists[0] + dists[1]);
					m[1] = 0.5 * fabs(dists[0] - dists[1]);
				}
				else if (n == 1)
				{
					m[0] = dists[0];
				}
				#pragma unroll
				for (const auto e : m)
				{
					q[qo++] = e;
				}
			}
		}
		assert(qo == qn.back());

		// Compute USR and USRCAT scores.
		// s keeps accumulating across u, so scores[1] includes the terms already summed for scores[0].
		usrcat_bin.seekg(0);
		for (size_t k = 0; k < num_ligands; ++k)
		{
			usrcat_bin.read(reinterpret_cast<char*>(l.data()), sizeof(l));
			double s = 0;
			#pragma unroll
			for (size_t i = 0, u = 0; u < num_usrs; ++u)
			{
				#pragma unroll
				for (const auto qnu = qn[u]; i < qnu; ++i)
				{
					s += fabs(q[i] - l[i]);
				}
				scores[u][k] = s;
			}
		}
		assert(usrcat_bin.tellg() == sizeof(l) * num_ligands);

		// Sort ligands by USRCAT score and then by USR score and then by ZINC ID.
		iota(scase.begin(), scase.end(), 0);
		sort(scase.begin(), scase.end(), [&](const size_t val0, const size_t val1)
		{
			const auto u1score0 = u1scores[val0];
			const auto u1score1 = u1scores[val1];
			if (u1score0 == u1score1)
			{
				const auto u0score0 = u0scores[val0];
				const auto u0score1 = u0scores[val1];
				if (u0score0 == u0score1)
				{
					return zincids[val0] < zincids[val1];
				}
				return u0score0 < u0score1;
			}
			return u1score0 < u1score1;
		});

		// Write results.
		filtering_ostream log_csv_gz;
		log_csv_gz.push(gzip_compressor());
		log_csv_gz.push(file_sink((job_path / "log.csv.gz").string()));
		log_csv_gz.setf(ios::fixed, ios::floatfield);
		log_csv_gz << "ZINC ID,USR score,USRCAT score\n" << setprecision(8);
		filtering_ostream ligands_pdbqt_gz;
		ligands_pdbqt_gz.push(gzip_compressor());
		ligands_pdbqt_gz.push(file_sink((job_path / "ligands.pdbqt.gz").string()));
		ligands_pdbqt_gz.setf(ios::fixed, ios::floatfield);
		// At most 10000 hits are logged, but never more than the database holds:
		// scase has exactly num_ligands entries and must not be indexed past its end.
		const size_t num_records = num_ligands < 10000 ? num_ligands : 10000;
		for (size_t t = 0; t < num_records; ++t)
		{
			const size_t k = scase[t];
			const auto zincid = zincids[k].substr(0, 8); // Take another substr() to get rid of the trailing newline.
			// Map the accumulated distance to a similarity in (0, 1]; 1 means identical features.
			const auto u0score = 1 / (1 + scores[0][k] * qv[0]);
			const auto u1score = 1 / (1 + scores[1][k] * qv[1]);
			log_csv_gz << zincid << ',' << u0score << ',' << u1score << '\n';

			// Only write conformations of the top ligands to ligands.pdbqt.gz.
			if (t >= 1000) continue;

			const auto zfp = zfproperties[k];
			const auto zip = ziproperties[k];
			ligands_pdbqt_gz
				<< "MODEL " << '\n'
				<< "REMARK 911 " << zincid
				<< setprecision(3)
				<< ' ' << setw(8) << zfp[0]
				<< ' ' << setw(8) << zfp[1]
				<< ' ' << setw(8) << zfp[2]
				<< ' ' << setw(8) << zfp[3]
				<< ' ' << setw(3) << zip[0]
				<< ' ' << setw(3) << zip[1]
				<< ' ' << setw(3) << zip[2]
				<< ' ' << setw(3) << zip[3]
				<< ' ' << setw(3) << zip[4]
				<< '\n'
				<< "REMARK 912 " << smileses[k]  // A newline is already included in smileses[k].
				<< "REMARK 913 " << suppliers[k] // A newline is already included in suppliers[k].
				<< setprecision(8)
				<< "REMARK 951    USR SCORE: " << setw(10) << u0score << '\n'
				<< "REMARK 952 USRCAT SCORE: " << setw(10) << u1score << '\n'
			;
			const auto lig = ligands[k];
			ligands_pdbqt_gz.write(lig.data(), lig.size());
			ligands_pdbqt_gz << "ENDMDL\n";
		}

		// Update progress.
		cout << local_time() << "Setting done time" << endl;
		const auto millis_since_epoch = duration_cast<std::chrono::milliseconds>(system_clock::now().time_since_epoch()).count();
		conn.update(collection, BSON("_id" << _id), BSON("$set" << BSON("done" << Date_t(millis_since_epoch))));

		// Send completion notification email.
		cout << local_time() << "Sending a completion notification email to " << email << endl;
		MailMessage message;
		message.setSender("istar <*****@*****.**>");
		message.setSubject("Your usr job has completed");
		message.setContent("Description: " + job["description"].String() + "\nSubmitted: " + to_simple_string(ptime(epoch, boost::posix_time::milliseconds(job["submitted"].Date().millis))) + " UTC\nCompleted: " + to_simple_string(ptime(epoch, boost::posix_time::milliseconds(millis_since_epoch))) + " UTC\nResult: http://istar.cse.cuhk.edu.hk/usr/iview/?" + _id.str());
		message.addRecipient(MailRecipient(MailRecipient::PRIMARY_RECIPIENT, email));
		SMTPClientSession session("137.189.91.190");
		session.login();
		session.sendMessage(message);
		session.close();
	}
}
예제 #6
0
      // Stops the process `pid`.
      //
      // Windows:
      //   - sig == SIGKILL or port == 0: the process handle stored in the registry
      //     is terminated directly;
      //   - otherwise the named shutdown event of the process is opened and set.
      //     If the event does not exist (ERROR_FILE_NOT_FOUND) a forced "shutdown"
      //     admin command is sent to 127.0.0.1:<port> instead, authenticating first
      //     with opt["auth"] (fields "user" and "pwd") when that element is present.
      //
      // Elsewhere: delivers `sig` with kill(). A failure with errno ESRCH is ignored;
      // any other failure is logged and then trips verify().
      inline void kill_wrapper( ProcessId pid, int sig, int port, const BSONObj& opt ) {
#ifdef _WIN32
            if (sig == SIGKILL || port == 0) {
                verify( registry._handles.count(pid) );
                TerminateProcess(registry._handles[pid], 1); // returns failure for "zombie" processes.
                return;
            }

            const std::string eventName = getShutdownSignalName(pid.asUInt32());
            HANDLE event = OpenEventA(EVENT_MODIFY_STATE, FALSE, eventName.c_str());

            if (event == NULL) {
                const int gle = GetLastError();

                // Any failure other than "no such event" is only reported.
                if (gle != ERROR_FILE_NOT_FOUND) {
                    warning() << "kill_wrapper OpenEvent failed: " << errnoWithDescription();
                    return;
                }

                log() << "kill_wrapper OpenEvent failed to open event to the process "
                    << pid.asUInt32()
                    << ". It has likely died already or server is running an older version."
                    << " Attempting to shutdown through admin command.";

                // Back-off to the old way of shutting down the server on Windows, in case we
                // are managing a pre-2.6.0rc0 service, which did not have the event.
                try {
                    DBClientConnection conn;
                    conn.connect("127.0.0.1:" + BSONObjBuilder::numStr(port));

                    // Authenticate first when the caller supplied credentials.
                    BSONElement authObj = opt["auth"];
                    if (!authObj.eoo()) {
                        string errMsg;
                        conn.auth("admin", authObj["user"].String(),
                            authObj["pwd"].String(), errMsg);

                        // A failed authentication is only reported; the shutdown is still attempted.
                        if (!errMsg.empty()) {
                            cout << "Failed to authenticate before shutdown: "
                                << errMsg << endl;
                        }
                    }

                    BSONObjBuilder shutdownCmd;
                    shutdownCmd.append("shutdown", 1);
                    shutdownCmd.append("force", 1);

                    BSONObj reply;
                    conn.runCommand("admin", shutdownCmd.done(), reply);
                }
                catch (...) {
                    // Intentionally ignored: this command never returns data to the client
                    // and the driver doesn't like that.
                }
                return;
            }

            // The event handle is closed when this scope is left.
            ON_BLOCK_EXIT(CloseHandle, event);

            if (!SetEvent(event)) {
                error() << "kill_wrapper SetEvent failed: " << errnoWithDescription();
            }
#else
            int x = kill( pid.toNative(), sig );
            if ( x && errno != ESRCH ) {
                log() << "killFailed: " << errnoWithDescription() << endl;
                verify( x == 0 );
            }

#endif
        }
예제 #7
0
파일: main.cpp 프로젝트: HongjianLi/USR-VS
int main(int argc, char* argv[])
{
	// Check the required number of command line arguments.
	if (argc != 5)
	{
		cout << "usr host user pwd jobs_path" << endl;
		return 0;
	}

	// Fetch command line arguments.
	const auto host = argv[1];
	const auto user = argv[2];
	const auto pwd = argv[3];
	const path jobs_path = argv[4];

	DBClientConnection conn;
	{
		// Connect to host and authenticate user.
		cout << local_time() << "Connecting to " << host << " and authenticating " << user << endl;
		string errmsg;
		if ((!conn.connect(host, errmsg)) || (!conn.auth("istar", user, pwd, errmsg)))
		{
			cerr << local_time() << errmsg << endl;
			return 1;
		}
	}

	// Initialize constants.
	cout << local_time() << "Initializing" << endl;
	const auto collection = "istar.usr2";
	const size_t num_usrs = 2;
	const array<string, 2> usr_names{{ "USR", "USRCAT" }};
	constexpr array<size_t, num_usrs> qn{{ 12, 60 }};
	constexpr array<double, num_usrs> qv{{ 1.0 / qn[0], 1.0 / qn[1] }};
	const size_t num_refPoints = 4;
	const size_t num_subsets = 5;
	const array<string, num_subsets> SubsetSMARTS
	{{
		"[!#1]", // heavy
		"[#6+0!$(*~[#7,#8,F]),SH0+0v2,s+0,S^3,Cl+0,Br+0,I+0]", // hydrophobic
		"[a]", // aromatic
		"[$([O,S;H1;v2]-[!$(*=[O,N,P,S])]),$([O,S;H0;v2]),$([O,S;-]),$([N&v3;H1,H2]-[!$(*=[O,N,P,S])]),$([N;v3;H0]),$([n,o,s;+0]),F]", // acceptor
		"[N!H0v3,N!H0+v4,OH+0,SH+0,nH+0]", // donor
	}};
	const size_t num_hits = 100;

	// Wrap SMARTS strings to RWMol objects.
	array<unique_ptr<ROMol>, num_subsets> SubsetMols;
	for (size_t k = 0; k < num_subsets; ++k)
	{
		SubsetMols[k].reset(reinterpret_cast<ROMol*>(SmartsToMol(SubsetSMARTS[k])));
	}

	// Read ZINC ID file.
	const string_array<size_t> zincids("16/zincid.txt");
	const auto num_ligands = zincids.size();
	cout << local_time() << "Found " << num_ligands << " database molecules" << endl;

	// Read SMILES file.
	const string_array<size_t> smileses("16/smiles.txt");
	assert(smileses.size() == num_ligands);

	// Read supplier file.
	const string_array<size_t> suppliers("16/supplier.txt");
	assert(suppliers.size() == num_ligands);

	// Read property files of floating point types and integer types.
	const auto zfproperties = read<array<float, 4>>("16/zfprop.f32");
	assert(zfproperties.size() == num_ligands);
	const auto ziproperties = read<array<int16_t, 5>>("16/ziprop.i16");
	assert(ziproperties.size() == num_ligands);

	// Read cumulative number of conformers file.
	const auto mconfss = read<size_t>("16/mconfs.u64");
	const auto num_conformers = mconfss.back();
	assert(mconfss.size() == num_ligands);
	assert(num_conformers >= num_ligands);
	cout << local_time() << "Found " << num_conformers << " database conformers" << endl;

	// Read feature file.
	const auto features = read<array<double, qn.back()>>("16/usrcat.f64");
	assert(features.size() == num_conformers);

	// Read ligand footer file and open ligand SDF file for seeking and reading.
	stream_array<size_t> ligands("16/ligand.sdf");
	assert(ligands.size() == num_conformers);

	// Initialize variables.
	array<vector<int>, num_subsets> subsets;
	array<vector<double>, num_refPoints> dista;
	alignas(32) array<double, qn.back()> q;

	// Initialize vectors to store compounds' primary score and their corresponding conformer.
	vector<double> scores(num_ligands); // Primary score of molecules.
	vector<size_t> cnfids(num_ligands); // ID of conformer with the best primary score.
	const auto compare = [&](const size_t val0, const size_t val1) // Sort by the primary score.
	{
		return scores[val0] < scores[val1];
	};

	// Initialize an io service pool and create worker threads for later use.
	const size_t num_threads = thread::hardware_concurrency();
	cout << local_time() << "Creating an io service pool of " << num_threads << " worker threads" << endl;
	io_service_pool io(num_threads);
	safe_counter<size_t> cnt;

	// Initialize the number of chunks and the number of molecules per chunk.
	const auto num_chunks = num_threads << 4;
	const auto chunk_size = 1 + (num_ligands - 1) / num_chunks;
	assert(chunk_size * num_chunks >= num_ligands);
	assert(chunk_size >= num_hits);
	cout << local_time() << "Using " << num_chunks << " chunks and a chunk size of " << chunk_size << endl;
	vector<size_t> scase(num_ligands);
	vector<size_t> zcase(num_hits * (num_chunks - 1) + min(num_hits, num_ligands - chunk_size * (num_chunks - 1))); // The last chunk might have fewer than num_hits records.

	// Enter event loop.
	cout << local_time() << "Entering event loop" << endl;
	cout.setf(ios::fixed, ios::floatfield);
	bool sleeping = false;
	while (true)
	{
		// Fetch an incompleted job in a first-come-first-served manner.
		if (!sleeping) cout << local_time() << "Fetching an incompleted job" << endl;
		BSONObj info;
		const auto started = milliseconds_since_epoch();
		conn.runCommand("istar", BSON("findandmodify" << "usr2" << "query" << BSON("started" << BSON("$exists" << false)) << "sort" << BSON("submitted" << 1) << "update" << BSON("$set" << BSON("started" << started))), info); // conn.findAndModify() is available since MongoDB C++ Driver legacy-1.0.0
		const auto value = info["value"];
		if (value.isNull())
		{
			// No incompleted jobs. Sleep for a while.
			if (!sleeping) cout << local_time() << "Sleeping" << endl;
			sleeping = true;
			this_thread::sleep_for(chrono::seconds(2));
			continue;
		}
		sleeping = false;
		const auto job = value.Obj();

		// Obtain job properties.
		const auto _id = job["_id"].OID();
		cout << local_time() << "Executing job " << _id.str() << endl;
		const auto job_path = jobs_path / _id.str();
		const size_t usr0 = job["usr"].Int(); // Specify the primary sorting score. 0: USR; 1: USRCAT.
		assert(usr0 == 0 || usr0 == 1);
		const auto usr1 = usr0 ^ 1;
		const auto qnu0 = qn[usr0];
		const auto qnu1 = qn[usr1];

		// Read and validate the user-supplied SDF file.
		cout << local_time() << "Reading and validating the query file" << endl;
		SDMolSupplier sup((job_path / "query.sdf").string(), true, false, true); // sanitize, removeHs, strictParsing
		if (!sup.length() || !sup.atEnd())
		{
			const auto error = 1;
			cout << local_time() << "Failed to parse the query file, error code = " << error << endl;
			conn.update(collection, BSON("_id" << _id), BSON("$set" << BSON("completed" << milliseconds_since_epoch() << "error" << error)));
			continue;
		}

		// Process each of the query molecules sequentially.
		const auto num_queries = 1; // Restrict the number of query molecules to 1. Setting num_queries = sup.length() to execute any number of query molecules.
		for (unsigned int query_number = 0; query_number < num_queries; ++query_number)
		{
			cout << local_time() << "Parsing query molecule " << query_number << endl;
			const unique_ptr<ROMol> qry_ptr(sup.next()); // Calling next() may print "ERROR: Could not sanitize molecule on line XXXX" to stderr.
			auto& qryMol = *qry_ptr;

			// Get the number of atoms, including and excluding hydrogens.
			const auto num_atoms = qryMol.getNumAtoms();
			const auto num_heavy_atoms = qryMol.getNumHeavyAtoms();
			assert(num_heavy_atoms);
			cout << local_time() << "Found " << num_atoms << " atoms and " << num_heavy_atoms << " heavy atoms" << endl;

			// Create an output directory.
			cout << local_time() << "Creating output directory" << endl;
			const auto output_dir = job_path / to_string(query_number);
			create_directory(output_dir);

			// Draw a SVG.
			cout << local_time() << "Drawing a SVG" << endl;
			{
				const unique_ptr<ROMol> qrz_ptr(removeHs(qryMol));
				auto& qrzMol = *qrz_ptr;
				compute2DCoords(qrzMol);
				boost::filesystem::ofstream ofs(output_dir / "query.svg");
				ofs << DrawingToSVG(MolToDrawing(qrzMol));
			}

			// Calculate Morgan fingerprint.
			cout << local_time() << "Calculating Morgan fingerprint" << endl;
			const unique_ptr<SparseIntVect<uint32_t>> qryFp(getFingerprint(qryMol, 2));

			// Classify atoms to pharmacophoric subsets.
			cout << local_time() << "Classifying atoms into subsets" << endl;
			for (size_t k = 0; k < num_subsets; ++k)
			{
				vector<vector<pair<int, int>>> matchVect;
				SubstructMatch(qryMol, *SubsetMols[k], matchVect);
				const auto num_matches = matchVect.size();
				auto& subset = subsets[k];
				subset.resize(num_matches);
				for (size_t i = 0; i < num_matches; ++i)
				{
					subset[i] = matchVect[i].front().second;
				}
				cout << local_time() << "Found " << num_matches << " atoms for subset " << k << endl;
			}
			const auto& subset0 = subsets.front();
			assert(subset0.size() == num_heavy_atoms);

			// Calculate the four reference points.
			cout << local_time() << "Calculating " << num_refPoints << " reference points" << endl;
			const auto qryRefPoints = calcRefPoints(qryMol, subset0);
			const Point3DConstPtrVect qryRefPointv
			{{
				&qryRefPoints[0],
				&qryRefPoints[1],
				&qryRefPoints[2],
				&qryRefPoints[3],
			}};

			// Precalculate the distances of heavy atoms to the reference points, given that subsets[1 to 4] are subsets of subsets[0].
			cout << local_time() << "Calculating " << num_heavy_atoms * num_refPoints << " pairwise distances" << endl;
			const auto& qryCnf = qryMol.getConformer();
			for (size_t k = 0; k < num_refPoints; ++k)
			{
				const auto& refPoint = qryRefPoints[k];
				auto& distp = dista[k];
				distp.resize(num_atoms);
				for (size_t i = 0; i < num_heavy_atoms; ++i)
				{
					distp[subset0[i]] = sqrt(dist2(qryCnf.getAtomPos(subset0[i]), refPoint));
				}
			}

			// Loop over pharmacophoric subsets and reference points.
			cout << local_time() << "Calculating " << 3 * num_refPoints * num_subsets << " moments of USRCAT feature" << endl;
			size_t qo = 0;
			for (const auto& subset : subsets)
			{
				const auto n = subset.size();
				for (size_t k = 0; k < num_refPoints; ++k)
				{
					// Load distances from precalculated ones.
					const auto& distp = dista[k];
					vector<double> dists(n);
					for (size_t i = 0; i < n; ++i)
					{
						dists[i] = distp[subset[i]];
					}

					// Compute moments.
					array<double, 3> m{};
					if (n > 2)
					{
						const auto v = 1.0 / n;
						for (size_t i = 0; i < n; ++i)
						{
							const auto d = dists[i];
							m[0] += d;
						}
						m[0] *= v;
						for (size_t i = 0; i < n; ++i)
						{
							const auto d = dists[i] - m[0];
							m[1] += d * d;
						}
						m[1] = sqrt(m[1] * v);
						for (size_t i = 0; i < n; ++i)
						{
							const auto d = dists[i] - m[0];
							m[2] += d * d * d;
						}
						m[2] = cbrt(m[2] * v);
					}
					else if (n == 2)
					{
						m[0] = 0.5 *     (dists[0] + dists[1]);
						m[1] = 0.5 * fabs(dists[0] - dists[1]);
					}
					else if (n == 1)
					{
						m[0] = dists[0];
					}
					for (const auto e : m)
					{
						q[qo++] = e;
					}
				}
			}
			assert(qo == qn.back());

			// Compute USR and USRCAT scores.
			cout << local_time() << "Calculating " << num_ligands << " " << usr_names[usr0] << " scores" << endl;
			scores.assign(scores.size(), numeric_limits<double>::max());
			iota(scase.begin(), scase.end(), 0);
			cnt.init(num_chunks);
			for (size_t l = 0; l < num_chunks; ++l)
			{
				io.post([&,l]()
				{
					// Loop over molecules of the current chunk.
					const auto chunk_beg = chunk_size * l;
					const auto chunk_end = min(chunk_beg + chunk_size, num_ligands);
					for (size_t k = chunk_beg; k < chunk_end; ++k)
					{
						// Loop over conformers of the current molecule and calculate their primary score.
						auto& scorek = scores[k];
						size_t j = k ? mconfss[k - 1] : 0;
						for (const auto mconfs = mconfss[k]; j < mconfs; ++j)
						{
							const auto& d = features[j];
							double s = 0;
							for (size_t i = 0; i < qnu0; ++i)
							{
								s += abs(q[i] - d[i]);
								if (s >= scorek) break;
							}
							if (s < scorek)
							{
								scorek = s;
								cnfids[k] = j;
							}
						}
					}

					// Sort the scores of molecules of the current chunk.
					sort(scase.begin() + chunk_beg, scase.begin() + chunk_end, compare);

					// Copy the indexes of top hits of the current chunk to a global vector for final sorting.
					copy_n(scase.begin() + chunk_beg, min(num_hits, chunk_end - chunk_beg), zcase.begin() + num_hits * l);

					cnt.increment();
				});
			}
			cnt.wait();

			// Sort the top hits from chunks.
			cout << local_time() << "Sorting " << zcase.size() << " hits by " << usr_names[usr0] << " score" << endl;
			sort(zcase.begin(), zcase.end(), compare);

			// Create output directory and write output files.
			cout << local_time() << "Writing output files" << endl;
			SDWriter hits_sdf((output_dir / "hits.sdf").string());
			boost::filesystem::ofstream hits_csv(output_dir / "hits.csv");
			hits_csv.setf(ios::fixed, ios::floatfield);
			hits_csv << "ZINC ID,USR score,USRCAT score,2D Tanimoto score,Molecular weight (g/mol),Partition coefficient xlogP,Apolar desolvation (kcal/mol),Polar desolvation (kcal/mol),Hydrogen bond donors,Hydrogen bond acceptors,Polar surface area tPSA (Å^2),Net charge,Rotatable bonds,SMILES,Vendors and annotations\n";
			for (size_t l = 0; l < num_hits; ++l)
			{
				// Obtain indexes to the hit molecule and the hit conformer.
				const auto k = zcase[l];
				const auto j = cnfids[k];

				// Read SDF content of the hit conformer.
				const auto lig = ligands[j];

				// Construct a RDKit ROMol object.
				istringstream iss(lig);
				SDMolSupplier sup(&iss, false, true, false, true);
				assert(sup.length() == 1);
				assert(sup.atEnd());
				const unique_ptr<ROMol> hit_ptr(sup.next());
				auto& hitMol = *hit_ptr;

				// Calculate Morgan fingerprint.
				const unique_ptr<SparseIntVect<uint32_t>> hitFp(getFingerprint(hitMol, 2));

				// Calculate Tanimoto similarity.
				const auto ts = TanimotoSimilarity(*qryFp, *hitFp);

				// Find heavy atoms.
				vector<vector<pair<int, int>>> matchVect;
				SubstructMatch(hitMol, *SubsetMols[0], matchVect);
				const auto num_matches = matchVect.size();
				assert(num_matches == hitMol.getNumHeavyAtoms());
				vector<int> hitHeavyAtoms(num_matches);
				for (size_t i = 0; i < num_matches; ++i)
				{
					hitHeavyAtoms[i] = matchVect[i].front().second;
					assert(hitHeavyAtoms[i] == i); // hitHeavyAtoms can be constructed using iota(hitHeavyAtoms.begin(), hitHeavyAtoms.end(), 0); because for RDKit-generated SDF molecules, heavy atom are always the first few atoms.
				}

				// Calculate the four reference points.
				const auto hitRefPoints = calcRefPoints(hitMol, hitHeavyAtoms);
				const Point3DConstPtrVect hitRefPointv
				{{
					&hitRefPoints[0],
					&hitRefPoints[1],
					&hitRefPoints[2],
					&hitRefPoints[3],
				}};

				// Calculate a 3D transform from the four reference points of the hit conformer to those of the query molecule.
				Transform3D trans;
				AlignPoints(qryRefPointv, hitRefPointv, trans);

				// Apply the 3D transform to all atoms of the hit conformer.
				auto& hitCnf = hitMol.getConformer();
				transformConformer(hitCnf, trans);

				// Write the aligned hit conformer.
				hits_sdf.write(hitMol);

				// Calculate the secondary score of the saved conformer, which has the best primary score.
				const auto& d = features[j];
				double s = 0;
				for (size_t i = 0; i < qnu1; ++i)
				{
					s += abs(q[i] - d[i]);
				}

				const auto u0score = 1 / (1 + scores[k] * qv[usr0]); // Primary score of the current molecule.
				const auto u1score = 1 / (1 + s         * qv[usr1]); // Secondary score of the current molecule.
				const auto zincid = zincids[k].substr(0, 8); // Take another substr() to get rid of the trailing newline.
				const auto zfp = zfproperties[k];
				const auto zip = ziproperties[k];
				const auto smiles = smileses[k];    // A newline is already included in smileses[k].
				const auto supplier = suppliers[k]; // A newline is already included in suppliers[k].
				hits_csv
					<< zincid
					<< setprecision(8)
					<< ',' << (usr1 ? u0score : u1score)
					<< ',' << (usr1 ? u1score : u0score)
					<< ',' << ts
					<< setprecision(3)
					<< ',' << zfp[0]
					<< ',' << zfp[1]
					<< ',' << zfp[2]
					<< ',' << zfp[3]
					<< ',' << zip[0]
					<< ',' << zip[1]
					<< ',' << zip[2]
					<< ',' << zip[3]
					<< ',' << zip[4]
					<< ',' << smiles.substr(0, smiles.length() - 1)     // Get rid of the trailing newline.
					<< ',' << supplier.substr(0, supplier.length() - 1) // Get rid of the trailing newline.
					<< '\n'
				;
			}
		}

		// Update job status.
		cout << local_time() << "Setting completed time" << endl;
		const auto completed = milliseconds_since_epoch();
		conn.update(collection, BSON("_id" << _id), BSON("$set" << BSON("completed" << completed << "nqueries" << num_queries)));

		// Calculate runtime in seconds and screening speed in million conformers per second.
		const auto runtime = (completed - started) * 0.001;
		const auto speed = num_conformers * 0.000001 * num_queries / runtime;
		cout
			<< local_time() << "Completed " << num_queries << " " << (num_queries == 1 ? "query" : "queries") << " in " << setprecision(3) << runtime << " seconds" << endl
			<< local_time() << "Screening speed was " << setprecision(0) << speed << " M conformers per second" << endl
		;
	}
}
예제 #8
0
    void DBClientReplicaSet::_auth( const BSONObj& params ) {

        // We prefer to authenticate against a primary, but otherwise a secondary is ok too
        // Empty tag matches every secondary
        shared_ptr<ReadPreferenceSetting> readPref(
            new ReadPreferenceSetting( ReadPreference_PrimaryPreferred, TagSet() ) );

        LOG(3) << "dbclient_rs authentication of " << _getMonitor()->getName() << endl;

        // NOTE that we retry MAX_RETRY + 1 times, since we're always primary preferred we don't
        // fallback to the primary.
        Status lastNodeStatus = Status::OK();
        for ( size_t retry = 0; retry < MAX_RETRY + 1; retry++ ) {
            try {
                DBClientConnection* conn = selectNodeUsingTags( readPref );

                if ( conn == NULL ) {
                    break;
                }

                conn->auth( params );

                // Cache the new auth information since we now validated it's good
                _auths[params[saslCommandUserDBFieldName].str()] = params.getOwned();

                // Ensure the only child connection open is the one we authenticated against - other
                // child connections may not have full authentication information.
                // NOTE: _lastSlaveOkConn may or may not be the same as _master
                dassert(_lastSlaveOkConn.get() == conn || _master.get() == conn);
                if ( conn != _lastSlaveOkConn.get() ) {
                    _lastSlaveOkHost = HostAndPort();
                    _lastSlaveOkConn.reset();
                }
                if ( conn != _master.get() ) {
                    _masterHost = HostAndPort();
                    _master.reset();
                }

                return;
            }
            catch ( const DBException &ex ) {

                // We care if we can't authenticate (i.e. bad password) in credential params.
                if ( isAuthenticationException( ex ) ) {
                    throw;
                }

                StringBuilder errMsgB;
                errMsgB << "can't authenticate against replica set node "
                        << _lastSlaveOkHost.toString();
                lastNodeStatus = ex.toStatus( errMsgB.str() );

                LOG(1) << lastNodeStatus.reason() << endl;
                invalidateLastSlaveOkCache();
            }
        }

        if ( lastNodeStatus.isOK() ) {
            StringBuilder assertMsgB;
            assertMsgB << "Failed to authenticate, no good nodes in " << _getMonitor()->getName();
            uasserted( ErrorCodes::NodeNotFound, assertMsgB.str() );
        }
        else {
            uasserted( lastNodeStatus.code(), lastNodeStatus.reason() );
        }
    }