Ejemplo n.º 1
0
extern int slurm_persist_conn_reopen(slurm_persist_conn_t *persist_conn,
				     bool with_init)
{
	slurm_persist_conn_close(persist_conn);

	if (with_init)
		return slurm_persist_conn_open(persist_conn);
	else
		return slurm_persist_conn_open_without_init(persist_conn);
}
Ejemplo n.º 2
0
/* run_dbd_backup - this is the backup controller, it should run in standby
 *	mode, assuming control when the primary controller stops responding */
extern void run_dbd_backup(void)
{
	slurm_persist_conn_t slurmdbd_conn;

	primary_resumed = false;

	memset(&slurmdbd_conn, 0, sizeof(slurm_persist_conn_t));
	slurmdbd_conn.rem_host = slurmdbd_conf->dbd_addr;
	slurmdbd_conn.rem_port = slurmdbd_conf->dbd_port;
	slurmdbd_conn.cluster_name = "backup_slurmdbd";
	slurmdbd_conn.fd = -1;
	slurmdbd_conn.shutdown = &shutdown_time;

	slurm_persist_conn_open_without_init(&slurmdbd_conn);

	/* repeatedly ping Primary */
	while (!shutdown_time) {
		int writeable = slurm_persist_conn_writeable(&slurmdbd_conn);
		//info("%d %d", have_control, writeable);

		if (have_control && writeable == 1) {
			info("Primary has come back");
			primary_resumed = true;
			shutdown_threads();
			have_control = false;
			break;
		} else if (!have_control && writeable <= 0) {
			have_control = true;
			info("Taking Control");
			break;
		}

		sleep(1);
		if (writeable <= 0)
			slurm_persist_conn_reopen(&slurmdbd_conn, false);
	}

	slurm_persist_conn_close(&slurmdbd_conn);

	return;
}
Ejemplo n.º 3
0
/* Open a persistent socket connection
 * IN/OUT - persistent connection needing rem_host and rem_port filled in.
 * Returned completely filled in.
 * Returns SLURM_SUCCESS on success or SLURM_ERROR on failure */
extern int slurm_persist_conn_open(slurm_persist_conn_t *persist_conn)
{
	int rc = SLURM_ERROR;
	slurm_msg_t req_msg;
	persist_init_req_msg_t req;
	persist_rc_msg_t *resp = NULL;

	if (slurm_persist_conn_open_without_init(persist_conn) != SLURM_SUCCESS)
		return rc;

	slurm_msg_t_init(&req_msg);

	/* Always send the lowest protocol since we don't know what version the
	 * other side is running yet.
	 */
	req_msg.protocol_version = persist_conn->version;
	req_msg.msg_type = REQUEST_PERSIST_INIT;

	req_msg.flags |= SLURM_GLOBAL_AUTH_KEY;
	if (persist_conn->flags & PERSIST_FLAG_DBD)
		req_msg.flags |= SLURMDBD_CONNECTION;

	memset(&req, 0, sizeof(persist_init_req_msg_t));
	req.cluster_name = persist_conn->cluster_name;
	req.persist_type = persist_conn->persist_type;
	req.port = persist_conn->my_port;
	req.version = SLURM_PROTOCOL_VERSION;

	req_msg.data = &req;

	if (slurm_send_node_msg(persist_conn->fd, &req_msg) < 0) {
		error("%s: failed to send persistent connection init message to %s:%d",
		      __func__, persist_conn->rem_host, persist_conn->rem_port);
		_close_fd(&persist_conn->fd);
	} else {
		Buf buffer = slurm_persist_recv_msg(persist_conn);
		persist_msg_t msg;
		slurm_persist_conn_t persist_conn_tmp;

		if (!buffer) {
			if (_comm_fail_log(persist_conn)) {
				error("%s: No response to persist_init",
				      __func__);
			}
			_close_fd(&persist_conn->fd);
			goto end_it;
		}
		memset(&msg, 0, sizeof(persist_msg_t));
		memcpy(&persist_conn_tmp, persist_conn,
		       sizeof(slurm_persist_conn_t));
		/* The first unpack is done the same way for dbd or normal
		 * communication . */
		persist_conn_tmp.flags &= (~PERSIST_FLAG_DBD);
		rc = slurm_persist_msg_unpack(&persist_conn_tmp, &msg, buffer);
		free_buf(buffer);

		resp = (persist_rc_msg_t *)msg.data;
		if (resp && (rc == SLURM_SUCCESS)) {
			rc = resp->rc;
			persist_conn->version = resp->ret_info;
			persist_conn->flags |= resp->flags;
		}

		if (rc != SLURM_SUCCESS) {
			if (resp) {
				error("%s: Something happened with the receiving/processing of the persistent connection init message to %s:%d: %s",
				      __func__, persist_conn->rem_host,
				      persist_conn->rem_port, resp->comment);
			} else {
				error("%s: Failed to unpack persistent connection init resp message from %s:%d",
				      __func__,
				      persist_conn->rem_host,
				      persist_conn->rem_port);
			}
			_close_fd(&persist_conn->fd);
		}
	}

end_it:

	slurm_persist_free_rc_msg(resp);

	return rc;
}