Exemplo n.º 1
0
/*
 * slurm_persist_send_msg - write the packed contents of a buffer to a
 *	persistent connection, preceded by a 32-bit length in network byte
 *	order.
 * IN persist_conn - connection to write to; must not be NULL (xassert)
 * IN buffer - message to send; get_buf_offset(buffer) bytes starting at
 *	get_buf_data(buffer) are written
 * RET SLURM_SUCCESS once the length header and every message byte have been
 *	written,
 *	EAGAIN if the connection has no valid fd, is not writeable, a write
 *	fails or comes up short, or the re-open path has been taken too often,
 *	ESLURM_ACCESS_DENIED if errno holds that value when the connection is
 *	reported as failed,
 *	SLURM_ERROR if buffer is NULL or the connection failed and
 *	PERSIST_FLAG_RECONNECT is not set.
 */
extern int slurm_persist_send_msg(
	slurm_persist_conn_t *persist_conn, Buf buffer)
{
	uint32_t msg_size, nw_size;
	char *msg;
	ssize_t msg_wrote;
	int rc, retry_cnt = 0;

	xassert(persist_conn);

	if (persist_conn->fd < 0)
		return EAGAIN;

	if (!buffer)
		return SLURM_ERROR;

	rc = slurm_persist_conn_writeable(persist_conn);
	if (rc == -1) {
		/*
		 * This label is also the target of a goto from the body
		 * write loop below. Arriving here from that loop falls
		 * through to the header write again, so the whole message
		 * (length header included) is restarted after a re-open.
		 */
	re_open:
		/* Give up on the fifth pass through the re-open path. */
		if (retry_cnt++ > 3)
			return EAGAIN;
		/* If errno is ACCESS_DENIED do not try to reopen the
		   connection, just return that. */
		if (errno == ESLURM_ACCESS_DENIED)
			return ESLURM_ACCESS_DENIED;

		if (persist_conn->flags & PERSIST_FLAG_RECONNECT) {
			slurm_persist_conn_reopen(persist_conn, true);
			rc = slurm_persist_conn_writeable(persist_conn);
		} else
			return SLURM_ERROR;
	}
	if (rc < 1)
		return EAGAIN;

	msg_size = get_buf_offset(buffer);
	nw_size = htonl(msg_size);
	/*
	 * A write() interrupted by a signal before any byte was transferred
	 * fails with EINTR; nothing reached the peer, so simply retry instead
	 * of reporting the send as failed.
	 */
	do {
		msg_wrote = write(persist_conn->fd, &nw_size, sizeof(nw_size));
	} while ((msg_wrote < 0) && (errno == EINTR));
	/* Any other failure, or a short write of the header, is EAGAIN. */
	if (msg_wrote != sizeof(nw_size))
		return EAGAIN;

	msg = get_buf_data(buffer);
	while (msg_size > 0) {
		rc = slurm_persist_conn_writeable(persist_conn);
		if (rc == -1)
			goto re_open;
		if (rc < 1)
			return EAGAIN;
		msg_wrote = write(persist_conn->fd, msg, msg_size);
		/*
		 * EINTR means this call wrote nothing; go around again
		 * (re-checking writeability) rather than abandoning a
		 * message that may already be partly on the wire.
		 */
		if ((msg_wrote < 0) && (errno == EINTR))
			continue;
		if (msg_wrote <= 0)
			return EAGAIN;
		/* Partial write: advance past what was accepted. */
		msg += msg_wrote;
		msg_size -= msg_wrote;
	}

	return SLURM_SUCCESS;
}
Exemplo n.º 2
0
/*
 * run_dbd_backup - body of the backup controller. Runs in standby mode,
 *	polling the primary over a persistent connection once per second,
 *	and returns as soon as control changes hands or shutdown_time is set.
 *
 * On return, have_control tells the caller who is in charge:
 *	- set to true when the primary stopped answering while this daemon
 *	  was in standby,
 *	- set to false (with primary_resumed = true, after shutdown_threads())
 *	  when the primary answered again while this daemon held control.
 */
extern void run_dbd_backup(void)
{
	slurm_persist_conn_t slurmdbd_conn;

	primary_resumed = false;

	/* Describe the primary slurmdbd; the connection starts out closed. */
	memset(&slurmdbd_conn, 0, sizeof(slurmdbd_conn));
	slurmdbd_conn.fd = -1;
	slurmdbd_conn.cluster_name = "backup_slurmdbd";
	slurmdbd_conn.rem_host = slurmdbd_conf->dbd_addr;
	slurmdbd_conn.rem_port = slurmdbd_conf->dbd_port;
	slurmdbd_conn.shutdown = &shutdown_time;

	slurm_persist_conn_open_without_init(&slurmdbd_conn);

	/* Keep pinging the primary until something changes. */
	while (!shutdown_time) {
		const int conn_state =
			slurm_persist_conn_writeable(&slurmdbd_conn);

		if (have_control) {
			/* A result of 1 is taken as "primary reachable". */
			if (conn_state == 1) {
				info("Primary has come back");
				primary_resumed = true;
				shutdown_threads();
				have_control = false;
				break;
			}
		} else if (conn_state <= 0) {
			/* Standby and the primary did not answer. */
			have_control = true;
			info("Taking Control");
			break;
		}

		sleep(1);

		/* Connection unusable: try to re-establish it for the
		 * next round. */
		if (conn_state <= 0)
			slurm_persist_conn_reopen(&slurmdbd_conn, false);
	}

	slurm_persist_conn_close(&slurmdbd_conn);
}
Exemplo n.º 3
0
/*
 * _send_fini_msg - pack a DBD_FINI request (commit = 0, close_conn = 1) and
 *	send it over slurmdbd_conn.
 * RET SLURM_SUCCESS if the connection is already reported as failed (-1)
 *	and nothing is sent, otherwise the return code of
 *	slurm_persist_send_msg().
 */
static int _send_fini_msg(void)
{
	dbd_fini_msg_t fini_req;
	Buf msg_buf;
	int send_rc;

	/* No point sending a fini over a connection that is already gone. */
	if (slurm_persist_conn_writeable(slurmdbd_conn) == -1)
		return SLURM_SUCCESS;

	fini_req.close_conn = 1;
	fini_req.commit = 0;

	/* Message type first, then the packed request body. */
	msg_buf = init_buf(1024);
	pack16((uint16_t) DBD_FINI, msg_buf);
	slurmdbd_pack_fini_msg(&fini_req, SLURM_PROTOCOL_VERSION, msg_buf);

	send_rc = slurm_persist_send_msg(slurmdbd_conn, msg_buf);
	free_buf(msg_buf);

	return send_rc;
}