示例#1
0
/*
 * slurm_persist_send_msg - send the contents of a buffer over a persistent
 *	connection as one length-prefixed message.
 *
 * The message is framed as a 4-byte length in network byte order (the value
 * returned by get_buf_offset()) followed by that many bytes taken from
 * get_buf_data().
 *
 * IN persist_conn - connection to write to (must not be NULL)
 * IN buffer - data to send
 * RET SLURM_SUCCESS once the whole payload has been written,
 *	EAGAIN if the fd is not open, the connection is not writeable, a
 *	write comes up short/fails, or too many reopen attempts were made,
 *	ESLURM_ACCESS_DENIED if errno holds that value when the connection
 *	reports -1,
 *	SLURM_ERROR if buffer is NULL or the connection reports -1 and
 *	PERSIST_FLAG_RECONNECT is not set.
 */
extern int slurm_persist_send_msg(
	slurm_persist_conn_t *persist_conn, Buf buffer)
{
	uint32_t bytes_left, wire_len;
	char *cursor;
	ssize_t sent;
	int state, attempts = 0;

	xassert(persist_conn);

	if (persist_conn->fd < 0)
		return EAGAIN;
	if (!buffer)
		return SLURM_ERROR;

	state = slurm_persist_conn_writeable(persist_conn);

	/*
	 * Each pass of this loop is one attempt at sending the complete
	 * message (header + payload).  A new pass is only started when the
	 * connection reports -1 in the middle of the payload.
	 */
	for (;;) {
		if (state == -1) {
			if (attempts++ > 3)
				return EAGAIN;
			/* Access was denied: reopening will not help, hand
			 * that error straight back to the caller. */
			if (errno == ESLURM_ACCESS_DENIED)
				return ESLURM_ACCESS_DENIED;
			if (!(persist_conn->flags & PERSIST_FLAG_RECONNECT))
				return SLURM_ERROR;

			slurm_persist_conn_reopen(persist_conn, true);
			state = slurm_persist_conn_writeable(persist_conn);
		}
		if (state < 1)
			return EAGAIN;

		/* Header: payload length in network byte order. */
		bytes_left = get_buf_offset(buffer);
		wire_len = htonl(bytes_left);
		sent = write(persist_conn->fd, &wire_len, sizeof(wire_len));
		if (sent != sizeof(wire_len))
			return EAGAIN;

		/* Payload: keep writing until nothing is left. */
		cursor = get_buf_data(buffer);
		while (bytes_left > 0) {
			state = slurm_persist_conn_writeable(persist_conn);
			if (state == -1)
				break;	/* reopen and resend from the start */
			if (state < 1)
				return EAGAIN;

			sent = write(persist_conn->fd, cursor, bytes_left);
			if (sent <= 0)
				return EAGAIN;
			cursor += sent;
			bytes_left -= sent;
		}

		/* Bytes still pending means the inner loop bailed on -1. */
		if (bytes_left == 0)
			return SLURM_SUCCESS;
	}
}
示例#2
0
文件: backup.c 项目: chrisdukey/slurm
/* run_dbd_backup - this is the backup controller, it should run in standby
 *	mode, assuming control when the primary controller stops responding */
extern void run_dbd_backup(void)
{
	slurm_persist_conn_t slurmdbd_conn;

	primary_resumed = false;

	memset(&slurmdbd_conn, 0, sizeof(slurm_persist_conn_t));
	slurmdbd_conn.rem_host = slurmdbd_conf->dbd_addr;
	slurmdbd_conn.rem_port = slurmdbd_conf->dbd_port;
	slurmdbd_conn.cluster_name = "backup_slurmdbd";
	slurmdbd_conn.fd = -1;
	slurmdbd_conn.shutdown = &shutdown_time;

	slurm_persist_conn_open_without_init(&slurmdbd_conn);

	/* repeatedly ping Primary */
	while (!shutdown_time) {
		int writeable = slurm_persist_conn_writeable(&slurmdbd_conn);
		//info("%d %d", have_control, writeable);

		if (have_control && writeable == 1) {
			info("Primary has come back");
			primary_resumed = true;
			shutdown_threads();
			have_control = false;
			break;
		} else if (!have_control && writeable <= 0) {
			have_control = true;
			info("Taking Control");
			break;
		}

		sleep(1);
		if (writeable <= 0)
			slurm_persist_conn_reopen(&slurmdbd_conn, false);
	}

	slurm_persist_conn_close(&slurmdbd_conn);

	return;
}
示例#3
0
/*
 * slurm_persist_recv_msg - read one length-prefixed message from a
 *	persistent connection.
 *
 * The wire format is a 4-byte length in network byte order followed by that
 * many payload bytes.
 *
 * IN persist_conn - connection to read from (must not be NULL)
 * RET buffer built by create_buf() around the payload read, or NULL if the
 *	fd is not open, the connection is not readable, the length header is
 *	short or smaller than 2, or the payload could not be read completely.
 *
 * On every failure except "fd not open" the connection is reopened when
 * PERSIST_FLAG_RECONNECT is set and *persist_conn->shutdown is zero.
 */
extern Buf slurm_persist_recv_msg(slurm_persist_conn_t *persist_conn)
{
	uint32_t msg_size, nw_size;
	char *msg;
	ssize_t msg_read, offset;
	Buf buffer;

	xassert(persist_conn);

	if (persist_conn->fd < 0)
		return NULL;

	if (!_conn_readable(persist_conn))
		goto endit;

	/* Header: payload length in network byte order. */
	msg_read = read(persist_conn->fd, &nw_size, sizeof(nw_size));
	if (msg_read != sizeof(nw_size))
		goto endit;
	msg_size = ntohl(nw_size);
	/* We don't error check for an upper limit here
	 * since size could possibly be massive */
	if (msg_size < 2) {
		error("Persistent Conn: Invalid msg_size (%u)", msg_size);
		goto endit;
	}

	/* Payload: keep reading until msg_size bytes have arrived. */
	msg = xmalloc(msg_size);
	offset = 0;
	while (msg_size > offset) {
		if (!_conn_readable(persist_conn))
			break;		/* problem with this socket */
		msg_read = read(persist_conn->fd, (msg + offset),
				(msg_size - offset));
		if (msg_read <= 0) {
			error("Persistent Conn: read: %m");
			break;
		}
		offset += msg_read;
	}
	if (msg_size != offset) {
		if (!(*persist_conn->shutdown)) {
			/* msg_size is unsigned: print it with %u so large
			 * sizes are not shown as negative numbers. */
			error("Persistent Conn: only read %zd of %u bytes",
			      offset, msg_size);
		}	/* else in shutdown mode */
		xfree(msg);
		goto endit;
	}

	/* The payload is handed to create_buf(); it is not freed here. */
	buffer = create_buf(msg, msg_size);
	return buffer;

endit:
	/* Close it since we abandoned it.  If the connection does still exist
	 * on the other end we can't rely on it after this point since we didn't
	 * listen long enough for this response.
	 */
	if (!(*persist_conn->shutdown) &&
	    (persist_conn->flags & PERSIST_FLAG_RECONNECT))
		slurm_persist_conn_reopen(persist_conn, true);

	return NULL;
}