Пример #1
0
/*
 * pmixp_server_health_chk - send a minimal health-check message.
 *
 * Builds a PMIXP_MSG_HEALTH_CHK message (packed header followed by a single
 * '\n' payload byte) and passes it to pmixp_stepd_send().
 *
 * IN hostlist - destination, forwarded unchanged to pmixp_stepd_send()
 * IN addr     - address, forwarded unchanged to pmixp_stepd_send()
 * RET the return code of pmixp_stepd_send()
 */
int pmixp_server_health_chk(char *hostlist,  const char *addr)
{
	send_header_t hdr;
	char nhdr[sizeof(send_header_t)];
	size_t hsize;
	Buf buf = pmixp_server_new_buf();
	int rc;

	hdr.magic = PMIX_SERVER_MSG_MAGIC;
	hdr.type = PMIXP_MSG_HEALTH_CHK;
	hdr.msgsize = 1;
	hdr.seq = 0;
	/* Store global nodeid that is
	 *  independent from exact collective */
	hdr.nodeid = pmixp_info_nodeid_job();
	hsize = _send_pack_hdr(&hdr, nhdr);
	/* the packed header goes at the very start of the buffer */
	memcpy(get_buf_data(buf), nhdr, hsize);

	grow_buf(buf, sizeof(char));
	pack8('\n', buf);

	/*
	 * Fetch the data pointer only now: grow_buf()/pack8() may have moved
	 * the underlying storage, so a pointer taken earlier could be stale.
	 */
	rc = pmixp_stepd_send(hostlist, addr, get_buf_data(buf),
			      get_buf_offset(buf), 4, 14, 1);
	if (SLURM_SUCCESS != rc) {
		PMIXP_ERROR("Was unable to wait for the parent %s to become alive on addr %s",
			    hostlist, addr);
	}

	/*
	 * The buffer is local to this call; release it on every path.
	 * NOTE(review): assumes pmixp_stepd_send() does not keep the data
	 * pointer after it returns - confirm against its implementation.
	 */
	free_buf(buf);

	return rc;
}
Пример #2
0
/*
 * Allocate a server buffer with PMIXP_SERVER_BUFFER_OFFS bytes reserved at
 * its front; the buffer offset is left pointing just past that area.
 */
Buf pmixp_server_buf_new(void)
{
	size_t reserved = PMIXP_SERVER_BUFFER_OFFS;
	Buf new_buf = create_buf(xmalloc(reserved), reserved);
	uint32_t *cells = (uint32_t *) get_buf_data(new_buf);

	/* Cell 0 holds the payload offset; 0 marks a buffer that
	 * has not been finalized yet.
	 */
	cells[0] = 0;

#ifdef PMIXP_DEBUG_SERVER
	xassert( PMIXP_BASE_HDR_MAX >= sizeof(uint32_t));

	/* Tag the buffer so debug checks can tell it was allocated
	 * here, where the header space gets reserved.
	 */
	cells[1] = PMIXP_SERVER_BUF_MAGIC;
#endif

	/* Payload starts after the reserved area; the header itself
	 * is written just before sending.
	 */
	set_buf_offset(new_buf, reserved);
	return new_buf;
}
Пример #3
0
/*
 * Write a response to fd as a 32-bit network-order length followed by the
 * packed bytes of buffer. The buffer is always freed before returning.
 * RET SLURM_SUCCESS if everything was written, SLURM_ERROR otherwise.
 */
static int _send_resp(slurm_fd_t fd, Buf buffer)
{
	int status = SLURM_ERROR;
	uint32_t left, net_len;
	ssize_t nbytes;
	char *cursor;

	if ((fd < 0) || (!fd_writeable(fd)))
		goto done;

	/* length prefix */
	left = get_buf_offset(buffer);
	net_len = htonl(left);
	if (!fd_writeable(fd))
		goto done;
	nbytes = write(fd, &net_len, sizeof(net_len));
	if (nbytes != sizeof(net_len))
		goto done;

	/* payload, possibly in several partial writes */
	cursor = get_buf_data(buffer);
	while (left > 0) {
		if (!fd_writeable(fd))
			goto done;
		nbytes = write(fd, cursor, left);
		if (nbytes <= 0)
			goto done;
		cursor += nbytes;
		left   -= nbytes;
	}
	status = SLURM_SUCCESS;

done:
	free_buf(buffer);
	return status;
}
Пример #4
0
/*
 * Pack msg (after stamping it with IO_PROTOCOL_VERSION) and write it to fd
 * with a single write() that is retried on EINTR.
 * RET SLURM_SUCCESS when exactly io_init_msg_packed_size() bytes were
 *     written, SLURM_ERROR otherwise.
 */
int
io_init_msg_write_to_fd(int fd, struct slurm_io_init_msg *msg)
{
	Buf packed;
	void *payload;
	int written;

	xassert(msg);

	debug2("Entering io_init_msg_write_to_fd");
	msg->version = IO_PROTOCOL_VERSION;
	packed = init_buf(io_init_msg_packed_size());
	debug2("  msg->nodeid = %d", msg->nodeid);
	io_init_msg_pack(msg, packed);

	payload = get_buf_data(packed);
	/* retry only when the call was interrupted by a signal */
	do {
		written = write(fd, payload, io_init_msg_packed_size());
	} while ((written < 0) && (errno == EINTR));

	if (written < 0) {
		free_buf(packed);
		return SLURM_ERROR;
	}
	if (written != io_init_msg_packed_size()) {
		error("io init msg write too small");
		free_buf(packed);
		return SLURM_ERROR;
	}

	free_buf(packed);
	debug2("Leaving  io_init_msg_write_to_fd");
	return SLURM_SUCCESS;
}
Пример #5
0
/*
 * Callback that delivers the data for a direct modex request: wraps the
 * data in a DMDX_RESPONSE message, sends it back to the requester recorded
 * in the caddy, then releases the caddy.
 */
static void _dmdx_pmix_cb(pmix_status_t status, char *data, size_t sz,
		void *cbdata)
{
	dmdx_caddy_t *req = (dmdx_caddy_t *)cbdata;
	Buf reply = pmixp_server_new_buf();
	char *usock;
	int send_rc;

	/* response header, then the payload itself */
	_setup_header(reply, DMDX_RESPONSE, req->proc.nspace, req->proc.rank,
			status);
	packmem(data, sz, reply);

	/* address of the requesting namespace */
	usock = pmixp_info_nspace_usock(req->sender_ns);

	send_rc = pmixp_server_send(req->sender_host, PMIXP_MSG_DMDX,
			req->seq_num, usock, get_buf_data(reply),
			get_buf_offset(reply), 1);
	if (SLURM_SUCCESS != send_rc) {
		/* nothing more to do here; the requester will time out */
		PMIXP_ERROR("Cannot send direct modex response to %s",
				req->sender_host);
	}

	xfree(usock);
	free_buf(reply);
	_dmdx_free_caddy(req);
}
Пример #6
0
extern int
name_unpublish_up(char *name)
{
	Buf buf = NULL, resp_buf = NULL;
	uint32_t size, tmp_32;
	int rc;

	buf = init_buf(1024);
	pack16((uint16_t)TREE_CMD_NAME_UNPUBLISH, buf);
	packstr(name, buf);
	size = get_buf_offset(buf);

	rc = tree_msg_to_srun_with_resp(size, get_buf_data(buf), &resp_buf);
	free_buf(buf);

	if (rc == SLURM_SUCCESS) {
		safe_unpack32(&tmp_32, resp_buf);
		rc = (int) tmp_32;
	}

unpack_error:
	if (resp_buf)
		free_buf(resp_buf);
	
	return rc;
}
Пример #7
0
/*
 * Pack the already-written contents of the Buf passed as `in` into buffer
 * as one memory blob. rpc_version is not used.
 */
static void _pack_buffer(void *in,
			 uint16_t rpc_version,
			 Buf buffer)
{
	Buf src = (Buf) in;
	char *payload = get_buf_data(src);

	packmem(payload, get_buf_offset(src), buffer);
}
Пример #8
0
/*
 * slurm_persist_send_msg - write one length-prefixed message to a
 * persistent connection.
 *
 * The wire format is a 32-bit network-order length followed by the packed
 * bytes of buffer (get_buf_offset(buffer) bytes). The buffer is not freed
 * here.
 *
 * IN persist_conn - connection to write to (must not be NULL)
 * IN buffer       - packed message to send
 * RET SLURM_SUCCESS on success,
 *     EAGAIN if the fd is closed, the connection is not writeable, a write
 *            fails or the retry limit is exceeded,
 *     ESLURM_ACCESS_DENIED if errno carries that value when a reopen would
 *            otherwise be attempted,
 *     SLURM_ERROR if buffer is NULL or the connection may not be reopened.
 */
extern int slurm_persist_send_msg(
	slurm_persist_conn_t *persist_conn, Buf buffer)
{
	uint32_t msg_size, nw_size;
	char *msg;
	ssize_t msg_wrote;
	int rc, retry_cnt = 0;

	xassert(persist_conn);

	if (persist_conn->fd < 0)
		return EAGAIN;

	if (!buffer)
		return SLURM_ERROR;

	rc = slurm_persist_conn_writeable(persist_conn);
	if (rc == -1) {
		/*
		 * This label is also the target of the goto in the payload
		 * loop below. Falling out of this block re-derives msg_size
		 * and msg from buffer, so after a reopen the length prefix
		 * and the whole payload are sent again from the start.
		 */
	re_open:
		/* bounded number of reopen attempts per call */
		if (retry_cnt++ > 3)
			return EAGAIN;
		/* If errno is ESLURM_ACCESS_DENIED do not try to reopen
		 * the connection, just report that to the caller */
		if (errno == ESLURM_ACCESS_DENIED)
			return ESLURM_ACCESS_DENIED;

		if (persist_conn->flags & PERSIST_FLAG_RECONNECT) {
			slurm_persist_conn_reopen(persist_conn, true);
			rc = slurm_persist_conn_writeable(persist_conn);
		} else
			return SLURM_ERROR;
	}
	/* anything below 1 (including a failed reopen) is "not writeable" */
	if (rc < 1)
		return EAGAIN;

	/* length prefix in network byte order */
	msg_size = get_buf_offset(buffer);
	nw_size = htonl(msg_size);
	msg_wrote = write(persist_conn->fd, &nw_size, sizeof(nw_size));
	if (msg_wrote != sizeof(nw_size))
		return EAGAIN;

	/* payload; write() may be partial, so loop until all bytes are out */
	msg = get_buf_data(buffer);
	while (msg_size > 0) {
		rc = slurm_persist_conn_writeable(persist_conn);
		if (rc == -1)
			goto re_open;
		if (rc < 1)
			return EAGAIN;
		msg_wrote = write(persist_conn->fd, msg, msg_size);
		if (msg_wrote <= 0)
			return EAGAIN;
		msg += msg_wrote;
		msg_size -= msg_wrote;
	}

	return SLURM_SUCCESS;
}
Пример #9
0
/*
 * save_cred_state - save the state of a credential context to a file
 *
 * The state is packed, written to "<spooldir>/cred_state.new" and then
 * rotated into place: the current "cred_state" becomes "cred_state.old" and
 * the new file becomes "cred_state". Calls are serialized by a static mutex.
 *
 * IN ctx - credential context to pack and save
 * RET int - zero on success, otherwise an errno value (EIO when write()
 *           stored fewer bytes than requested without reporting an error)
 */
int save_cred_state(slurm_cred_ctx_t ctx)
{
	char *old_file, *new_file, *reg_file;
	int cred_fd = -1, error_code = SLURM_SUCCESS, rc;
	Buf buffer = NULL;
	static pthread_mutex_t state_mutex = PTHREAD_MUTEX_INITIALIZER;

	old_file = xstrdup(conf->spooldir);
	xstrcat(old_file, "/cred_state.old");
	reg_file = xstrdup(conf->spooldir);
	xstrcat(reg_file, "/cred_state");
	new_file = xstrdup(conf->spooldir);
	xstrcat(new_file, "/cred_state.new");

	slurm_mutex_lock(&state_mutex);
	if ((cred_fd = creat(new_file, 0600)) < 0) {
		/* capture errno before any other call can overwrite it */
		error_code = errno;
		error("creat(%s): %m", new_file);
		if (error_code == ENOSPC)
			_drain_node("SlurmdSpoolDir is full");
		goto cleanup;
	}
	buffer = init_buf(1024);
	slurm_cred_ctx_pack(ctx, buffer);
	rc = write(cred_fd, get_buf_data(buffer), get_buf_offset(buffer));
	if (rc != get_buf_offset(buffer)) {
		/*
		 * A short write does not set errno; report EIO so the caller
		 * never sees success for a truncated state file.
		 */
		error_code = (rc < 0) ? errno : EIO;
		errno = error_code;
		error("write %s error %m", new_file);
		(void) unlink(new_file);
		if (error_code == ENOSPC)
			_drain_node("SlurmdSpoolDir is full");
		goto cleanup;
	}

	/* rotate: cred_state -> cred_state.old, cred_state.new -> cred_state */
	(void) unlink(old_file);
	if (link(reg_file, old_file))
		debug4("unable to create link for %s -> %s: %m",
		       reg_file, old_file);
	(void) unlink(reg_file);
	if (link(new_file, reg_file))
		debug4("unable to create link for %s -> %s: %m",
		       new_file, reg_file);
	(void) unlink(new_file);

cleanup:
	slurm_mutex_unlock(&state_mutex);
	xfree(old_file);
	xfree(reg_file);
	xfree(new_file);
	if (buffer)
		free_buf(buffer);
	/* 0 is a valid descriptor; only the initial -1 means "not opened" */
	if (cred_fd >= 0)
		close(cred_fd);
	return error_code;
}
Пример #10
0
/*
 * Issue a direct modex request for (nspace, rank).
 *
 * The host is resolved via pmixp_nspace_resolve(), the request is recorded
 * in _dmdx_requests together with cbfunc/cbdata, and a DMDX_REQUEST message
 * is sent to that host. If the send fails, cbfunc is invoked right away
 * with PMIX_ERROR.
 *
 * RET SLURM_SUCCESS if the request was sent, SLURM_ERROR otherwise.
 */
int pmixp_dmdx_get(const char *nspace, int rank,
		   pmix_modex_cbfunc_t cbfunc, void *cbdata)
{
	dmdx_req_info_t *tracked;
	char *usock, *target;
	Buf msg;
	int status;
	uint32_t cur_seq;

	/* find out where the request has to go */
	target = pmixp_nspace_resolve(nspace, rank);
	xassert(NULL != target);
	if (NULL == target) {
		return SLURM_ERROR;
	}

	msg = pmixp_server_new_buf();

	/* message header */
	_setup_header(msg, DMDX_REQUEST, nspace, rank, SLURM_SUCCESS);
	/* usocket name of the namespace */
	usock = pmixp_info_nspace_usock(nspace);
	/* take the current sequence number, advance the counter */
	cur_seq = _dmdx_seq_num++;

	/* remember the request so a response can be matched to it */
	tracked = xmalloc(sizeof(dmdx_req_info_t));
	tracked->seq_num = cur_seq;
	tracked->cbfunc = cbfunc;
	tracked->cbdata = cbdata;
	tracked->ts = time(NULL);
#ifndef NDEBUG
	strncpy(tracked->nspace, nspace, PMIX_MAX_NSLEN);
	tracked->rank = rank;
#endif
	list_append(_dmdx_requests, tracked);

	status = pmixp_server_send(target, PMIXP_MSG_DMDX, cur_seq, usock,
			get_buf_data(msg), get_buf_offset(msg), 1);

	/* the message resources are no longer needed */
	xfree(usock);
	free_buf(msg);

	if (SLURM_SUCCESS == status) {
		return status;
	}

	PMIXP_ERROR("Cannot send direct modex request to %s", target);
	cbfunc(PMIX_ERROR, NULL, 0, cbdata, NULL, NULL);
	return SLURM_ERROR;
}
Пример #11
0
/*
 * Copy the part of inbuf that follows the collective range description.
 *
 * Starting at offs, the ranges are unpacked (and discarded) with
 * pmixp_coll_unpack_ranges(); every byte between that point and the
 * original offset of inbuf is copied into a freshly allocated buffer
 * returned through outbuf. The offset of inbuf is restored before return.
 *
 * RET the return code of pmixp_coll_unpack_ranges().
 */
static int _copy_payload(Buf inbuf, size_t offs, Buf *outbuf)
{
	pmix_proc_t *ranges = NULL;
	size_t nranges = 0;
	pmixp_coll_type_t coll_type = 0;
	size_t saved_offset, payload_len;
	char *payload;
	Buf copy;
	int rc;

	/* remember where the caller was, then rewind to the ranges */
	saved_offset = get_buf_offset(inbuf);
	set_buf_offset(inbuf, offs);

	rc = pmixp_coll_unpack_ranges(inbuf, &coll_type, &ranges, &nranges);
	xfree(ranges);

	/* whatever is left after the ranges is the payload */
	payload = get_buf_data(inbuf) + get_buf_offset(inbuf);
	payload_len = saved_offset - get_buf_offset(inbuf);

	copy = init_buf(payload_len);
	memcpy(get_buf_data(copy), payload, payload_len);
	*outbuf = copy;

	set_buf_offset(inbuf, saved_offset);
	return rc;
}
Пример #12
0
/*
 * archive_write_file - write a packed archive buffer to a new file.
 *
 * The file name is produced by _make_archive_name() from the period,
 * cluster, directory and type arguments. Calls are serialized by a static
 * mutex, which is released on every return path.
 *
 * IN buffer         - packed data; get_buf_offset(buffer) bytes are written
 * IN cluster_name   - cluster the archive belongs to
 * IN period_start   - start of the archived period
 * IN period_end     - end of the archived period
 * IN arch_dir       - directory passed to _make_archive_name()
 * IN arch_type      - archive type passed to _make_archive_name()
 * IN archive_period - period flags passed to _make_archive_name()
 * RET SLURM_SUCCESS, or SLURM_ERROR if the name could not be built, the
 *     file could not be created or a write failed.
 */
extern int archive_write_file(Buf buffer, char *cluster_name,
			      time_t period_start, time_t period_end,
			      char *arch_dir, char *arch_type,
			      uint32_t archive_period)
{
	int fd = 0;
	int rc = SLURM_SUCCESS;
	char *new_file = NULL;
	static pthread_mutex_t local_file_lock = PTHREAD_MUTEX_INITIALIZER;

	xassert(buffer);

	slurm_mutex_lock(&local_file_lock);

	/* write the buffer to file */
	new_file = _make_archive_name(period_start, period_end,
				      cluster_name, arch_dir,
				      arch_type, archive_period);
	if (!new_file) {
		error("%s: Unable to make archive file name.", __func__);
		rc = SLURM_ERROR;
		/* leave through the unlock path, never with the lock held */
		goto unlock;
	}

	debug("Storing %s archive for %s at %s",
	      arch_type, cluster_name, new_file);

	fd = creat(new_file, 0600);
	if (fd < 0) {
		error("Can't save archive, create file %s error %m", new_file);
		rc = SLURM_ERROR;
	} else {
		ssize_t amount;
		uint32_t pos = 0, nwrite = get_buf_offset(buffer);
		char *data = (char *)get_buf_data(buffer);
		while (nwrite > 0) {
			amount = write(fd, &data[pos], nwrite);
			if (amount < 0) {
				/*
				 * Interrupted: retry without touching the
				 * counters (amount is -1 here).
				 */
				if (errno == EINTR)
					continue;
				error("Error writing file %s, %m", new_file);
				rc = SLURM_ERROR;
				break;
			}
			nwrite -= amount;
			pos    += amount;
		}
		fsync(fd);
		close(fd);
	}

	xfree(new_file);

unlock:
	slurm_mutex_unlock(&local_file_lock);

	return rc;
}
Пример #13
0
/*
 * stepd_completion - send a step completion message to slurmstepd over fd
 * and read back its reply.
 *
 * IN fd               - descriptor connected to slurmstepd
 * IN protocol_version - protocol version used to pack the accounting data
 * IN sent             - completion message (range, return code, jobacct)
 *
 * Returns the code read back from slurmstepd, with errno set to the errno
 * value read back. Returns SLURM_ERROR for an unsupported protocol version
 * and -1 if any read or write on fd fails.
 */
int
stepd_completion(int fd, uint16_t protocol_version, step_complete_msg_t *sent)
{
	int req = REQUEST_STEP_COMPLETION_V2;
	int rc;
	int errnum = 0;
	Buf buffer;
	int len = 0;

	buffer = init_buf(0);

	debug("Entering stepd_completion for %u.%u, range_first = %d, range_last = %d",
	      sent->job_id, sent->job_step_id,
	      sent->range_first, sent->range_last);

	if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
		safe_write(fd, &req, sizeof(int));
		safe_write(fd, &sent->range_first, sizeof(int));
		safe_write(fd, &sent->range_last, sizeof(int));
		safe_write(fd, &sent->step_rc, sizeof(int));

		/*
		 * We must not use setinfo over a pipe with slurmstepd here.
		 * slurmd makes heavy use of getinfo over a pipe with
		 * slurmstepd, and doing the reverse can deadlock:
		 * slurmd(lockforread,write)/slurmstepd(write,lockforread)
		 * Pack/unpack instead, to keep slurmd and slurmstepd
		 * independent.
		 */
		jobacctinfo_pack(sent->jobacct, protocol_version,
				 PROTOCOL_TYPE_SLURM, buffer);
		len = get_buf_offset(buffer);
		safe_write(fd, &len, sizeof(int));
		safe_write(fd, get_buf_data(buffer), len);
		/*
		 * Free and NULL the buffer: a failing safe_read() below jumps
		 * to rwfail, which must not free it a second time.
		 */
		FREE_NULL_BUFFER(buffer);

		/* Receive the return code and errno */
		safe_read(fd, &rc, sizeof(int));
		safe_read(fd, &errnum, sizeof(int));
	} else {
		error("%s: bad protocol version %hu",
		      __func__, protocol_version);
		rc = SLURM_ERROR;
		/* nothing was packed or sent; do not leak the buffer */
		FREE_NULL_BUFFER(buffer);
	}

	errno = errnum;
	return rc;

rwfail:
	FREE_NULL_BUFFER(buffer);
	return -1;
}
Пример #14
0
/*
 * Save the global switch state.
 *
 * With HAVE_NATIVE_CRAY the state produced by _state_write_buf() is written
 * to "<dir_name>/switch_cray_state" (any previous file is removed first).
 * Without it this is a no-op that returns SLURM_SUCCESS.
 */
int switch_p_libstate_save(char *dir_name)
{
#ifdef HAVE_NATIVE_CRAY
    Buf state;
    char *path;
    int status = SLURM_SUCCESS;
    int fd;

    xassert(dir_name != NULL);

    if (debug_flags & DEBUG_FLAG_SWITCH)
        CRAY_INFO("save to %s", dir_name);

    state = init_buf(SWITCH_BUF_SIZE);
    _state_write_buf(state);

    path = xstrdup(dir_name);
    xstrcat(path, "/switch_cray_state");
    (void) unlink(path);

    fd = creat(path, 0600);
    if (fd >= 0) {
        char  *cursor = get_buf_data(state);
        size_t left = get_buf_offset(state);

        /* keep writing until write() reports that 0 bytes were written */
        for (;;) {
            int nw = write(fd, cursor, left);
            if (nw > 0) {
                cursor += nw;
                left -= nw;
                continue;
            }
            if (nw == 0)
                break;
            /* nw < 0: retry if interrupted, give up otherwise */
            if (errno == EINTR)
                continue;
            CRAY_ERR("Can't save switch state: %m");
            status = SLURM_ERROR;
            break;
        }
        close(fd);
    } else {
        CRAY_ERR("Can't save state, error creating file %s %m",
                 path);
        status = SLURM_ERROR;
    }
    xfree(path);

    if (state)
        free_buf(state);

    return status;
#else
    return SLURM_SUCCESS;
#endif
}
Пример #15
0
/*
 * Forward one contribution to the next peer of the ring.
 *
 * A forward buffer is taken from _get_fwd_buf(), the ring header is packed
 * into it, the payload is appended and the buffer is handed to
 * pmixp_server_send_nb() with _ring_sent_cb as completion callback.
 *
 * IN coll_ctx   - ring context the contribution belongs to
 * IN contrib_id - id stored in the header as contrib_id
 * IN hop_seq    - hop sequence stored in the header
 * IN data       - payload to forward
 * IN size       - payload size in bytes
 * RET SLURM_ERROR if no forward buffer is available, otherwise the return
 *     code of pmixp_server_send_nb().
 */
static int _ring_forward_data(pmixp_coll_ring_ctx_t *coll_ctx, uint32_t contrib_id,
			      uint32_t hop_seq, void *data, size_t size)
{
	pmixp_coll_ring_msg_hdr_t hdr;
	pmixp_coll_t *coll = _ctx_get_coll(coll_ctx);
	pmixp_coll_ring_t *ring = &coll->state.ring;
	hdr.nodeid = coll->my_peerid;
	hdr.msgsize = size;
	hdr.seq = coll_ctx->seq;
	hdr.hop_seq = hop_seq;
	hdr.contrib_id = contrib_id;
	pmixp_ep_t *ep = (pmixp_ep_t*)xmalloc(sizeof(*ep));
	pmixp_coll_ring_cbdata_t *cbdata = NULL;
	uint32_t offset = 0;
	Buf buf = _get_fwd_buf(coll_ctx);
	int rc = SLURM_SUCCESS;


	pmixp_coll_ring_ctx_sanity_check(coll_ctx);

#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("%p: transit data to nodeid=%d, seq=%d, hop=%d, size=%lu, contrib=%d",
		    coll_ctx, _ring_next_id(coll), hdr.seq,
		    hdr.hop_seq, hdr.msgsize, hdr.contrib_id);
#endif
	if (!buf) {
		/* ep has not been handed to anyone yet: release it here */
		xfree(ep);
		rc = SLURM_ERROR;
		goto exit;
	}
	ep->type = PMIXP_EP_NOIDEID;
	ep->ep.nodeid = ring->next_peerid;

	/* pack ring info */
	_pack_coll_ring_info(coll, &hdr, buf);

	/* append the payload right after the packed header */
	offset = get_buf_offset(buf);
	pmixp_server_buf_reserve(buf, size);
	memcpy(get_buf_data(buf) + offset, data, size);
	set_buf_offset(buf, offset + size);

	/* context for the send-completion callback */
	cbdata = xmalloc(sizeof(pmixp_coll_ring_cbdata_t));
	cbdata->buf = buf;
	cbdata->coll = coll;
	cbdata->coll_ctx = coll_ctx;
	cbdata->seq = coll_ctx->seq;
	/*
	 * NOTE(review): who owns ep after pmixp_server_send_nb() is not
	 * visible from here - confirm whether it has to be freed after
	 * the call.
	 */
	rc = pmixp_server_send_nb(ep, PMIXP_MSG_RING, coll_ctx->seq, buf,
				  _ring_sent_cb, cbdata);
exit:
	return rc;
}
Пример #16
0
/*
 * Put the packed header directly in front of the payload of a server
 * buffer and return a pointer to the resulting message.
 *
 * Cell 0 of the buffer stores the offset at which the message starts; a
 * value of 0 means the buffer has not been finalized yet. The first call
 * copies nhdr (hsize bytes) so that it ends at PMIXP_SERVER_BUFFER_OFFS and
 * records the start offset; later calls reuse the recorded offset.
 *
 * OUT dsize - number of bytes from the returned pointer to the buffer offset
 * RET pointer to the first byte of the message
 */
static void *_buf_finalize(Buf buf, void *nhdr, size_t hsize,
			   size_t *dsize)
{
	char *base = get_buf_data(buf);
	uint32_t *cells = (uint32_t *) base;
	size_t start;

	if (cells[0]) {
		/* finalized earlier: reuse the stored start offset */
		start = cells[0];
#ifdef PMIXP_DEBUG_SERVER
		/* the header is expected to be unchanged */
		xassert(0 == memcmp(base+start, nhdr, hsize));
#endif
	} else {
		start = PMIXP_SERVER_BUFFER_OFFS - hsize;
#ifdef PMIXP_DEBUG_SERVER
		xassert(PMIXP_BASE_HDR_MAX >= hsize);
		xassert(PMIXP_BASE_HDR_MAX <= get_buf_offset(buf));
		/* Only buffers carrying the magic had the header space
		 * reserved for them at allocation time
		 */
		xassert(PMIXP_SERVER_BUF_MAGIC == cells[1]);
#endif
		/* The space was reserved when the buffer was created,
		 * so the header can simply be copied in place
		 */
		if (hsize) {
			memcpy(base + start, nhdr, hsize);
		}
		cells[0] = start;
	}

	*dsize = get_buf_offset(buf) - start;
	return base + start;
}
Пример #17
0
/****************************************************************************
 * Functions for agent to manage queue of pending message for the Slurm DBD
 ****************************************************************************/
/*
 * Read one saved record from fd: a 32-bit length, that many bytes of
 * message and a trailing 32-bit magic that must equal DBD_MAGIC.
 * RET a buffer holding the message with its offset set to the message
 *     length, or NULL at end of file or on any error.
 */
static Buf _load_dbd_rec(int fd)
{
	ssize_t want, got;
	uint32_t rec_len, trailer;
	char *dst;
	Buf rec;

	/* record length */
	want = sizeof(rec_len);
	got = read(fd, &rec_len, want);
	if (got == 0)
		return (Buf) NULL;
	if (got != want) {
		error("slurmdbd: state recover error: %m");
		return (Buf) NULL;
	}
	if (rec_len > MAX_DBD_MSG_LEN) {
		error("slurmdbd: state recover error, msg_size=%u", rec_len);
		return (Buf) NULL;
	}

	/* record body, possibly delivered in pieces */
	rec = init_buf((int) rec_len);
	set_buf_offset(rec, rec_len);
	dst = get_buf_data(rec);
	for (want = rec_len; want; ) {
		got = read(fd, dst, want);
		if ((got > 0) && (got <= want)) {
			dst += got;
			want -= got;
			continue;
		}
		if ((got == -1) && (errno == EINTR))
			continue;
		error("slurmdbd: state recover error: %m");
		free_buf(rec);
		return (Buf) NULL;
	}

	/* trailing magic */
	want = sizeof(trailer);
	got = read(fd, &trailer, want);
	if ((got == want) && (trailer == DBD_MAGIC))
		return rec;

	error("slurmdbd: state recover error");
	free_buf(rec);
	return (Buf) NULL;
}
Пример #18
0
/*
 * Return a server buffer to its "not finalized" state: cell 0 is cleared
 * and the offset is moved back to PMIXP_SERVER_BUFFER_OFFS.
 * RET PMIXP_SERVER_BUFFER_OFFS
 */
size_t pmixp_server_buf_reset(Buf buf)
{
	uint32_t *cells = (uint32_t *) get_buf_data(buf);

	/* 0 in cell 0 means "header not placed yet" */
	cells[0] = 0;
#ifdef PMIXP_DEBUG_SERVER
	xassert( PMIXP_BASE_HDR_MAX >= sizeof(uint32_t));
	xassert( PMIXP_BASE_HDR_MAX <= get_buf_offset(buf) );
	/* Re-tag the buffer with the magic that the debug checks
	 * look for before the header is written into it
	 */
	cells[1] = PMIXP_SERVER_BUF_MAGIC;
#endif
	set_buf_offset(buf, PMIXP_SERVER_BUFFER_OFFS);
	return PMIXP_SERVER_BUFFER_OFFS;
}
Пример #19
0
/*
 * Reset the temporary KVS buffer and seed it with a packed message header.
 *
 * In a stepd the header is TREE_CMD_KVS_FENCE followed by the node id, the
 * node name, the number of children + 1 and kvs_seq; otherwise it is
 * TREE_CMD_KVS_FENCE_RESP followed by kvs_seq. The wait counters are
 * cleared as well.
 * RET SLURM_SUCCESS
 */
extern int
temp_kvs_init(void)
{
	uint16_t tree_cmd;
	uint32_t my_nodeid, n_children, hdr_len;
	Buf hdr = NULL;

	/* start over with an empty buffer of the default size */
	xfree(temp_kvs_buf);
	temp_kvs_cnt = 0;
	temp_kvs_size = TEMP_KVS_SIZE_INC;
	temp_kvs_buf = xmalloc(temp_kvs_size);

	/* the tree cmd goes first to simplify message sending */
	tree_cmd = in_stepd() ? TREE_CMD_KVS_FENCE : TREE_CMD_KVS_FENCE_RESP;

	hdr = init_buf(1024);
	pack16(tree_cmd, hdr);
	if (in_stepd()) {
		my_nodeid = job_info.nodeid;
		/* XXX: TBC */
		n_children = tree_info.num_children + 1;

		pack32(my_nodeid, hdr); /* from_nodeid */
		packstr(tree_info.this_node, hdr); /* from_node */
		pack32(n_children, hdr); /* num_children */
	}
	/* both variants end with the sequence number */
	pack32(kvs_seq, hdr);

	hdr_len = get_buf_offset(hdr);
	if (temp_kvs_cnt + hdr_len > temp_kvs_size) {
		temp_kvs_size += TEMP_KVS_SIZE_INC;
		xrealloc(temp_kvs_buf, temp_kvs_size);
	}
	memcpy(&temp_kvs_buf[temp_kvs_cnt], get_buf_data(hdr), hdr_len);
	temp_kvs_cnt += hdr_len;
	free_buf(hdr);

	tasks_to_wait = 0;
	children_to_wait = 0;

	return SLURM_SUCCESS;
}
Пример #20
0
/*
 * Pack a TREE_CMD_SPAWN_RESP command followed by resp and send it to srun.
 * RET the return code of tree_msg_to_srun().
 */
extern int
spawn_resp_send_to_srun(spawn_resp_t *resp)
{
	uint16_t tree_cmd = TREE_CMD_SPAWN_RESP;
	Buf msg = init_buf(1024);
	int status;

	pack16(tree_cmd, msg);
	spawn_resp_pack(resp, msg);

	status = tree_msg_to_srun(get_buf_offset(msg), get_buf_data(msg));
	free_buf(msg);

	return status;
}
Пример #21
0
/*
 * Pack resp and send it over fd with slurm_msg_sendto().
 * Unlike the srun/stepd variants, no tree command is packed in front of
 * the response (sync with spawn_req_send_to_srun).
 * RET the return code of slurm_msg_sendto().
 */
extern int
spawn_resp_send_to_fd(spawn_resp_t *resp, int fd)
{
	Buf msg = init_buf(1024);
	int status;

	spawn_resp_pack(resp, msg);
	status = slurm_msg_sendto(fd, get_buf_data(msg), get_buf_offset(msg),
				  SLURM_PROTOCOL_NO_SEND_RECV_FLAGS);
	free_buf(msg);

	return status;
}
Пример #22
0
/*
 * stepd_completion - send a step completion message to slurmstepd over fd
 * and read back its reply.
 *
 * IN fd   - descriptor connected to slurmstepd
 * IN sent - completion message (range, return code, jobacct)
 *
 * Returns the code read back from slurmstepd, with errno set to the errno
 * value read back. Returns -1 if any read or write on fd fails.
 */
int
stepd_completion(int fd, step_complete_msg_t *sent)
{
	int req = REQUEST_STEP_COMPLETION_V2;
	int rc;
	int errnum = 0;
	Buf buffer;
	int len = 0;
	int version = SLURM_PROTOCOL_VERSION;

	buffer = init_buf(0);

	debug("Entering stepd_completion, range_first = %d, range_last = %d",
	      sent->range_first, sent->range_last);
	safe_write(fd, &req, sizeof(int));
	safe_write(fd, &version, sizeof(int));
	safe_write(fd, &sent->range_first, sizeof(int));
	safe_write(fd, &sent->range_last, sizeof(int));
	safe_write(fd, &sent->step_rc, sizeof(int));
	/*
	 * We must not use setinfo over a pipe with slurmstepd here.
	 * slurmd makes heavy use of getinfo over a pipe with slurmstepd,
	 * and doing the reverse can deadlock:
	 * slurmd(lockforread,write)/slurmstepd(write,lockforread)
	 * Pack/unpack instead, to keep slurmd and slurmstepd independent.
	 */
	jobacctinfo_pack(sent->jobacct, SLURM_PROTOCOL_VERSION,
			 PROTOCOL_TYPE_SLURM, buffer);
	len = get_buf_offset(buffer);
	safe_write(fd, &len, sizeof(int));
	safe_write(fd, get_buf_data(buffer), len);
	free_buf(buffer);
	/* mark as released so the rwfail path does not free it again */
	buffer = NULL;

	/* Receive the return code and errno */
	safe_read(fd, &rc, sizeof(int));
	safe_read(fd, &errnum, sizeof(int));

	errno = errnum;
	return rc;
rwfail:
	/* a failed write above leaves the buffer allocated: release it */
	if (buffer)
		free_buf(buffer);
	return -1;
}
Пример #23
0
/* save and purge the libstate if free_flag is true */
static int _switch_p_libstate_save ( char * dir_name, bool free_flag )
{
	Buf buffer;
	char *file_name;
	int ret = SLURM_SUCCESS;
	int state_fd;

	buffer = init_buf(NRT_LIBSTATE_LEN);
	(void) nrt_libstate_save(buffer, free_flag);
	file_name = xstrdup(dir_name);
	xstrcat(file_name, "/nrt_state");
	(void) unlink(file_name);
	state_fd = creat(file_name, 0600);
	if (state_fd < 0) {
		error("Can't save state, error creating file %s %m",
		      file_name);
		ret = SLURM_ERROR;
	} else {
		char  *buf = get_buf_data(buffer);
		size_t len = get_buf_offset(buffer);
		while (1) {
	  		int wrote = write (state_fd, buf, len);
			if ((wrote < 0) && (errno == EINTR))
				continue;
	 		if (wrote == 0)
		 		break;
			if (wrote < 0) {
				error("Can't save switch state: %m");
				ret = SLURM_ERROR;
				break;
			}
			buf += wrote;
			len -= wrote;
		}
		close(state_fd);
	}
	xfree(file_name);

	if (buffer)
		free_buf(buffer);

	return ret;
}
Пример #24
0
/*
 * Send a DMDX_RESPONSE that carries only an error status back to the
 * requester.
 *
 * IN seq_num     - sequence number of the request being answered
 * IN sender_host - host the response is sent to
 * IN sender_ns   - namespace used to build the destination address
 * IN status      - status placed in the response header
 */
static void _respond_with_error(int seq_num, char *sender_host,
				char *sender_ns, int status)
{
	/*
	 * Allocate through pmixp_server_new_buf(), like every other caller
	 * of pmixp_server_send() in this file, so the message has the same
	 * layout (reserved space in front of the packed data).
	 */
	Buf buf = pmixp_server_new_buf();
	char *addr;
	int rc;

	/* rank doesn't matter here, don't send it */
	_setup_header(buf, DMDX_RESPONSE, pmixp_info_namespace(), -1, status);
	/* generate namespace usocket name */
	addr = pmixp_info_nspace_usock(sender_ns);
	/* send response */
	rc = pmixp_server_send(sender_host, PMIXP_MSG_DMDX, seq_num, addr,
			get_buf_data(buf), get_buf_offset(buf), 1);
	if (SLURM_SUCCESS != rc) {
		PMIXP_ERROR("Cannot send direct modex error" " response to %s",
				sender_host);
	}
	xfree(addr);
	free_buf(buf);
}
Пример #25
0
/*
 * Pack a TREE_CMD_SPAWN command followed by req, send it to srun and
 * unpack the response into *resp_ptr.
 * RET the error from tree_msg_to_srun_with_resp() if the exchange failed,
 *     otherwise the return code of spawn_resp_unpack().
 */
extern int
spawn_req_send_to_srun(spawn_req_t *req, spawn_resp_t **resp_ptr)
{
	uint16_t tree_cmd = TREE_CMD_SPAWN;
	Buf request = NULL, reply = NULL;
	int status;

	request = init_buf(2048);
	pack16(tree_cmd, request);
	spawn_req_pack(req, request);

	status = tree_msg_to_srun_with_resp(get_buf_offset(request),
					    get_buf_data(request), &reply);
	free_buf(request);
	if (status != SLURM_SUCCESS)
		return status;

	status = spawn_resp_unpack(resp_ptr, reply);
	free_buf(reply);
	return status;
}
Пример #26
0
/*
 * Append the unread remainder of buf to the temporary KVS buffer, growing
 * that buffer by the size of the remainder when it does not fit.
 * RET SLURM_SUCCESS
 */
extern int
temp_kvs_merge(Buf buf)
{
	uint32_t unread, start;
	char *base;

	unread = remaining_buf(buf);
	if (unread == 0) {
		/* nothing to merge */
		return SLURM_SUCCESS;
	}
	base = get_buf_data(buf);
	start = get_buf_offset(buf);

	/* make room first, then copy */
	if (temp_kvs_cnt + unread > temp_kvs_size) {
		temp_kvs_size += unread;
		xrealloc(temp_kvs_buf, temp_kvs_size);
	}
	memcpy(&temp_kvs_buf[temp_kvs_cnt], base + start, unread);
	temp_kvs_cnt += unread;

	return SLURM_SUCCESS;
}
Пример #27
0
/*
 * Store one contribution in the ring buffer of coll_ctx and, unless
 * contrib_id equals _ring_next_id(coll), forward it to the next node.
 * RET SLURM_SUCCESS, or SLURM_ERROR if forwarding failed.
 */
inline static int _pmixp_coll_contrib(pmixp_coll_ring_ctx_t *coll_ctx,
				      int contrib_id,
				      uint32_t hop, char *data, size_t size)
{
	pmixp_coll_t *coll = _ctx_get_coll(coll_ctx);
	char *stored = NULL;
	int fwd_rc;

	/* refresh the timestamp of the collective */
	coll->ts = time(NULL);

	/* make room for the contribution */
	if (!size_buf(coll_ctx->ring_buf)) {
		/* empty buffer: size it for one contribution per peer */
		grow_buf(coll_ctx->ring_buf, size * coll->peers_cnt);
	} else if(remaining_buf(coll_ctx->ring_buf) < size) {
		uint32_t extra = size_buf(coll_ctx->ring_buf) + size *
			_ring_remain_contrib(coll_ctx);
		grow_buf(coll_ctx->ring_buf, extra);
	}
	grow_buf(coll_ctx->ring_buf, size);

	/* append the data at the current offset and advance it */
	stored = get_buf_data(coll_ctx->ring_buf) +
		get_buf_offset(coll_ctx->ring_buf);
	memcpy(stored, data, size);
	set_buf_offset(coll_ctx->ring_buf,
		       get_buf_offset(coll_ctx->ring_buf) + size);

	/* no forwarding when the contribution came from the next node */
	if (contrib_id == _ring_next_id(coll)) {
		return SLURM_SUCCESS;
	}

	fwd_rc = _ring_forward_data(coll_ctx, contrib_id, hop,
				    stored, size);
	if (fwd_rc) {
		PMIXP_ERROR("Cannot forward ring data");
		return SLURM_ERROR;
	}

	return SLURM_SUCCESS;
}
Пример #28
0
/*
 * Progress step for the PMIXP_COLL_UPFWD_WPC state.
 *
 * Nothing happens until coll->contrib_prnt is set. Once it is, the
 * collective moves to PMIXP_COLL_DOWNFWD and, if a callback is installed,
 * the data held in dfwd_buf (from dfwd_offset up to the buffer offset) is
 * delivered locally.
 *
 * RET false if there was nothing to do, true if the state changed.
 */
static int _progress_ufwd_wpc(pmixp_coll_t *coll)
{
	pmixp_coll_cbdata_t *cb_ctx;
	char *payload;
	size_t payload_len;

	xassert(PMIXP_COLL_UPFWD_WPC == coll->state);

	if (!coll->contrib_prnt) {
		return false;
	}

	/* only the local completion callback (if any) is waited for */
	coll->dfwd_status = PMIXP_COLL_SND_ACTIVE;
	coll->dfwd_cb_wait = 0;

	/* advance the state machine */
	coll->state = PMIXP_COLL_DOWNFWD;

	if (!coll->cbfunc) {
		/* no local delivery, but the state did change */
		return true;
	}

	/* local delivery */
	cb_ctx = xmalloc(sizeof(pmixp_coll_cbdata_t));
	cb_ctx->coll = coll;
	cb_ctx->seq = coll->seq;
	cb_ctx->refcntr = 1;

	payload = get_buf_data(coll->dfwd_buf) + coll->dfwd_offset;
	payload_len = get_buf_offset(coll->dfwd_buf) -
			coll->dfwd_offset;
	coll->cbfunc(PMIX_SUCCESS, payload, payload_len, coll->cbdata,
		     _libpmix_cb, (void *)cb_ctx);
	coll->dfwd_cb_wait++;
#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("%p: local delivery, size = %lu",
		    coll, (uint64_t)payload_len);
#endif

	/* events observed - another iteration is needed */
	return true;
}
Пример #29
0
/*
 * Pack a TREE_CMD_SPAWN_RESP command followed by resp and send it to the
 * stepd(s) on the host list built from node.
 * RET the return code of tree_msg_to_stepds().
 */
extern int
spawn_resp_send_to_stepd(spawn_resp_t *resp, char *node)
{
	uint16_t tree_cmd = TREE_CMD_SPAWN_RESP;
	Buf msg = init_buf(1024);
	hostlist_t targets;
	int status;

	pack16(tree_cmd, msg);
	spawn_resp_pack(resp, msg);

	targets = hostlist_create(node);
	status = tree_msg_to_stepds(targets,
				    get_buf_offset(msg),
				    get_buf_data(msg));
	hostlist_destroy(targets);
	free_buf(msg);

	return status;
}
Пример #30
0
/*
 * Record the local contribution to a collective.
 *
 * Under coll->lock the collective is switched from PMIXP_COLL_SYNC to
 * PMIXP_COLL_FAN_IN if needed, the data is appended to coll->buf and the
 * local contribution is marked as received. After the lock is dropped,
 * _progress_fan_in() is invoked.
 *
 * RET SLURM_SUCCESS
 */
int pmixp_coll_contrib_local(pmixp_coll_t *coll, char *data, size_t size)
{
	char *dst;

	PMIXP_DEBUG("%s:%d: get local contribution", pmixp_info_namespace(),
			pmixp_info_nodeid());

	pmixp_coll_sanity_check(coll);

	slurm_mutex_lock(&coll->lock);

	/* the first contribution moves the collective out of SYNC */
	if (PMIXP_COLL_SYNC == coll->state) {
		PMIXP_DEBUG(
				"%s:%d: get local contribution: switch to PMIXP_COLL_FAN_IN",
				pmixp_info_namespace(), pmixp_info_nodeid());
		coll->state = PMIXP_COLL_FAN_IN;
		coll->ts = time(NULL);
	}
	xassert(PMIXP_COLL_FAN_IN == coll->state);

	/* mark the contribution and append its data to the buffer */
	coll->contrib_local = true;
	grow_buf(coll->buf, size);
	dst = get_buf_data(coll->buf) + get_buf_offset(coll->buf);
	memcpy(dst, data, size);
	set_buf_offset(coll->buf, get_buf_offset(coll->buf) + size);

	slurm_mutex_unlock(&coll->lock);

	/* see whether the collective can make progress now */
	_progress_fan_in(coll);

	PMIXP_DEBUG("%s:%d: get local contribution: finish",
			pmixp_info_namespace(), pmixp_info_nodeid());

	return SLURM_SUCCESS;
}