int pmixp_server_send(char *hostlist, pmixp_srv_cmd_t type, uint32_t seq,
		const char *addr, void *data, size_t size, int p2p)
{
	send_header_t hdr;
	char nhdr[sizeof(send_header_t)];
	size_t hsize;
	int rc;

	hdr.magic = PMIX_SERVER_MSG_MAGIC;
	hdr.type = type;
	hdr.msgsize = size - SEND_HDR_SIZE;
	hdr.seq = seq;
	/* Store the global nodeid that is
	 * independent of the exact collective */
	hdr.nodeid = pmixp_info_nodeid_job();
	hsize = _send_pack_hdr(&hdr, nhdr);
	memcpy(data, nhdr, hsize);

	if (!p2p) {
		rc = pmixp_stepd_send(hostlist, addr, data, size, 500, 7, 0);
	} else {
		rc = pmixp_p2p_send(hostlist, addr, data, size, 500, 7, 0);
	}
	if (SLURM_SUCCESS != rc) {
		PMIXP_ERROR("Cannot send message to %s, size = %u, hostlist:\n%s",
			    addr, (uint32_t) size, hostlist);
	}
	return rc;
}
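Note that the memcpy() above stamps the packed header over the first SEND_HDR_SIZE bytes of the caller's buffer, so callers must reserve that headroom up front. A minimal hedged sketch of a conforming caller, modeled on pmixp_dmdx_get() below and assuming pmixp_server_new_buf() pre-reserves the header region:

/* Sketch only (not from the SLURM sources): assumes that
 * pmixp_server_new_buf() leaves SEND_HDR_SIZE bytes of headroom at
 * the start of the buffer for the header that pmixp_server_send()
 * writes with memcpy(). */
static int _send_example(char *host, const char *addr, uint32_t seq)
{
	Buf buf = pmixp_server_new_buf();
	int rc;

	pack32(42, buf);	/* hypothetical payload */
	rc = pmixp_server_send(host, PMIXP_MSG_DMDX, seq, addr,
			       get_buf_data(buf), get_buf_offset(buf), 1);
	free_buf(buf);
	return rc;
}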
Example #2
void pmixp_coll_ring_reset_if_to(pmixp_coll_t *coll, time_t ts)
{
	pmixp_coll_ring_ctx_t *coll_ctx;
	int i;

	/* lock the structure */
	slurm_mutex_lock(&coll->lock);
	for (i = 0; i < PMIXP_COLL_RING_CTX_NUM; i++) {
		coll_ctx = &coll->state.ring.ctx_array[i];
		if (!coll_ctx->in_use ||
		    (PMIXP_COLL_RING_SYNC == coll_ctx->state)) {
			continue;
		}
		if (ts - coll->ts > pmixp_info_timeout()) {
			/* respond to the libpmix */
			if (coll_ctx->contrib_local && coll->cbfunc) {
				pmixp_lib_modex_invoke(coll->cbfunc,
						       PMIXP_ERR_TIMEOUT, NULL,
						       0, coll->cbdata,
						       NULL, NULL);
			}
			/* report the timeout event */
			PMIXP_ERROR("%p: collective timeout seq=%d",
				    coll, coll_ctx->seq);
			pmixp_coll_log(coll);
			/* drop the collective */
			_reset_coll_ring(coll_ctx);
		}
	}
	/* unlock the structure */
	slurm_mutex_unlock(&coll->lock);
}
Example #3
int pmixp_dconn_init(int node_cnt, pmixp_p2p_data_t direct_hdr)
{
	int i;
	memset(&_pmixp_dconn_h, 0, sizeof(_pmixp_dconn_h));

#ifdef HAVE_UCX
	if (pmixp_info_srv_direct_conn_ucx()) {
		_poll_fd = pmixp_dconn_ucx_prepare(&_pmixp_dconn_h,
						   &ep_data, &ep_len);
		_progress_type = PMIXP_DCONN_PROGRESS_HW;
		_conn_type = PMIXP_DCONN_CONN_TYPE_ONESIDE;
	} else
#endif
	{
		_poll_fd = pmixp_dconn_tcp_prepare(&_pmixp_dconn_h,
						   &ep_data, &ep_len);
		_progress_type = PMIXP_DCONN_PROGRESS_SW;
		_conn_type = PMIXP_DCONN_CONN_TYPE_TWOSIDE;
	}

	if (SLURM_ERROR == _poll_fd) {
		PMIXP_ERROR("Cannot get polling fd");
		return SLURM_ERROR;
	}
	_pmixp_dconn_conns = xmalloc(sizeof(*_pmixp_dconn_conns) * node_cnt);
	_pmixp_dconn_conn_cnt = node_cnt;
	for (i = 0; i < _pmixp_dconn_conn_cnt; i++) {
		slurm_mutex_init(&_pmixp_dconn_conns[i].lock);
		_pmixp_dconn_conns[i].nodeid = i;
		_pmixp_dconn_conns[i].state = PMIXP_DIRECT_INIT;
		_pmixp_dconn_conns[i].priv = _pmixp_dconn_h.init(i, direct_hdr);
	}
	return SLURM_SUCCESS;
}
Example #4
int pmixp_coll_unpack_ranges(Buf buf, pmixp_coll_type_t *type,
		pmix_proc_t **r, size_t *nr)
{
	pmix_proc_t *procs = NULL;
	uint32_t nprocs = 0;
	uint32_t tmp;
	int i, rc;

	/* 1. extract the type of collective */
	if (SLURM_SUCCESS != (rc = unpack32(&tmp, buf))) {
		PMIXP_ERROR("Cannot unpack collective type");
		return rc;
	}
	*type = tmp;

	/* 2. get the number of ranges */
	if (SLURM_SUCCESS != (rc = unpack32(&nprocs, buf))) {
		PMIXP_ERROR("Cannot unpack collective type");
		return rc;
	}
	*nr = nprocs;

	procs = xmalloc(sizeof(pmix_proc_t) * nprocs);
	*r = procs;

	for (i = 0; i < (int)nprocs; i++) {
		/* 3. get the namespace of the particular process */
		rc = unpackmem(procs[i].nspace, &tmp, buf);
		if (SLURM_SUCCESS != rc) {
			PMIXP_ERROR("Cannot unpack namespace for process #%d",
					i);
			return rc;
		}
		procs[i].nspace[tmp] = '\0';

		/* 4. get the rank of the particular process */
		rc = unpack32(&tmp, buf);
		if (SLURM_SUCCESS != rc) {
			PMIXP_ERROR("Cannot unpack rank for process #%d, nsp=%s",
				    i, procs[i].nspace);
			return rc;
		}
		procs[i].rank = tmp;
	}
	return SLURM_SUCCESS;
}
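For context, here is a hypothetical sketch (not a real SLURM helper) of the matching pack side for one process entry, assuming the sender packs the namespace including its terminating '\0', as the comments in _read_info() below suggest:

/* Hypothetical helper: pack one pmix_proc_t in the layout that
 * pmixp_coll_unpack_ranges() expects - a length-prefixed namespace
 * (including '\0') followed by the rank as a 32-bit value. */
static void _pack_one_proc(Buf buf, const pmix_proc_t *p)
{
	packmem((char *)p->nspace, strlen(p->nspace) + 1, buf);
	pack32((uint32_t)p->rank, buf);
}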
Example #5
static int _slurm_send(pmixp_ep_t *ep, pmixp_base_hdr_t bhdr, Buf buf)
{
	const char *addr = NULL, *data = NULL, *hostlist = NULL;
	char nhdr[PMIXP_BASE_HDR_MAX];
	size_t hsize = 0, dsize = 0;
	int rc;

	/* setup the header */
	addr = pmixp_info_srv_usock_path();

	bhdr.ext_flag = 0;
	if (pmixp_info_srv_direct_conn() && PMIXP_EP_NOIDEID == ep->type) {
		bhdr.ext_flag = 1;
	}

	hsize = _slurm_pack_hdr(&bhdr, nhdr);
	data = _buf_finalize(buf, nhdr, hsize, &dsize);

	switch (ep->type) {
	case PMIXP_EP_HLIST:
		hostlist = ep->ep.hostlist;
		rc = pmixp_stepd_send(ep->ep.hostlist, addr,
				      data, dsize, 500, 7, 0);
		break;
	case PMIXP_EP_NOIDEID: {
		char *nodename = pmixp_info_job_host(ep->ep.nodeid);
		rc = pmixp_p2p_send(nodename, addr, data, dsize,
				    500, 7, 0);
		xfree(nodename);
		break;
	}
	default:
		PMIXP_ERROR("Bad value of the EP type: %d", (int)ep->type);
		abort();
	}

	if (SLURM_SUCCESS != rc) {
		PMIXP_ERROR("Cannot send message to %s, size = %u, "
			    "hostlist:\n%s",
			    addr, (uint32_t) dsize, hostlist);
	}
	return rc;
}
Example #6
int pmixp_dmdx_get(const char *nspace, int rank,
		   pmix_modex_cbfunc_t cbfunc, void *cbdata)
{
	dmdx_req_info_t *req;
	char *addr, *host;
	Buf buf;
	int rc;
	uint32_t seq;

	/* need to send the request */
	host = pmixp_nspace_resolve(nspace, rank);
	xassert(NULL != host);
	if (NULL == host) {
		return SLURM_ERROR;
	}

	buf = pmixp_server_new_buf();

	/* setup message header */
	_setup_header(buf, DMDX_REQUEST, nspace, rank, SLURM_SUCCESS);
	/* generate namespace usocket name */
	addr = pmixp_info_nspace_usock(nspace);
	/* store cur seq. num and move to the next request */
	seq = _dmdx_seq_num++;

	/* track this request */
	req = xmalloc(sizeof(dmdx_req_info_t));
	req->seq_num = seq;
	req->cbfunc = cbfunc;
	req->cbdata = cbdata;
	req->ts = time(NULL);
#ifndef NDEBUG
	strncpy(req->nspace, nspace, PMIX_MAX_NSLEN);
	req->rank = rank;
#endif
	list_append(_dmdx_requests, req);

	/* send the request */
	rc = pmixp_server_send(host, PMIXP_MSG_DMDX, seq, addr,
			get_buf_data(buf), get_buf_offset(buf), 1);

	/* cleanup the resources */
	xfree(addr);
	free_buf(buf);

	/* check the return status */
	if (SLURM_SUCCESS != rc) {
		PMIXP_ERROR("Cannot send direct modex request to %s", host);
		cbfunc(PMIX_ERROR, NULL, 0, cbdata, NULL, NULL);
		return SLURM_ERROR;
	}

	return rc;
}
Example #7
static void _dmdx_resp(Buf buf, char *sender_host, uint32_t seq_num)
{
	dmdx_req_info_t *req;
	int rank, rc = SLURM_SUCCESS;
	int status;
	char *ns = NULL, *sender_ns = NULL;
	char *data = NULL;
	uint32_t size = 0;

	/* find the request tracker */
	ListIterator it = list_iterator_create(_dmdx_requests);
	req = (dmdx_req_info_t *)list_find(it, _dmdx_req_cmp, &seq_num);
	if (NULL == req) {
		/* We haven't sent this request! */
		PMIXP_ERROR("Received DMDX response with bad " "seq_num=%d from %s!",
			    seq_num, sender_host);
		list_iterator_destroy(it);
		rc = SLURM_ERROR;
		goto exit;
	}

	/* get the service data */
	rc = _read_info(buf, &ns, &rank, &sender_ns, &status);
	if (SLURM_SUCCESS != rc) {
		/* notify libpmix about an error */
		req->cbfunc(PMIX_ERROR, NULL, 0, req->cbdata, NULL, NULL);
		goto exit;
	}

	/* get the modex blob */
	if (SLURM_SUCCESS != (rc = unpackmem_ptr(&data, &size, buf))) {
		/* notify libpmix about an error */
		req->cbfunc(PMIX_ERROR, NULL, 0, req->cbdata, NULL,
				NULL);
		goto exit;
	}

	/* call back to libpmix-server */
	req->cbfunc(status, data, size, req->cbdata, pmixp_free_Buf,
			(void *)buf);

	/* release tracker & list iterator */
	req = NULL;
	list_delete_item(it);
	list_iterator_destroy(it);
exit:
	if (SLURM_SUCCESS != rc) {
		/* we do not expect libpmix to call the callback
		 * to clean up this buffer */
		free_buf(buf);
	}
	/* No sense in returning errors here - the engine can't do
	 * anything anyway. We've notified libpmix, that's enough */
}
Example #8
bool pmixp_server_pp_check_fini(int size)
{
	if ((pmixp_server_pp_count() + 1) >=
	    (_pmixp_pp_warmup + _pmixp_pp_iters)) {
		slurm_mutex_lock(&_pmixp_pp_lock);
		PMIXP_ERROR("latency: %d - %.9lf", size,
			    (GET_TS() - _pmixp_pp_start) / _pmixp_pp_iters);
		slurm_mutex_unlock(&_pmixp_pp_lock);
		return true;
	}
	return false;
}
Example #9
static int _read_type(Buf buf, dmdx_type_t *type)
{
	unsigned char t;
	int rc;
	/* 1. unpack message type */
	if (SLURM_SUCCESS != (rc = unpack8(&t, buf))) {
		PMIXP_ERROR("Cannot unpack message type!");
		return SLURM_ERROR;
	}
	*type = (dmdx_type_t)t;
	return SLURM_SUCCESS;
}
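The sender side presumably stamps the same single byte ahead of the payload (in these examples that happens inside _setup_header()); a minimal sketch using SLURM's pack8(), with _write_type() as a hypothetical name:

/* Hypothetical counterpart to _read_type(): pack the DMDX message
 * type as a single byte at the current buffer offset. */
static void _write_type(Buf buf, dmdx_type_t type)
{
	pack8((uint8_t)type, buf);
}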
Example #10
static void _fan_in_finished(pmixp_coll_t *coll)
{
	xassert(PMIXP_COLL_FAN_IN == coll->state);
	coll->state = PMIXP_COLL_FAN_OUT;
	memset(coll->ch_contribs, 0, sizeof(int) * coll->children_cnt);
	coll->contrib_cntr = 0;
	coll->contrib_local = 0;
	set_buf_offset(coll->buf, coll->serv_offs);
	if (SLURM_SUCCESS != _pack_ranges(coll)) {
		PMIXP_ERROR("Cannot pack ranges to coll message header!");
	}
}
Example #11
static int _read_info(Buf buf, char **ns, int *rank,
		      char **sender_ns, int *status)
{
	uint32_t cnt, uint32_tmp;
	int rc;
	*ns = NULL;
	*sender_ns = NULL;

	/* 1. unpack namespace */
	if (SLURM_SUCCESS != (rc = unpackmem_ptr(ns, &cnt, buf))) {
		PMIXP_ERROR("Cannot unpack requested namespace!");
		return rc;
	}
	/* We are supposed to unpack a whole null-terminated string
	 * (including '\0'), so there is no need for:
	 * (*ns)[cnt] = '\0';
	 */

	/* 2. unpack rank */
	if (SLURM_SUCCESS != (rc = unpack32(&uint32_tmp, buf))) {
		PMIXP_ERROR("Cannot unpack requested rank!");
		return rc;
	}
	*rank = uint32_tmp;

	/* 3. unpack sender namespace */
	if (SLURM_SUCCESS != (rc = unpackmem_ptr(sender_ns, &cnt, buf))) {
		PMIXP_ERROR("Cannot unpack sender namespace!");
		return rc;
	}
	/* We are supposed to unpack a whole null-terminated string
	 * (including '\0'), so there is no need for:
	 * (*sender_ns)[cnt] = '\0';
	 */

	/* 4. unpack status */
	if (SLURM_SUCCESS != (rc = unpack32(&uint32_tmp, buf))) {
		PMIXP_ERROR("Cannot unpack rank!");
		return rc;
	}
	*status = uint32_tmp;
	return SLURM_SUCCESS;
}
Example #12
static void _reset_coll_dfwd(pmixp_coll_t *coll)
{
	/* downward status */
	(void)pmixp_server_buf_reset(coll->dfwd_buf);
	if (SLURM_SUCCESS != _pack_coll_info(coll, coll->dfwd_buf)) {
		PMIXP_ERROR("Cannot pack ranges to message header!");
	}
	coll->dfwd_cb_cnt = 0;
	coll->dfwd_cb_wait = 0;
	coll->dfwd_status = PMIXP_COLL_SND_NONE;
	coll->contrib_prnt = false;
	/* Save the total service offset */
	coll->dfwd_offset = get_buf_offset(coll->dfwd_buf);
}
Example #13
static void _reset_coll_ufwd(pmixp_coll_t *coll)
{
	/* upward status */
	coll->contrib_children = 0;
	coll->contrib_local = false;
	memset(coll->contrib_chld, 0,
	       sizeof(coll->contrib_chld[0]) * coll->chldrn_cnt);
	coll->serv_offs = pmixp_server_buf_reset(coll->ufwd_buf);
	if (SLURM_SUCCESS != _pack_coll_info(coll, coll->ufwd_buf)) {
		PMIXP_ERROR("Cannot pack ranges to message header!");
	}
	coll->ufwd_offset = get_buf_offset(coll->ufwd_buf);
	coll->ufwd_status = PMIXP_COLL_SND_NONE;
}
Example #14
void pmixp_coll_log(pmixp_coll_t *coll)
{
	PMIXP_ERROR("Dumping collective state");
	switch(coll->type) {
	case PMIXP_COLL_TYPE_FENCE_RING:
		pmixp_coll_ring_log(coll);
		break;
	case PMIXP_COLL_TYPE_FENCE_TREE:
		pmixp_coll_tree_log(coll);
		break;
	default:
		break;
	}
}
Example #15
/*
 * For this to work the following conditions are supposed to be
 * satisfied:
 * - SLURM has to be configured with the `--enable-debug` option
 * - the job step needs to have at least two nodes
 * In this case the communication exchange will be done between
 * the first two nodes.
 */
void pmixp_server_run_cperf()
{
	int size;
	size_t start, end, bound;

	pmixp_debug_hang(0);

	start = 1 << _pmixp_cperf_low;
	end = 1 << _pmixp_cperf_up;
	bound = 1 << _pmixp_cperf_bound;

	for (size = start; size <= end; size *= 2) {
		int j, iters = _pmixp_cperf_siter;
		struct timeval tv1, tv2;
		if (size >= bound) {
			iters = _pmixp_cperf_liter;
		}
		double times[iters];
		char *data = xmalloc(size);

		PMIXP_ERROR("coll perf %d", size);

		for (j = 0; j < iters; j++) {
			gettimeofday(&tv1, NULL);
			_pmixp_server_cperf_iter(data, size);
			gettimeofday(&tv2, NULL);
			times[j] = tv2.tv_sec + 1E-6 * tv2.tv_usec -
					(tv1.tv_sec + 1E-6 * tv1.tv_usec);
		}

		for (j = 0; j < iters; j++) {
			/* Output measurements to the slurmd.log */
			PMIXP_ERROR("\t%d %d: %.9lf", j, size, times[j]);
		}
		xfree(data);
	}
}
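For a sense of scale: with hypothetical bounds _pmixp_cperf_low = 0, _pmixp_cperf_up = 20 and _pmixp_cperf_bound = 16, the loop sweeps message sizes 1 B, 2 B, 4 B, ..., 1 MiB, doubling each step, and switches from _pmixp_cperf_siter to the (presumably smaller) _pmixp_cperf_liter iteration count once the size reaches 64 KiB.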
Example #16
int pmixp_fixrights(char *path, uid_t uid, mode_t mode)
{
	char nested_path[PATH_MAX];
	DIR *dp;
	struct dirent *ent;
	int rc;

	/*
	 * Make sure that "directory" exists and is a directory.
	 */
	if (1 != (rc = _is_dir(path))) {
		PMIXP_ERROR("path=\"%s\" is not a directory", path);
		return (rc == 0) ? -1 : rc;
	}

	if ((dp = opendir(path)) == NULL) {
		PMIXP_ERROR_STD("cannot open path=\"%s\"", path);
		return -1;
	}

	while ((ent = readdir(dp)) != NULL) {
		if (0 == xstrcmp(ent->d_name, ".")
		    || 0 == xstrcmp(ent->d_name, "..")) {
			/* skip special dir's */
			continue;
		}
		snprintf(nested_path, sizeof(nested_path), "%s/%s", path,
			 ent->d_name);
		if (_is_dir(nested_path)) {
			if ((rc = _file_fix_rights(nested_path, uid, mode))) {
				PMIXP_ERROR_STD("cannot fix permissions for "
						"\"%s\"",
						nested_path);
				closedir(dp);
				return -1;
			}
			/* recurse into the nested directory */
			pmixp_fixrights(nested_path, uid, mode);
		} else {
			if ((rc = _file_fix_rights(nested_path, uid, mode))) {
				PMIXP_ERROR_STD("cannot fix permissions for "
						"\"%s\"",
						nested_path);
				closedir(dp);
				return -1;
			}
		}
	}
	closedir(dp);
	return 0;
}
Example #17
static void _reset_coll(pmixp_coll_t *coll)
{
	switch (coll->state) {
	case PMIXP_COLL_SYNC:
		/* already reset */
		xassert(!coll->contrib_local && !coll->contrib_children &&
			!coll->contrib_prnt);
		break;
	case PMIXP_COLL_COLLECT:
	case PMIXP_COLL_UPFWD:
	case PMIXP_COLL_UPFWD_WSC:
		coll->seq++;
		coll->state = PMIXP_COLL_SYNC;
		_reset_coll_ufwd(coll);
		_reset_coll_dfwd(coll);
		coll->cbdata = NULL;
		coll->cbfunc = NULL;
		break;
	case PMIXP_COLL_UPFWD_WPC:
		/* If we were waiting for the parent contrib,
		 * upward portion is already reset, and may contain
		 * next collective's data */
	case PMIXP_COLL_DOWNFWD:
		/* same with downward state */
		coll->seq++;
		_reset_coll_dfwd(coll);
		if (coll->contrib_local || coll->contrib_children) {
			/* next collective was already started */
			coll->state = PMIXP_COLL_COLLECT;
		} else {
			coll->state = PMIXP_COLL_SYNC;
		}

		if (!coll->contrib_local) {
			/* drop the callback info if we haven't started
			 * next collective locally
			 */
			coll->cbdata = NULL;
			coll->cbfunc = NULL;
		}
		break;
	default:
		PMIXP_ERROR("Bad collective state = %d", (int)coll->state);
		abort();
	}
}
Example #18
void pmixp_dmdx_process(Buf buf, char *host, uint32_t seq)
{
	dmdx_type_t type;
	if (SLURM_SUCCESS != _read_type(buf, &type)) {
		/* the type is undefined on error - nothing we can do */
		return;
	}

	switch (type) {
	case DMDX_REQUEST:
		_dmdx_req(buf, host, seq);
		break;
	case DMDX_RESPONSE:
		_dmdx_resp(buf, host, seq);
		break;
	default:
		PMIXP_ERROR("Bad request from host %s. Skip", host);
		break;
	}
}
Example #19
int pmixp_coll_belong_chk(const pmixp_proc_t *procs, size_t nprocs)
{
	int i;
	pmixp_namespace_t *nsptr = pmixp_nspaces_local();
	/* Find my namespace in the range */
	for (i = 0; i < (int)nprocs; i++) {
		if (0 != xstrcmp(procs[i].nspace, nsptr->name)) {
			continue;
		}
		if (pmixp_lib_is_wildcard(procs[i].rank))
			return 0;
		if (0 <= pmixp_info_taskid2localid(procs[i].rank)) {
			return 0;
		}
	}
	/* we don't participate in this collective! */
	PMIXP_ERROR("No process controlled by this slurmstepd is involved in this collective.");
	return -1;
}
Example #20
int pmixp_coll_ring_local(pmixp_coll_t *coll, char *data, size_t size,
			  void *cbfunc, void *cbdata)
{
	int ret = SLURM_SUCCESS;
	pmixp_coll_ring_ctx_t *coll_ctx = NULL;

	/* lock the structure */
	slurm_mutex_lock(&coll->lock);

	/* sanity check */
	pmixp_coll_sanity_check(coll);

	/* setup callback info */
	coll->cbfunc = cbfunc;
	coll->cbdata = cbdata;

	coll_ctx = pmixp_coll_ring_ctx_new(coll);
	if (!coll_ctx) {
		PMIXP_ERROR("Can not get new ring collective context, seq=%u",
			    coll->seq);
		ret = SLURM_ERROR;
		goto exit;
	}

#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("%p: contrib/loc: seqnum=%u, state=%d, size=%lu",
		    coll_ctx, coll_ctx->seq, coll_ctx->state, size);
#endif

	if (_pmixp_coll_contrib(coll_ctx, coll->my_peerid, 0, data, size)) {
		ret = SLURM_ERROR;
		goto exit;
	}

	/* mark local contribution */
	coll_ctx->contrib_local = true;
	_progress_coll_ring(coll_ctx);

exit:
	/* unlock the structure */
	slurm_mutex_unlock(&coll->lock);

	return ret;
}
Example #21
static void _fan_out_finished(pmixp_coll_t *coll)
{
	coll->seq++; /* move to the next collective */
	switch (coll->state) {
	case PMIXP_COLL_FAN_OUT:
		coll->state = PMIXP_COLL_SYNC;
		break;
	case PMIXP_COLL_FAN_OUT_IN:
		/* we started to receive data for the new collective
		 * switch to the fan-in stage */
		coll->state = PMIXP_COLL_FAN_IN;
		/* set the right timestamp */
		coll->ts = coll->ts_next;
		break;
	default:
		PMIXP_ERROR("Bad collective state = %d", coll->state);
		xassert(PMIXP_COLL_FAN_OUT == coll->state || PMIXP_COLL_FAN_OUT_IN == coll->state);
	}
}
Example #22
int pmixp_coll_belong_chk(pmixp_coll_type_t type,
			  const pmix_proc_t *procs, size_t nprocs)
{
	int i;
	pmixp_namespace_t *nsptr = pmixp_nspaces_local();
	/* Find my namespace in the range */
	for (i = 0; i < (int)nprocs; i++) {
		if (0 != xstrcmp(procs[i].nspace, nsptr->name)) {
			continue;
		}
		if (procs[i].rank == PMIX_RANK_WILDCARD)
			return 0;
		if (0 <= pmixp_info_taskid2localid(procs[i].rank)) {
			return 0;
		}
	}
	/* we don't participate in this collective! */
	PMIXP_ERROR("Have collective that doesn't include this job's namespace");
	return -1;
}
Example #23
int pmixp_rmdir_recursively(char *path)
{
	char nested_path[PATH_MAX];
	DIR *dp;
	struct dirent *ent;

	int rc;

	/*
	 * Make sure that "directory" exists and is a directory.
	 */
	if (1 != (rc = _is_dir(path))) {
		PMIXP_ERROR("path=\"%s\" is not a directory", path);
		return (rc == 0) ? -1 : rc;
	}

	if ((dp = opendir(path)) == NULL) {
		PMIXP_ERROR_STD("cannot open path=\"%s\"", path);
		return -1;
	}

	while ((ent = readdir(dp)) != NULL) {
		if (0 == strcmp(ent->d_name, ".")
				|| 0 == strcmp(ent->d_name, "..")) {
			/* skip special dir's */
			continue;
		}
		snprintf(nested_path, sizeof(nested_path), "%s/%s", path,
				ent->d_name);
		if (_is_dir(nested_path)) {
			pmixp_rmdir_recursively(nested_path);
		} else {
			unlink(nested_path);
		}
	}
	closedir(dp);
	if ((rc = rmdir(path))) {
		PMIXP_ERROR_STD("Cannot remove path=\"%s\"", path);
	}
	return rc;
}
Example #24
static void _respond_with_error(int seq_num, char *sender_host,
				char *sender_ns, int status)
{
	Buf buf = create_buf(NULL, 0);
	char *addr;
	int rc;

	/* rank doesn't matter here, don't send it */
	_setup_header(buf, DMDX_RESPONSE, pmixp_info_namespace(), -1, status);
	/* generate namespace usocket name */
	addr = pmixp_info_nspace_usock(sender_ns);
	/* send response */
	rc = pmixp_server_send(sender_host, PMIXP_MSG_DMDX, seq_num, addr,
			get_buf_data(buf), get_buf_offset(buf), 1);
	if (SLURM_SUCCESS != rc) {
		PMIXP_ERROR("Cannot send direct modex error" " response to %s",
				sender_host);
	}
	xfree(addr);
	free_buf(buf);
}
Example #25
void pmixp_coll_reset_if_to(pmixp_coll_t *coll, time_t ts)
{
	/* lock the structure */
	slurm_mutex_lock(&coll->lock);

	if (PMIXP_COLL_SYNC == coll->state) {
		goto unlock;
	}

	if (ts - coll->ts > pmixp_info_timeout()) {
		/* respond to the libpmix */
		if (coll->cbfunc) {
			coll->cbfunc(PMIX_ERR_TIMEOUT, NULL, 0, coll->cbdata,
				     NULL, NULL);
		}
		/* drop the collective */
		_reset_coll(coll);
		/* report the timeout event */
		PMIXP_ERROR("Collective timeout!");
	}
unlock:
	/* unlock the structure */
	slurm_mutex_unlock(&coll->lock);
}
Example #26
inline static int _pmixp_coll_contrib(pmixp_coll_ring_ctx_t *coll_ctx,
				      int contrib_id,
				      uint32_t hop, char *data, size_t size)
{
	pmixp_coll_t *coll = _ctx_get_coll(coll_ctx);
	char *data_ptr = NULL;
	int ret;

	/* change the state */
	coll->ts = time(NULL);

	/* save contribution */
	if (!size_buf(coll_ctx->ring_buf)) {
		grow_buf(coll_ctx->ring_buf, size * coll->peers_cnt);
	} else if (remaining_buf(coll_ctx->ring_buf) < size) {
		uint32_t new_size = size_buf(coll_ctx->ring_buf) + size *
			_ring_remain_contrib(coll_ctx);
		grow_buf(coll_ctx->ring_buf, new_size);
	}
	grow_buf(coll_ctx->ring_buf, size);
	data_ptr = get_buf_data(coll_ctx->ring_buf) +
		get_buf_offset(coll_ctx->ring_buf);
	memcpy(data_ptr, data, size);
	set_buf_offset(coll_ctx->ring_buf,
		       get_buf_offset(coll_ctx->ring_buf) + size);

	/* check whether the ring is complete */
	if (contrib_id != _ring_next_id(coll)) {
		/* forward data to the next node */
		ret = _ring_forward_data(coll_ctx, contrib_id, hop,
					 data_ptr, size);
		if (ret) {
			PMIXP_ERROR("Cannot forward ring data");
			return SLURM_ERROR;
		}
	}

	return SLURM_SUCCESS;
}
Example #27
static void _libpmix_cb(void *_vcbdata)
{
	pmixp_coll_cbdata_t *cbdata = (pmixp_coll_cbdata_t*)_vcbdata;
	pmixp_coll_t *coll = cbdata->coll;

	/* lock the collective */
	slurm_mutex_lock(&coll->lock);

	if (cbdata->seq != coll->seq) {
		/* it seems like this collective was reset since the time
		 * we initiated this send.
		 * Just exit to avoid data corruption.
		 */
		PMIXP_ERROR("%p: collective was reset: myseq=%u, curseq=%u",
			    coll, cbdata->seq, coll->seq);
		goto exit;
	}

	xassert(PMIXP_COLL_DOWNFWD == coll->state);

	coll->dfwd_cb_cnt++;
#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("%p: state: %s, snd_status=%s, compl_cnt=%d/%d",
		    coll, pmixp_coll_state2str(coll->state),
		    pmixp_coll_sndstatus2str(coll->dfwd_status),
		    coll->dfwd_cb_cnt, coll->dfwd_cb_wait);
#endif
	_progress_coll(coll);

exit:
	xassert(0 < cbdata->refcntr);
	cbdata->refcntr--;
	if (!cbdata->refcntr) {
		xfree(cbdata);
	}

	/* unlock the collective */
	slurm_mutex_unlock(&coll->lock);
}
Example #28
void pmixp_coll_free(pmixp_coll_t *coll)
{
	pmixp_coll_sanity_check(coll);

	if (NULL != coll->pset.procs) {
		xfree(coll->pset.procs);
	}
#ifdef PMIXP_COLL_DEBUG
	hostlist_destroy(coll->peers_hl);
#endif
	/* check for collective in a not-SYNC state - something went wrong */
	switch(coll->type) {
	case PMIXP_COLL_TYPE_FENCE_TREE:
		if (PMIXP_COLL_TREE_SYNC != coll->state.tree.state)
			pmixp_coll_log(coll);

		pmixp_coll_tree_free(&coll->state.tree);
		break;
	case PMIXP_COLL_TYPE_FENCE_RING:
	{
		int i, ctx_in_use = 0;
		for (i = 0; i < PMIXP_COLL_RING_CTX_NUM; i++) {
			pmixp_coll_ring_ctx_t *coll_ctx =
				&coll->state.ring.ctx_array[i];
			if (coll_ctx->in_use)
				ctx_in_use++;
		}
		if (ctx_in_use)
			pmixp_coll_log(coll);
		pmixp_coll_ring_free(&coll->state.ring);
		break;
	}
	default:
		PMIXP_ERROR("Unknown coll type");
		break;
	}
	xfree(coll);
}
Example #29
int pmixp_stepd_send(const char *nodelist, const char *address,
		     const char *data, uint32_t len,
		     unsigned int start_delay,
		     unsigned int retry_cnt, int silent)
{

	int retry = 0, rc;
	unsigned int delay = start_delay; /* in milliseconds */
	char *copy_of_nodelist = xstrdup(nodelist);

	while (1) {
		if (!silent && retry >= 1) {
			PMIXP_DEBUG("send failed, rc=%d, try #%d", rc, retry);
		}

		rc = slurm_forward_data(&copy_of_nodelist, (char *)address,
					len, data);

		if (rc == SLURM_SUCCESS)
			break;

		retry++;
		if (retry >= retry_cnt) {
			PMIXP_ERROR("send failed, rc=%d, exceeded the retry limit", rc);
			break;
		}

		/* wait with exponentially increasing delay */
		struct timespec ts =
		{(delay / 1000), ((delay % 1000) * 1000000)};
		nanosleep(&ts, NULL);
		delay *= 2;
	}
	xfree(copy_of_nodelist);

	return rc;
}
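To make the retry behavior concrete: with start_delay = 500 and retry_cnt = 7 (the values used by the callers above), a persistently failing send sleeps 500 ms, 1 s, 2 s, 4 s, 8 s and 16 s between its seven attempts (roughly 31.5 s in total) before giving up.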
Example #30
static int _progress_dfwd(pmixp_coll_t *coll)
{
	xassert(PMIXP_COLL_DOWNFWD == coll->state);

	/* if all child and local callbacks have been invoked */
	if (coll->dfwd_cb_wait == coll->dfwd_cb_cnt) {
		coll->dfwd_status = PMIXP_COLL_SND_DONE;
	}

	switch (coll->dfwd_status) {
	case PMIXP_COLL_SND_ACTIVE:
		return false;
	case PMIXP_COLL_SND_FAILED:
		/* something went wrong with the downward send.
		 * notify libpmix about that and abort
		 * the collective */
		PMIXP_ERROR("%p: failed to send, abort collective", coll);
		if (coll->cbfunc) {
			coll->cbfunc(PMIX_ERROR, NULL, 0, coll->cbdata,
				     NULL, NULL);
		}
		_reset_coll(coll);
		/* Don't need to do anything else */
		return false;
	case PMIXP_COLL_SND_DONE:
		break;
	default:
		/* Should not happen, fatal error */
		abort();
	}
#ifdef PMIXP_COLL_DEBUG
	PMIXP_DEBUG("%p: collective is DONE", coll);
#endif
	_reset_coll(coll);

	return true;
}