Beispiel #1
0
/*
 * Append the node/process mapping keys of the local namespace to lresp.
 *
 * Three keys are appended, in this order:
 *   PMIX_NODE_MAP - output of PMIx_generate_regex() for the namespace
 *                   host list
 *   PMIX_PROC_MAP - output of PMIx_generate_ppn() for a string of the form
 *                   "t,t,...;t,t,...": one ';'-separated group per node,
 *                   each holding the ','-separated ids of the tasks that
 *                   task_map assigns to that node
 *   PMIX_ANL_MAP  - the string returned by pmixp_info_task_map()
 *
 * IN/OUT lresp - list the new keys are appended to
 * RET SLURM_SUCCESS, or SLURM_ERROR if one of the PMIx generators fails.
 *     Keys appended before the failure are left in lresp.
 */
static int _set_mapsinfo(List lresp)
{
	pmix_info_t *kvp;
	char *regexp = NULL, *input;
	pmixp_namespace_t *nsptr = pmixp_nspaces_local();
	hostlist_t hl = nsptr->hl;
	int rc, i, j;
	int count = hostlist_count(hl);

	input = hostlist_deranged_string_malloc(hl);
	rc = PMIx_generate_regex(input, &regexp);
	free(input);
	if (PMIX_SUCCESS != rc) {
		return SLURM_ERROR;
	}
	PMIXP_ALLOC_KEY(kvp, PMIX_NODE_MAP);
	PMIX_VAL_SET(&kvp->value, string, regexp);
	/* PMIX_VAL_SET(..., string, ...) duplicates the string, so the buffer
	 * handed back by the generator is still ours to release.
	 */
	free(regexp);
	regexp = NULL;
	list_append(lresp, kvp);

	input = NULL;
	for (i = 0; i < count; i++) {
		/* for each node - run through all tasks and
		 * record taskid's that reside on this node
		 */
		int first = 1;
		for (j = 0; j < nsptr->ntasks; j++) {
			if (nsptr->task_map[j] == i) {
				if (first) {
					first = 0;
				} else {
					xstrfmtcat(input, ",");
				}
				xstrfmtcat(input, "%d", j);
			}
		}
		/* node groups are separated by ';', no trailing separator */
		if (i < (count - 1)) {
			xstrfmtcat(input, ";");
		}
	}
	rc = PMIx_generate_ppn(input, &regexp);
	xfree(input);
	if (PMIX_SUCCESS != rc) {
		return SLURM_ERROR;
	}

	PMIXP_ALLOC_KEY(kvp, PMIX_PROC_MAP);
	PMIX_VAL_SET(&kvp->value, string, regexp);
	/* same as above: the key holds its own copy */
	free(regexp);
	regexp = NULL;
	list_append(lresp, kvp);

	PMIXP_ALLOC_KEY(kvp, PMIX_ANL_MAP);
	PMIX_VAL_SET(&kvp->value, string, pmixp_info_task_map());
	list_append(lresp, kvp);

	return SLURM_SUCCESS;
}
Beispiel #2
0
/*
 * Check whether this slurmstepd takes part in a collective.
 *
 * IN procs  - participants of the collective
 * IN nprocs - number of elements in procs
 * RET 0 if some entry of procs names the local namespace and its rank is
 *     either a wildcard or one for which pmixp_info_taskid2localid()
 *     returns a non-negative value; -1 (after logging an error) otherwise.
 */
int pmixp_coll_belong_chk(const pmixp_proc_t *procs, size_t nprocs)
{
	size_t i; /* same type as nprocs: no signed/unsigned comparison */
	pmixp_namespace_t *nsptr = pmixp_nspaces_local();

	/* Find my namespace in the range */
	for (i = 0; i < nprocs; i++) {
		if (0 != xstrcmp(procs[i].nspace, nsptr->name)) {
			continue;
		}
		/* a wildcard rank covers every rank of the namespace */
		if (pmixp_lib_is_wildcard(procs[i].rank)) {
			return 0;
		}
		if (0 <= pmixp_info_taskid2localid(procs[i].rank)) {
			return 0;
		}
	}
	/* we don't participate in this collective! */
	PMIXP_ERROR("No process controlled by this slurmstepd is involved in this collective.");
	return -1;
}
Beispiel #3
0
/*
 * Check whether this slurmstepd takes part in a collective.
 *
 * IN type   - collective type; not examined by this check
 * IN procs  - participants of the collective
 * IN nprocs - number of elements in procs
 * RET 0 if some entry of procs names the local namespace and its rank is
 *     either PMIX_RANK_WILDCARD or one for which
 *     pmixp_info_taskid2localid() returns a non-negative value;
 *     -1 (after logging an error) otherwise.
 */
int pmixp_coll_belong_chk(pmixp_coll_type_t type,
			  const pmix_proc_t *procs, size_t nprocs)
{
	size_t i; /* same type as nprocs: no signed/unsigned comparison */
	pmixp_namespace_t *nsptr = pmixp_nspaces_local();

	/* Find my namespace in the range */
	for (i = 0; i < nprocs; i++) {
		if (0 != xstrcmp(procs[i].nspace, nsptr->name)) {
			continue;
		}
		/* a wildcard rank covers every rank of the namespace */
		if (procs[i].rank == PMIX_RANK_WILDCARD) {
			return 0;
		}
		if (0 <= pmixp_info_taskid2localid(procs[i].rank)) {
			return 0;
		}
	}
	/* we don't participate in this collective! */
	PMIXP_ERROR("Have collective that doesn't include this job's namespace");
	return -1;
}
Beispiel #4
0
/*
 * Handle an incoming direct-modex request.
 *
 * The request header is unpacked from buf, validated against the local
 * namespace and its task count, and then forwarded to libpmix through
 * PMIx_server_dmodex_request() with _dmdx_pmix_cb as the completion
 * callback. When validation or the libpmix call fails, an error reply is
 * sent back with _respond_with_error() (except when the header itself
 * cannot be unpacked - then the failure is only logged).
 *
 * IN buf         - packed request; released here with free_buf() on every
 *                  path
 * IN sender_host - name of the requesting host; duplicated into the
 *                  callback context
 * IN seq_num     - sequence number of the request, used in error replies
 *                  and stored in the callback context
 */
static void _dmdx_req(Buf buf, char *sender_host, uint32_t seq_num)
{
	int rank, rc;
	int status; /* filled by _read_info(), not used by this function */
	char *ns = NULL, *sender_ns = NULL;
	pmixp_namespace_t *nsptr;
	dmdx_caddy_t *caddy = NULL;

	rc = _read_info(buf, &ns, &rank, &sender_ns,&status);
	if (SLURM_SUCCESS != rc) {
		/* there is not much we can do here, but data corruption shouldn't happen */
		PMIXP_ERROR("Fail to unpack header data in" " request from %s, rc = %d",
			    sender_host, rc);
		goto exit;
	}

	if (0 != xstrcmp(ns, pmixp_info_namespace())) {
		/* request for namespace that is not controlled by this daemon
		 * considered as error. This may change in future.  */
		PMIXP_ERROR("Bad request from %s: asked for" " nspace = %s, mine is %s",
			    sender_host, ns, pmixp_info_namespace());
		_respond_with_error(seq_num, sender_host, sender_ns,
				PMIX_ERR_INVALID_NAMESPACE);
		goto exit;
	}

	nsptr = pmixp_nspaces_local();
	if (nsptr->ntasks <= rank) {
		PMIXP_ERROR("Bad request from %s: nspace \"%s\"" " has only %d ranks, asked for %d",
			    sender_host, ns, nsptr->ntasks, rank);
		_respond_with_error(seq_num, sender_host, sender_ns,
				PMIX_ERR_BAD_PARAM);
		goto exit;
	}

	/* setup temp structure to handle information for _dmdx_pmix_cb */
	caddy = xmalloc(sizeof(dmdx_caddy_t));
	caddy->seq_num = seq_num;

	/* ns is a pointer inside incoming buffer - copy it. strncpy() does not
	 * terminate the destination when the source fills it, so bound the
	 * copy by the destination size and terminate explicitly.
	 */
	strncpy(caddy->proc.nspace, ns, sizeof(caddy->proc.nspace) - 1);
	caddy->proc.nspace[sizeof(caddy->proc.nspace) - 1] = '\0';
	ns = NULL; /* protect the data */
	caddy->proc.rank = rank;

	/* sender_host was passed from outside - copy it */
	caddy->sender_host = xstrdup(sender_host);
	sender_host = NULL; /* protect the data */

	/* sender_ns is a pointer inside incoming buffer */
	caddy->sender_ns = xstrdup(sender_ns);
	sender_ns = NULL;

	/* caddy travels to _dmdx_pmix_cb as callback data; it is released here
	 * only if the request could not be submitted.
	 */
	rc = PMIx_server_dmodex_request(&caddy->proc, _dmdx_pmix_cb,
			(void *)caddy);
	if (PMIX_SUCCESS != rc) {
		PMIXP_ERROR("Can't request modex data from libpmix-server," "requesting host = %s, nspace = %s, rank = %d, rc = %d",
			    caddy->sender_host, caddy->proc.nspace,
			    caddy->proc.rank, rc);
		_respond_with_error(seq_num, caddy->sender_host,
				caddy->sender_ns, rc);
		_dmdx_free_caddy(caddy);
	}
exit:
	/* we don't need this buffer anymore */
	free_buf(buf);

	/* no sense to return errors, engine can't do anything
	 * anyway. We've notified libpmix, that's enough */
}
Beispiel #5
0
/*
 * Fill lresp with the per-process data assigned by the RM: the job id
 * string, the node id and, for every task, one PMIX_PROC_DATA array that
 * bundles its rank keys and host name.
 */
static void _set_procdatas(List lresp)
{
	pmixp_namespace_t *nsptr = pmixp_nspaces_local();
	pmix_info_t *kvp, *src;
	char *jobid_str = NULL;
	int taskid;

	/* job id as a string of the form "<jobid>.<stepid>" */
	xstrfmtcat(jobid_str, "%d.%d", pmixp_info_jobid(), pmixp_info_stepid());
	PMIXP_ALLOC_KEY(kvp, PMIX_JOBID);
	PMIX_VAL_SET(&kvp->value, string, jobid_str);
	xfree(jobid_str);
	list_append(lresp, kvp);

	PMIXP_ALLOC_KEY(kvp, PMIX_NODEID);
	PMIX_VAL_SET(&kvp->value, uint32_t, nsptr->node_id);
	list_append(lresp, kvp);

	/* one PMIX_PROC_DATA entry per task */
	taskid = 0;
	while (taskid < pmixp_info_tasks()) {
		List per_rank;
		ListIterator it;
		int nkeys, slot, local_rank;
		char *host;
		pmix_info_t *merged;

		/* scratch list collecting the keys of this task */
		per_rank = list_create(pmixp_xfree_xmalloced);

		PMIXP_ALLOC_KEY(kvp, PMIX_RANK);
		PMIX_VAL_SET(&kvp->value, int, taskid);
		list_append(per_rank, kvp);

		/* TODO: app number and app leader are always 0 for now, which
		 * is not the general case (see SLURM MIMD: man srun, section
		 * MULTIPLE PROGRAM CONFIGURATION)
		 */
		PMIXP_ALLOC_KEY(kvp, PMIX_APPNUM);
		PMIX_VAL_SET(&kvp->value, int, 0);
		list_append(per_rank, kvp);

		PMIXP_ALLOC_KEY(kvp, PMIX_APPLDR);
		PMIX_VAL_SET(&kvp->value, int, 0);
		list_append(per_rank, kvp);

		/* TODO: global and app rank both equal the task id until
		 * several apps are supported
		 */
		PMIXP_ALLOC_KEY(kvp, PMIX_GLOBAL_RANK);
		PMIX_VAL_SET(&kvp->value, uint32_t, taskid);
		list_append(per_rank, kvp);

		PMIXP_ALLOC_KEY(kvp, PMIX_APP_RANK);
		PMIX_VAL_SET(&kvp->value, uint32_t, taskid);
		list_append(per_rank, kvp);

		/* node-local keys are only added for tasks that have a
		 * non-negative local id
		 */
		local_rank = pmixp_info_taskid2localid(taskid);
		if (local_rank >= 0) {
			PMIXP_ALLOC_KEY(kvp, PMIX_LOCAL_RANK);
			PMIX_VAL_SET(&kvp->value, uint16_t, local_rank);
			list_append(per_rank, kvp);

			/* TODO: fix when several apps will appear */
			PMIXP_ALLOC_KEY(kvp, PMIX_NODE_RANK);
			PMIX_VAL_SET(&kvp->value, uint16_t, local_rank);
			list_append(per_rank, kvp);
		}

		/* name of the host that task_map assigns to this task */
		host = hostlist_nth(nsptr->hl, nsptr->task_map[taskid]);
		PMIXP_ALLOC_KEY(kvp, PMIX_HOSTNAME);
		PMIX_VAL_SET(&kvp->value, string, host);
		free(host);
		list_append(per_rank, kvp);

		/* flatten the scratch list into a plain array */
		nkeys = list_count(per_rank);
		PMIX_INFO_CREATE(merged, nkeys);
		it = list_iterator_create(per_rank);
		for (slot = 0; NULL != (src = list_next(it)); slot++) {
			/* Shallow copy of every field. The originals are
			 * released by list_destroy() below without their
			 * fields being freed, so the copies stay valid.
			 */
			merged[slot] = *src;
		}
		list_destroy(per_rank);

		/* wrap the array into a single PMIX_PROC_DATA key */
		PMIXP_ALLOC_KEY(kvp, PMIX_PROC_DATA);
		kvp->value.type = PMIX_INFO_ARRAY;
		kvp->value.data.array.size = nkeys;
		kvp->value.data.array.array = (pmix_info_t *)merged;

		/* put the complex key to the list */
		list_append(lresp, kvp);

		taskid++;
	}
}