예제 #1
0
파일: slurm_pmi.c 프로젝트: BYUHPC/slurm
/*
 * Transmit PMI key-value space data to the srun communication manager.
 *
 * kvs_set_ptr IN - key-value set, sent as the payload of a
 *                  PMI_KVS_PUT_REQ message
 * pmi_rank    IN - rank of the calling task, handed to _delay_rpc()
 * pmi_size    IN - number of tasks; selects the message timeout and
 *                  is handed to _delay_rpc()
 * RET EINVAL if kvs_set_ptr is NULL, the _get_addr() error code if
 *     that call fails, SLURM_ERROR once the retry budget is used up,
 *     otherwise the return code filled in by
 *     slurm_send_recv_rc_msg_only_one()
 */
int slurm_send_kvs_comm_set(struct kvs_comm_set *kvs_set_ptr,
		int pmi_rank, int pmi_size)
{
	slurm_msg_t request;
	int status;
	int attempts = 0;
	int timeout_scale = 0;
	int msg_timeout = 0;	/* left at 0 for jobs of 10 tasks or fewer */

	if (!kvs_set_ptr)
		return EINVAL;

	status = _get_addr();
	if (status != SLURM_SUCCESS)
		return status;
	_set_pmi_time();

	slurm_msg_t_init(&request);
	request.address  = srun_addr;
	request.msg_type = PMI_KVS_PUT_REQ;
	request.data     = (void *) kvs_set_ptr;

	/*
	 * srun may receive thousands of these messages at once and refuse
	 * some of the connections, so the RPC is delayed according to the
	 * task's rank and retried on failure. Larger jobs also get a longer
	 * message timeout because srun is heavily loaded. The "secs"
	 * figures below assume the default message timeout of 10 secs.
	 */
	_delay_rpc(pmi_rank, pmi_size);

	if (pmi_size > 4000)
		timeout_scale = 24000;	/* 240 secs */
	else if (pmi_size > 1000)
		timeout_scale = 12000;	/* 120 secs */
	else if (pmi_size > 100)
		timeout_scale = 5000;	/* 50 secs */
	else if (pmi_size > 10)
		timeout_scale = 2000;	/* 20 secs */

	if (timeout_scale != 0)
		msg_timeout = slurm_get_msg_timeout() * timeout_scale;

	for (;;) {
		if (slurm_send_recv_rc_msg_only_one(&request, &status,
						    msg_timeout) >= 0)
			break;

		if (attempts++ > MAX_RETRIES) {
			error("slurm_send_kvs_comm_set: %m");
			return SLURM_ERROR;
		}
		debug("send_kvs retry %d", attempts);
		_delay_rpc(pmi_rank, pmi_size);
	}

	return status;
}
예제 #2
0
파일: slurm_pmi.c 프로젝트: mart1nl/slurm
/*
 * Wait for barrier and get full PMI Keyval space data
 *
 * The function sends a PMI_KVS_GET_REQ to srun carrying this task's
 * rank, the task count and the hostname/port of a local listening
 * socket (pmi_fd, created on first use). After srun acknowledges the
 * request, the function accepts one connection on that socket, reads a
 * PMI_KVS_GET_RESP message from it, replies with SLURM_SUCCESS and
 * passes the received data to _forward_comm_set().
 *
 * kvs_set_ptr OUT - set to NULL on entry; on a successful receive it is
 *                   set to the data of the received message
 *                   (NOTE(review): ownership/free responsibility is not
 *                   visible here - confirm against callers)
 * pmi_rank    IN  - rank of the calling task
 * pmi_size    IN  - number of tasks; also selects the message timeout
 * RET EINVAL if kvs_set_ptr is NULL; the _get_addr() error code;
 *     SLURM_ERROR on socket setup failure or exhausted retries; the
 *     return code reported by srun if it is not SLURM_SUCCESS; the
 *     errno of a failed accept/receive (SLURM_ERROR if errno was 0);
 *     SLURM_UNEXPECTED_MSG_ERROR on a wrong message type; otherwise the
 *     result of _forward_comm_set()
 */
int  slurm_get_kvs_comm_set(struct kvs_comm_set **kvs_set_ptr,
		int pmi_rank, int pmi_size)
{
	int rc, srun_fd, retries = 0, timeout = 0;
	int saved_errno;
	slurm_msg_t msg_send, msg_rcv;
	slurm_addr_t slurm_addr, srun_reply_addr;
	char hostname[64];
	uint16_t port;
	kvs_get_msg_t data;
	char *env_pmi_ifhn;

	if (kvs_set_ptr == NULL)
		return EINVAL;
	*kvs_set_ptr = NULL;	/* initialization */

	if ((rc = _get_addr()) != SLURM_SUCCESS) {
		error("_get_addr: %m");
		return rc;
	}

	_set_pmi_time();

	/* Create the reply socket once; later calls reuse pmi_fd */
	if (pmi_fd < 0) {
		if ((pmi_fd = slurm_init_msg_engine_port(0)) < 0) {
			error("slurm_init_msg_engine_port: %m");
			return SLURM_ERROR;
		}
		fd_set_blocking(pmi_fd);
	}
	if (slurm_get_stream_addr(pmi_fd, &slurm_addr) < 0) {
		error("slurm_get_stream_addr: %m");
		return SLURM_ERROR;
	}
	/* hostname is not set here, so slurm_get_addr fails
	slurm_get_addr(&slurm_addr, &port, hostname, sizeof(hostname)); */
	port = ntohs(slurm_addr.sin_port);
	/* SLURM_PMI_RESP_IFHN, when set, overrides the advertised hostname */
	if ((env_pmi_ifhn = getenv("SLURM_PMI_RESP_IFHN"))) {
		strncpy(hostname, env_pmi_ifhn, sizeof(hostname));
		/* strncpy does not terminate on truncation */
		hostname[sizeof(hostname)-1] = 0;
	} else
		gethostname_short(hostname, sizeof(hostname));

	data.task_id = pmi_rank;
	data.size = pmi_size;
	data.port = port;
	data.hostname = hostname;
	slurm_msg_t_init(&msg_send);
	slurm_msg_t_init(&msg_rcv);
	msg_send.address = srun_addr;
	msg_send.msg_type = PMI_KVS_GET_REQ;
	msg_send.data = &data;

	/* Send the RPC to the local srun communication manager.
	 * Since the srun can be sent thousands of messages at
	 * the same time and refuse some connections, retry as
	 * needed. Wait until all key-pairs have been sent by
	 * all tasks then spread out messages by task's rank.
	 * Also increase the message timeout if many tasks
	 * since the srun command can get very overloaded (the
	 * default timeout is 10 secs).
	 */
	_delay_rpc(pmi_rank, pmi_size);
	if      (pmi_size > 4000)	/* 240 secs */
		timeout = slurm_get_msg_timeout() * 24000;
	else if (pmi_size > 1000)	/* 120 secs */
		timeout = slurm_get_msg_timeout() * 12000;
	else if (pmi_size > 100)	/* 60 secs */
		timeout = slurm_get_msg_timeout() * 6000;
	else if (pmi_size > 10)		/* 20 secs */
		timeout = slurm_get_msg_timeout() * 2000;

	while (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, timeout) < 0) {
		if (retries++ > MAX_RETRIES) {
			error("slurm_get_kvs_comm_set: %m");
			return SLURM_ERROR;
		} else
			debug("get kvs retry %d", retries);
		_delay_rpc(pmi_rank, pmi_size);
	}
	if (rc != SLURM_SUCCESS) {
		error("slurm_get_kvs_comm_set error_code=%d", rc);
		return rc;
	}

	/* get the message after all tasks reach the barrier */
	srun_fd = slurm_accept_msg_conn(pmi_fd, &srun_reply_addr);
	if (srun_fd < 0) {
		/* Capture errno first: logging may overwrite it */
		saved_errno = errno;
		error("slurm_accept_msg_conn: %m");
		return saved_errno ? saved_errno : SLURM_ERROR;
	}

	while ((rc = slurm_receive_msg(srun_fd, &msg_rcv, timeout)) != 0) {
		if (errno == EINTR)
			continue;
		/* Capture errno before error()/slurm_close() can clobber it,
		 * and never report success (0) for a failed receive */
		saved_errno = errno;
		error("slurm_receive_msg: %m");
		slurm_close(srun_fd);
		return saved_errno ? saved_errno : SLURM_ERROR;
	}
	if (msg_rcv.auth_cred)
		(void)g_slurm_auth_destroy(msg_rcv.auth_cred);

	if (msg_rcv.msg_type != PMI_KVS_GET_RESP) {
		error("slurm_get_kvs_comm_set msg_type=%d", msg_rcv.msg_type);
		slurm_close(srun_fd);
		return SLURM_UNEXPECTED_MSG_ERROR;
	}
	/* Acknowledge receipt; a failed ack is logged but not fatal */
	if (slurm_send_rc_msg(&msg_rcv, SLURM_SUCCESS) < 0)
		error("slurm_send_rc_msg: %m");

	slurm_close(srun_fd);
	*kvs_set_ptr = msg_rcv.data;

	rc = _forward_comm_set(*kvs_set_ptr);
	return rc;
}