/* Transmit PMI Keyval space data */ int slurm_send_kvs_comm_set(struct kvs_comm_set *kvs_set_ptr, int pmi_rank, int pmi_size) { slurm_msg_t msg_send; int rc, retries = 0, timeout = 0; if (kvs_set_ptr == NULL) return EINVAL; if ((rc = _get_addr()) != SLURM_SUCCESS) return rc; _set_pmi_time(); slurm_msg_t_init(&msg_send); msg_send.address = srun_addr; msg_send.msg_type = PMI_KVS_PUT_REQ; msg_send.data = (void *) kvs_set_ptr; /* Send the RPC to the local srun communcation manager. * Since the srun can be sent thousands of messages at * the same time and refuse some connections, retry as * needed. Spread out messages by task's rank. Also * increase the timeout if many tasks since the srun * command is very overloaded. * We also increase the timeout (default timeout is * 10 secs). */ _delay_rpc(pmi_rank, pmi_size); if (pmi_size > 4000) /* 240 secs */ timeout = slurm_get_msg_timeout() * 24000; else if (pmi_size > 1000) /* 120 secs */ timeout = slurm_get_msg_timeout() * 12000; else if (pmi_size > 100) /* 50 secs */ timeout = slurm_get_msg_timeout() * 5000; else if (pmi_size > 10) /* 20 secs */ timeout = slurm_get_msg_timeout() * 2000; while (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, timeout) < 0) { if (retries++ > MAX_RETRIES) { error("slurm_send_kvs_comm_set: %m"); return SLURM_ERROR; } else debug("send_kvs retry %d", retries); _delay_rpc(pmi_rank, pmi_size); } return rc; }
/* Wait for barrier and get full PMI Keyval space data */ int slurm_get_kvs_comm_set(struct kvs_comm_set **kvs_set_ptr, int pmi_rank, int pmi_size) { int rc, srun_fd, retries = 0, timeout = 0; slurm_msg_t msg_send, msg_rcv; slurm_addr_t slurm_addr, srun_reply_addr; char hostname[64]; uint16_t port; kvs_get_msg_t data; char *env_pmi_ifhn; if (kvs_set_ptr == NULL) return EINVAL; *kvs_set_ptr = NULL; /* initialization */ if ((rc = _get_addr()) != SLURM_SUCCESS) { error("_get_addr: %m"); return rc; } _set_pmi_time(); if (pmi_fd < 0) { if ((pmi_fd = slurm_init_msg_engine_port(0)) < 0) { error("slurm_init_msg_engine_port: %m"); return SLURM_ERROR; } fd_set_blocking(pmi_fd); } if (slurm_get_stream_addr(pmi_fd, &slurm_addr) < 0) { error("slurm_get_stream_addr: %m"); return SLURM_ERROR; } /* hostname is not set here, so slurm_get_addr fails slurm_get_addr(&slurm_addr, &port, hostname, sizeof(hostname)); */ port = ntohs(slurm_addr.sin_port); if ((env_pmi_ifhn = getenv("SLURM_PMI_RESP_IFHN"))) { strncpy(hostname, env_pmi_ifhn, sizeof(hostname)); hostname[sizeof(hostname)-1] = 0; } else gethostname_short(hostname, sizeof(hostname)); data.task_id = pmi_rank; data.size = pmi_size; data.port = port; data.hostname = hostname; slurm_msg_t_init(&msg_send); slurm_msg_t_init(&msg_rcv); msg_send.address = srun_addr; msg_send.msg_type = PMI_KVS_GET_REQ; msg_send.data = &data; /* Send the RPC to the local srun communcation manager. * Since the srun can be sent thousands of messages at * the same time and refuse some connections, retry as * needed. Wait until all key-pairs have been sent by * all tasks then spread out messages by task's rank. * Also increase the message timeout if many tasks * since the srun command can get very overloaded (the * default timeout is 10 secs). */ _delay_rpc(pmi_rank, pmi_size); if (pmi_size > 4000) /* 240 secs */ timeout = slurm_get_msg_timeout() * 24000; else if (pmi_size > 1000) /* 120 secs */ timeout = slurm_get_msg_timeout() * 12000; else if (pmi_size > 100) /* 60 secs */ timeout = slurm_get_msg_timeout() * 6000; else if (pmi_size > 10) /* 20 secs */ timeout = slurm_get_msg_timeout() * 2000; while (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, timeout) < 0) { if (retries++ > MAX_RETRIES) { error("slurm_get_kvs_comm_set: %m"); return SLURM_ERROR; } else debug("get kvs retry %d", retries); _delay_rpc(pmi_rank, pmi_size); } if (rc != SLURM_SUCCESS) { error("slurm_get_kvs_comm_set error_code=%d", rc); return rc; } /* get the message after all tasks reach the barrier */ srun_fd = slurm_accept_msg_conn(pmi_fd, &srun_reply_addr); if (srun_fd < 0) { error("slurm_accept_msg_conn: %m"); return errno; } while ((rc = slurm_receive_msg(srun_fd, &msg_rcv, timeout)) != 0) { if (errno == EINTR) continue; error("slurm_receive_msg: %m"); slurm_close(srun_fd); return errno; } if (msg_rcv.auth_cred) (void)g_slurm_auth_destroy(msg_rcv.auth_cred); if (msg_rcv.msg_type != PMI_KVS_GET_RESP) { error("slurm_get_kvs_comm_set msg_type=%d", msg_rcv.msg_type); slurm_close(srun_fd); return SLURM_UNEXPECTED_MSG_ERROR; } if (slurm_send_rc_msg(&msg_rcv, SLURM_SUCCESS) < 0) error("slurm_send_rc_msg: %m"); slurm_close(srun_fd); *kvs_set_ptr = msg_rcv.data; rc = _forward_comm_set(*kvs_set_ptr); return rc; }