/*
 * Deliver a signal to the batch script of a job (pseudo-step id
 * SLURM_BATCH_SCRIPT) via the slurmd on the first node of the allocation.
 * allocation IN - allocation response identifying the job and its nodes
 * signal IN - signal number to deliver
 * RET SLURM_SUCCESS, the remote slurmd's error code, or -1 on local failure
 */
static int _signal_batch_script_step(
	const resource_allocation_response_msg_t *allocation, uint32_t signal)
{
	slurm_msg_t req_msg;
	kill_tasks_msg_t kill_req;
	int rc = SLURM_SUCCESS;
	char *first_host = nodelist_nth_host(allocation->node_list, 0);

	if (!first_host) {
		error("_signal_batch_script_step: "
		      "can't get the first name out of %s",
		      allocation->node_list);
		return -1;
	}

	kill_req.job_id      = allocation->job_id;
	kill_req.job_step_id = SLURM_BATCH_SCRIPT;
	kill_req.signal      = signal;

	slurm_msg_t_init(&req_msg);
	req_msg.msg_type = REQUEST_SIGNAL_TASKS;
	req_msg.data     = &kill_req;

	/* Resolve the slurmd address of the first allocated node. */
	if (slurm_conf_get_addr(first_host, &req_msg.address) == SLURM_ERROR) {
		error("_signal_batch_script_step: "
		      "can't find address for host %s, check slurm.conf",
		      first_host);
		free(first_host);
		return -1;
	}
	free(first_host);	/* nodelist_nth_host() result must be freed */

	if (slurm_send_recv_rc_msg_only_one(&req_msg, &rc, 0) < 0) {
		error("_signal_batch_script_step: %m");
		rc = -1;
	}
	return rc;
}
/* Ping primary ControlMachine * RET 0 if no error */ static int _ping_controller(void) { int rc; slurm_msg_t req; /* Locks: Read configuration */ slurmctld_lock_t config_read_lock = { READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; /* * Set address of controller to ping */ slurm_msg_t_init(&req); lock_slurmctld(config_read_lock); debug3("pinging slurmctld at %s", slurmctld_conf.control_addr); slurm_set_addr(&req.address, slurmctld_conf.slurmctld_port, slurmctld_conf.control_addr); unlock_slurmctld(config_read_lock); req.msg_type = REQUEST_PING; if (slurm_send_recv_rc_msg_only_one(&req, &rc, 0) < 0) { error("_ping_controller/slurm_send_node_msg error: %m"); return SLURM_ERROR; } if (rc) { error("_ping_controller/response error %d", rc); return SLURM_PROTOCOL_ERROR; } return SLURM_PROTOCOL_SUCCESS; }
/* Forward keypair info to other tasks as required. * Clear message forward structure upon completion. * The messages are forwarded sequentially. */ static int _forward_comm_set(struct kvs_comm_set *kvs_set_ptr) { int i, rc = SLURM_SUCCESS; int tmp_host_cnt = kvs_set_ptr->host_cnt; slurm_msg_t msg_send; int msg_rc; kvs_set_ptr->host_cnt = 0; for (i=0; i<tmp_host_cnt; i++) { if (kvs_set_ptr->kvs_host_ptr[i].port == 0) continue; /* empty */ slurm_msg_t_init(&msg_send); msg_send.msg_type = PMI_KVS_GET_RESP; msg_send.data = (void *) kvs_set_ptr; slurm_set_addr(&msg_send.address, kvs_set_ptr->kvs_host_ptr[i].port, kvs_set_ptr->kvs_host_ptr[i].hostname); if (slurm_send_recv_rc_msg_only_one(&msg_send, &msg_rc, 0) < 0) { error("Could not forward msg to %s", kvs_set_ptr->kvs_host_ptr[i].hostname); msg_rc = 1; } rc = MAX(rc, msg_rc); xfree(kvs_set_ptr->kvs_host_ptr[i].hostname); } xfree(kvs_set_ptr->kvs_host_ptr); return rc; }
static void *_msg_thread(void *x) { struct msg_arg *msg_arg_ptr = (struct msg_arg *) x; int rc, success = 0, timeout; slurm_msg_t msg_send; slurm_msg_t_init(&msg_send); debug2("KVS_Barrier msg to %s:%u", msg_arg_ptr->bar_ptr->hostname, msg_arg_ptr->bar_ptr->port); msg_send.msg_type = PMI_KVS_GET_RESP; msg_send.data = (void *) msg_arg_ptr->kvs_ptr; slurm_set_addr(&msg_send.address, msg_arg_ptr->bar_ptr->port, msg_arg_ptr->bar_ptr->hostname); timeout = slurm_get_msg_timeout() * 10000; if (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, timeout) < 0) { error("slurm_send_recv_rc_msg_only_one: %m"); } else if (rc != SLURM_SUCCESS) { error("KVS_Barrier confirm from %s, rc=%d", msg_arg_ptr->bar_ptr->hostname, rc); } else { /* successfully transmitted KVS keypairs */ success = 1; } slurm_mutex_lock(&agent_mutex); agent_cnt--; pthread_cond_signal(&agent_cond); slurm_mutex_unlock(&agent_mutex); xfree(x); return NULL; }
/* Transmit PMI Keyval space data */ int slurm_send_kvs_comm_set(struct kvs_comm_set *kvs_set_ptr, int pmi_rank, int pmi_size) { slurm_msg_t msg_send; int rc, retries = 0, timeout = 0; if (kvs_set_ptr == NULL) return EINVAL; if ((rc = _get_addr()) != SLURM_SUCCESS) return rc; _set_pmi_time(); slurm_msg_t_init(&msg_send); msg_send.address = srun_addr; msg_send.msg_type = PMI_KVS_PUT_REQ; msg_send.data = (void *) kvs_set_ptr; /* Send the RPC to the local srun communcation manager. * Since the srun can be sent thousands of messages at * the same time and refuse some connections, retry as * needed. Spread out messages by task's rank. Also * increase the timeout if many tasks since the srun * command is very overloaded. * We also increase the timeout (default timeout is * 10 secs). */ _delay_rpc(pmi_rank, pmi_size); if (pmi_size > 4000) /* 240 secs */ timeout = slurm_get_msg_timeout() * 24000; else if (pmi_size > 1000) /* 120 secs */ timeout = slurm_get_msg_timeout() * 12000; else if (pmi_size > 100) /* 50 secs */ timeout = slurm_get_msg_timeout() * 5000; else if (pmi_size > 10) /* 20 secs */ timeout = slurm_get_msg_timeout() * 2000; while (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, timeout) < 0) { if (retries++ > MAX_RETRIES) { error("slurm_send_kvs_comm_set: %m"); return SLURM_ERROR; } else debug("send_kvs retry %d", retries); _delay_rpc(pmi_rank, pmi_size); } return rc; }
/* * Tell the primary_controller to relinquish control, primary control_machine * has to suspend operation * Based on _shutdown_backup_controller from controller.c * wait_time - How long to wait for primary controller to write state, seconds. * RET 0 or an error code * NOTE: READ lock_slurmctld config before entry (or be single-threaded) */ static int _shutdown_primary_controller(int wait_time) { int rc; slurm_msg_t req; slurm_msg_t_init(&req); if ((slurmctld_conf.control_addr == NULL) || (slurmctld_conf.control_addr[0] == '\0')) { error("_shutdown_primary_controller: " "no primary controller to shutdown"); return SLURM_ERROR; } slurm_set_addr(&req.address, slurmctld_conf.slurmctld_port, slurmctld_conf.control_addr); /* send request message */ req.msg_type = REQUEST_CONTROL; if (slurm_send_recv_rc_msg_only_one(&req, &rc, (CONTROL_TIMEOUT * 1000)) < 0) { error("_shutdown_primary_controller:send/recv: %m"); return SLURM_ERROR; } if (rc == ESLURM_DISABLED) debug("primary controller responding"); else if (rc == 0) { debug("primary controller has relinquished control"); } else { error("_shutdown_primary_controller: %s", slurm_strerror(rc)); return SLURM_ERROR; } /* FIXME: Ideally the REQUEST_CONTROL RPC does not return until all * other activity has ceased and the state has been saved. That is * not presently the case (it returns when no other work is pending, * so the state save should occur right away). We sleep for a while * here and give the primary controller time to shutdown */ if (wait_time) sleep(wait_time); return SLURM_SUCCESS; }
static int _terminate_batch_script_step(const resource_allocation_response_msg_t * allocation) { slurm_msg_t msg; kill_tasks_msg_t rpc; int rc = SLURM_SUCCESS; int i; char *name = nodelist_nth_host(allocation->node_list, 0); if (!name) { error("_terminate_batch_script_step: " "can't get the first name out of %s", allocation->node_list); return -1; } rpc.job_id = allocation->job_id; rpc.job_step_id = SLURM_BATCH_SCRIPT; rpc.signal = (uint32_t)-1; /* not used by slurmd */ slurm_msg_t_init(&msg); msg.msg_type = REQUEST_TERMINATE_TASKS; msg.data = &rpc; if (slurm_conf_get_addr(name, &msg.address) == SLURM_ERROR) { error("_terminate_batch_script_step: " "can't find address for host %s, check slurm.conf", name); free(name); return -1; } free(name); i = slurm_send_recv_rc_msg_only_one(&msg, &rc, 0); if (i != 0) rc = i; return rc; }
/* Wait for barrier and get full PMI Keyval space data */ int slurm_get_kvs_comm_set(struct kvs_comm_set **kvs_set_ptr, int pmi_rank, int pmi_size) { int rc, srun_fd, retries = 0, timeout = 0; slurm_msg_t msg_send, msg_rcv; slurm_addr_t slurm_addr, srun_reply_addr; char hostname[64]; uint16_t port; kvs_get_msg_t data; char *env_pmi_ifhn; if (kvs_set_ptr == NULL) return EINVAL; *kvs_set_ptr = NULL; /* initialization */ if ((rc = _get_addr()) != SLURM_SUCCESS) { error("_get_addr: %m"); return rc; } _set_pmi_time(); if (pmi_fd < 0) { if ((pmi_fd = slurm_init_msg_engine_port(0)) < 0) { error("slurm_init_msg_engine_port: %m"); return SLURM_ERROR; } fd_set_blocking(pmi_fd); } if (slurm_get_stream_addr(pmi_fd, &slurm_addr) < 0) { error("slurm_get_stream_addr: %m"); return SLURM_ERROR; } /* hostname is not set here, so slurm_get_addr fails slurm_get_addr(&slurm_addr, &port, hostname, sizeof(hostname)); */ port = ntohs(slurm_addr.sin_port); if ((env_pmi_ifhn = getenv("SLURM_PMI_RESP_IFHN"))) { strncpy(hostname, env_pmi_ifhn, sizeof(hostname)); hostname[sizeof(hostname)-1] = 0; } else gethostname_short(hostname, sizeof(hostname)); data.task_id = pmi_rank; data.size = pmi_size; data.port = port; data.hostname = hostname; slurm_msg_t_init(&msg_send); slurm_msg_t_init(&msg_rcv); msg_send.address = srun_addr; msg_send.msg_type = PMI_KVS_GET_REQ; msg_send.data = &data; /* Send the RPC to the local srun communcation manager. * Since the srun can be sent thousands of messages at * the same time and refuse some connections, retry as * needed. Wait until all key-pairs have been sent by * all tasks then spread out messages by task's rank. * Also increase the message timeout if many tasks * since the srun command can get very overloaded (the * default timeout is 10 secs). 
*/ _delay_rpc(pmi_rank, pmi_size); if (pmi_size > 4000) /* 240 secs */ timeout = slurm_get_msg_timeout() * 24000; else if (pmi_size > 1000) /* 120 secs */ timeout = slurm_get_msg_timeout() * 12000; else if (pmi_size > 100) /* 60 secs */ timeout = slurm_get_msg_timeout() * 6000; else if (pmi_size > 10) /* 20 secs */ timeout = slurm_get_msg_timeout() * 2000; while (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, timeout) < 0) { if (retries++ > MAX_RETRIES) { error("slurm_get_kvs_comm_set: %m"); return SLURM_ERROR; } else debug("get kvs retry %d", retries); _delay_rpc(pmi_rank, pmi_size); } if (rc != SLURM_SUCCESS) { error("slurm_get_kvs_comm_set error_code=%d", rc); return rc; } /* get the message after all tasks reach the barrier */ srun_fd = slurm_accept_msg_conn(pmi_fd, &srun_reply_addr); if (srun_fd < 0) { error("slurm_accept_msg_conn: %m"); return errno; } while ((rc = slurm_receive_msg(srun_fd, &msg_rcv, timeout)) != 0) { if (errno == EINTR) continue; error("slurm_receive_msg: %m"); slurm_close(srun_fd); return errno; } if (msg_rcv.auth_cred) (void)g_slurm_auth_destroy(msg_rcv.auth_cred); if (msg_rcv.msg_type != PMI_KVS_GET_RESP) { error("slurm_get_kvs_comm_set msg_type=%d", msg_rcv.msg_type); slurm_close(srun_fd); return SLURM_UNEXPECTED_MSG_ERROR; } if (slurm_send_rc_msg(&msg_rcv, SLURM_SUCCESS) < 0) error("slurm_send_rc_msg: %m"); slurm_close(srun_fd); *kvs_set_ptr = msg_rcv.data; rc = _forward_comm_set(*kvs_set_ptr); return rc; }