/* * srun_node_fail - notify srun of a node's failure * IN job_ptr - job to notify * IN node_name - name of failed node */ extern void srun_node_fail(struct job_record *job_ptr, char *node_name) { #ifndef HAVE_FRONT_END struct node_record *node_ptr; #endif int bit_position = -1; slurm_addr_t * addr; srun_node_fail_msg_t *msg_arg; ListIterator step_iterator; struct step_record *step_ptr; xassert(job_ptr); xassert(node_name); if (!job_ptr || !IS_JOB_RUNNING(job_ptr)) return; #ifdef HAVE_FRONT_END /* Purge all jobs steps in front end mode */ #else if (!node_name || (node_ptr = find_node_record(node_name)) == NULL) return; bit_position = node_ptr - node_record_table_ptr; #endif step_iterator = list_iterator_create(job_ptr->step_list); while ((step_ptr = (struct step_record *) list_next(step_iterator))) { if (step_ptr->step_node_bitmap == NULL) /* pending step */ continue; if ((bit_position >= 0) && (!bit_test(step_ptr->step_node_bitmap, bit_position))) continue; /* job step not on this node */ if ( (step_ptr->port == 0) || (step_ptr->host == NULL) || (step_ptr->batch_step) || (step_ptr->host[0] == '\0') ) continue; addr = xmalloc(sizeof(struct sockaddr_in)); slurm_set_addr(addr, step_ptr->port, step_ptr->host); msg_arg = xmalloc(sizeof(srun_node_fail_msg_t)); msg_arg->job_id = job_ptr->job_id; msg_arg->step_id = step_ptr->step_id; msg_arg->nodelist = xstrdup(node_name); _srun_agent_launch(addr, step_ptr->host, SRUN_NODE_FAIL, msg_arg, step_ptr->start_protocol_ver); } list_iterator_destroy(step_iterator); if (job_ptr->other_port && job_ptr->alloc_node && job_ptr->resp_host) { addr = xmalloc(sizeof(struct sockaddr_in)); slurm_set_addr(addr, job_ptr->other_port, job_ptr->resp_host); msg_arg = xmalloc(sizeof(srun_node_fail_msg_t)); msg_arg->job_id = job_ptr->job_id; msg_arg->step_id = NO_VAL; msg_arg->nodelist = xstrdup(node_name); _srun_agent_launch(addr, job_ptr->alloc_node, SRUN_NODE_FAIL, msg_arg, job_ptr->start_protocol_ver); } }
/*
 * srun_allocate - notify srun of a resource allocation
 * IN job_ptr - job allocated resources
 */
extern void srun_allocate(struct job_record *job_ptr)
{
	struct job_record *pack_job, *pack_leader;
	resource_allocation_response_msg_t *msg_arg = NULL;
	slurm_addr_t *addr;
	ListIterator iter;
	List job_resp_list = NULL;

	xassert(job_ptr);
	/* Nothing to send unless srun registered a response port/host and
	 * the allocation details are fully populated */
	if (!job_ptr || !job_ptr->alloc_resp_port || !job_ptr->alloc_node ||
	    !job_ptr->resp_host || !job_ptr->job_resrcs ||
	    !job_ptr->job_resrcs->cpu_array_cnt)
		return;

	if (job_ptr->pack_job_id == 0) {
		/* Regular (non-pack) job: send one allocation response */
		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, job_ptr->alloc_resp_port,
			       job_ptr->resp_host);
		msg_arg = build_alloc_msg(job_ptr, SLURM_SUCCESS, NULL);
		_srun_agent_launch(addr, job_ptr->alloc_node,
				   RESPONSE_RESOURCE_ALLOCATION, msg_arg,
				   job_ptr->start_protocol_ver);
	} else if (_pending_pack_jobs(job_ptr)) {
		/* Other components of the pack job not yet allocated;
		 * defer until the whole pack is ready */
		return;
	} else if ((pack_leader = find_job_record(job_ptr->pack_job_id))) {
		/* All pack components allocated: send one list of
		 * allocation responses to the pack leader's srun */
		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, pack_leader->alloc_resp_port,
			       pack_leader->resp_host);
		job_resp_list = list_create(_free_srun_alloc);
		iter = list_iterator_create(pack_leader->pack_job_list);
		while ((pack_job = (struct job_record *) list_next(iter))) {
			if (pack_leader->pack_job_id !=
			    pack_job->pack_job_id) {
				error("%s: Bad pack_job_list for %pJ",
				      __func__, pack_leader);
				continue;
			}
			msg_arg = build_alloc_msg(pack_job, SLURM_SUCCESS,
						  NULL);
			list_append(job_resp_list, msg_arg);
			msg_arg = NULL;	/* ownership passed to the list */
		}
		list_iterator_destroy(iter);
		_srun_agent_launch(addr, job_ptr->alloc_node,
				   RESPONSE_JOB_PACK_ALLOCATION,
				   job_resp_list,
				   job_ptr->start_protocol_ver);
	} else {
		error("%s: Can not find pack job leader %pJ",
		      __func__, job_ptr);
	}
}
/*
 * srun_timeout - notify srun of a job's imminent timeout
 * IN job_ptr - pointer to the slurmctld job record
 */
extern void srun_timeout (struct job_record *job_ptr)
{
	slurm_addr_t * addr;
	srun_timeout_msg_t *msg_arg;
	ListIterator step_iterator;
	struct step_record *step_ptr;

	xassert(job_ptr);
	if (!IS_JOB_RUNNING(job_ptr))
		return;

	/* Notify the allocation-level srun/salloc, if it registered a port */
	if (job_ptr->other_port && job_ptr->alloc_node && job_ptr->resp_host) {
		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, job_ptr->other_port, job_ptr->resp_host);
		msg_arg = xmalloc(sizeof(srun_timeout_msg_t));
		msg_arg->job_id  = job_ptr->job_id;
		msg_arg->step_id = NO_VAL;
		msg_arg->timeout = job_ptr->end_time;
		_srun_agent_launch(addr, job_ptr->alloc_node, SRUN_TIMEOUT,
				   msg_arg, job_ptr->start_protocol_ver);
	}

	/* Propagate the timeout to each job step's srun */
	step_iterator = list_iterator_create(job_ptr->step_list);
	while ((step_ptr = (struct step_record *) list_next(step_iterator)))
		srun_step_timeout(step_ptr, job_ptr->end_time);
	list_iterator_destroy(step_iterator);
}
/* Initialize the message aggregation sender: set up the collection
 * structure, record the collector node address/window/limits, and start
 * the background aggregation thread.
 * IN host/port - address of the collector node
 * IN window - aggregation time window
 * IN max_msg_cnt - flush threshold; aggregation disabled if <= 1 */
extern void msg_aggr_sender_init(char *host, uint16_t port, uint64_t window,
				 uint64_t max_msg_cnt)
{
	/* Already running, or aggregation effectively disabled */
	if (msg_collection.running || (max_msg_cnt <= 1))
		return;

	memset(&msg_collection, 0, sizeof(msg_collection_type_t));
	slurm_mutex_init(&msg_collection.aggr_mutex);
	slurm_mutex_init(&msg_collection.mutex);
	/* Hold both locks while populating the structure so the new
	 * thread cannot observe a partially-initialized state.
	 * NOTE: lock order is mutex then aggr_mutex */
	slurm_mutex_lock(&msg_collection.mutex);
	slurm_mutex_lock(&msg_collection.aggr_mutex);
	slurm_cond_init(&msg_collection.cond, NULL);
	slurm_set_addr(&msg_collection.node_addr, port, host);
	msg_collection.window = window;
	msg_collection.max_msg_cnt = max_msg_cnt;
	msg_collection.msg_aggr_list = list_create(_msg_aggr_free);
	msg_collection.msg_list = list_create(slurm_free_comp_msg_list);
	msg_collection.max_msgs = false;
	msg_collection.debug_flags = slurm_get_debug_flags();
	slurm_mutex_unlock(&msg_collection.aggr_mutex);
	slurm_mutex_unlock(&msg_collection.mutex);

	slurm_thread_create(&msg_collection.thread_id,
			    &_msg_aggregation_sender, NULL);
}
/* Ping primary ControlMachine * RET 0 if no error */ static int _ping_controller(void) { int rc; slurm_msg_t req; /* Locks: Read configuration */ slurmctld_lock_t config_read_lock = { READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; /* * Set address of controller to ping */ slurm_msg_t_init(&req); lock_slurmctld(config_read_lock); debug3("pinging slurmctld at %s", slurmctld_conf.control_addr); slurm_set_addr(&req.address, slurmctld_conf.slurmctld_port, slurmctld_conf.control_addr); unlock_slurmctld(config_read_lock); req.msg_type = REQUEST_PING; if (slurm_send_recv_rc_msg_only_one(&req, &rc, 0) < 0) { error("_ping_controller/slurm_send_node_msg error: %m"); return SLURM_ERROR; } if (rc) { error("_ping_controller/response error %d", rc); return SLURM_PROTOCOL_ERROR; } return SLURM_PROTOCOL_SUCCESS; }
/*
 * srun_job_complete - notify srun of a job's termination
 * IN job_ptr - pointer to the slurmctld job record
 */
extern void srun_job_complete (struct job_record *job_ptr)
{
	slurm_addr_t * addr;
	srun_job_complete_msg_t *msg_arg;
	ListIterator step_iterator;
	struct step_record *step_ptr;

	xassert(job_ptr);

	/* Notify the allocation-level srun/salloc, if it registered a port */
	if (job_ptr->other_port && job_ptr->alloc_node && job_ptr->resp_host) {
		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, job_ptr->other_port, job_ptr->resp_host);
		msg_arg = xmalloc(sizeof(srun_job_complete_msg_t));
		msg_arg->job_id  = job_ptr->job_id;
		msg_arg->step_id = NO_VAL;
		_srun_agent_launch(addr, job_ptr->alloc_node,
				   SRUN_JOB_COMPLETE, msg_arg);
	}

	/* Notify the srun of each non-batch job step */
	step_iterator = list_iterator_create(job_ptr->step_list);
	while ((step_ptr = (struct step_record *) list_next(step_iterator))) {
		if (step_ptr->batch_step)	/* batch script itself */
			continue;
		srun_step_complete(step_ptr);
	}
	list_iterator_destroy(step_iterator);
}
/* srun_ping - ping all srun commands that have not been heard from recently */
extern void srun_ping (void)
{
	ListIterator job_iterator;
	struct job_record *job_ptr;
	slurm_addr_t * addr;
	time_t now = time(NULL);
	/* Threshold: jobs inactive longer than about a third of
	 * InactiveLimit (with message-timeout slack) get pinged */
	time_t old = now - (slurmctld_conf.inactive_limit / 3) +
		     slurmctld_conf.msg_timeout + 1;
	srun_ping_msg_t *msg_arg;

	if (slurmctld_conf.inactive_limit == 0)
		return;		/* No limit, don't bother pinging */

	job_iterator = list_iterator_create(job_list);
	while ((job_ptr = (struct job_record *) list_next(job_iterator))) {
		xassert (job_ptr->magic == JOB_MAGIC);

		if (!IS_JOB_RUNNING(job_ptr))
			continue;

		/* Only ping jobs with a registered srun response channel */
		if ((job_ptr->time_last_active <= old) && job_ptr->other_port
		    && job_ptr->alloc_node && job_ptr->resp_host) {
			addr = xmalloc(sizeof(struct sockaddr_in));
			slurm_set_addr(addr, job_ptr->other_port,
				       job_ptr->resp_host);
			msg_arg = xmalloc(sizeof(srun_ping_msg_t));
			msg_arg->job_id  = job_ptr->job_id;
			msg_arg->step_id = NO_VAL;
			_srun_agent_launch(addr, job_ptr->alloc_node,
					   SRUN_PING, msg_arg);
		}
	}
	list_iterator_destroy(job_iterator);
}
/* Forward keypair info to other tasks as required.
 * Clear message forward structure upon completion.
 * The messages are forwarded sequentially.
 * RET SLURM_SUCCESS, or the worst per-host return code seen */
static int _forward_comm_set(struct kvs_comm_set *kvs_set_ptr)
{
	int i, rc = SLURM_SUCCESS;
	int tmp_host_cnt = kvs_set_ptr->host_cnt;
	slurm_msg_t msg_send;
	int msg_rc;

	/* Zero host_cnt before sending so recipients do not try to
	 * forward the set again themselves */
	kvs_set_ptr->host_cnt = 0;
	for (i=0; i<tmp_host_cnt; i++) {
		if (kvs_set_ptr->kvs_host_ptr[i].port == 0)
			continue;	/* empty */
		slurm_msg_t_init(&msg_send);
		msg_send.msg_type = PMI_KVS_GET_RESP;
		msg_send.data = (void *) kvs_set_ptr;
		slurm_set_addr(&msg_send.address,
			       kvs_set_ptr->kvs_host_ptr[i].port,
			       kvs_set_ptr->kvs_host_ptr[i].hostname);
		if (slurm_send_recv_rc_msg_only_one(&msg_send,
						    &msg_rc, 0) < 0) {
			error("Could not forward msg to %s",
			      kvs_set_ptr->kvs_host_ptr[i].hostname);
			msg_rc = 1;
		}
		rc = MAX(rc, msg_rc);	/* remember the worst failure */
		xfree(kvs_set_ptr->kvs_host_ptr[i].hostname);
	}
	xfree(kvs_set_ptr->kvs_host_ptr);
	return rc;
}
/*
 * srun_exec - request that srun execute a specific command
 *	and route it's output to stdout
 * IN step_ptr - pointer to the slurmctld job step record
 * IN argv - command and arguments to execute
 */
extern void srun_exec(struct step_record *step_ptr, char **argv)
{
	slurm_addr_t * addr;
	srun_exec_msg_t *msg_arg;
	int cnt = 1, i;

	xassert(step_ptr);

	if (step_ptr->port && step_ptr->host && step_ptr->host[0]) {
		for (i=0; argv[i]; i++)
			cnt++;	/* start at 1 to include trailing NULL */

		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, step_ptr->port, step_ptr->host);
		msg_arg = xmalloc(sizeof(srun_exec_msg_t));
		msg_arg->job_id  = step_ptr->job_ptr->job_id;
		msg_arg->step_id = step_ptr->step_id;
		msg_arg->argc    = cnt;
		msg_arg->argv    = xmalloc(sizeof(char *) * cnt);
		/* Copies argv[cnt-1] too; presumably xstrdup(NULL) yields
		 * NULL so the array stays NULL-terminated — verify against
		 * the xstrdup implementation */
		for (i=0; i<cnt ; i++)
			msg_arg->argv[i] = xstrdup(argv[i]);
		_srun_agent_launch(addr, step_ptr->host, SRUN_EXEC,
				   msg_arg);
	} else {
		error("srun_exec %u.%u lacks communication channel",
		      step_ptr->job_ptr->job_id, step_ptr->step_id);
	}
}
/* Take ownership of the working_cluster_rec contained in a resource
 * allocation response and configure this process to talk to that remote
 * cluster: resolve its control address, export SLURM_CLUSTER_NAME, and
 * load its node table.
 * IN msg - allocation response; its working_cluster_rec pointer is
 *	stolen (set to NULL) and owned by the global afterwards */
extern void slurm_setup_remote_working_cluster(
	resource_allocation_response_msg_t *msg)
{
	xassert(msg);
	xassert(msg->working_cluster_rec);
	xassert(msg->node_list);
	xassert(msg->node_addr);

	/* Replace any previously-installed remote cluster record */
	if (working_cluster_rec)
		slurmdb_destroy_cluster_rec(working_cluster_rec);

	working_cluster_rec = (slurmdb_cluster_rec_t *)
			      msg->working_cluster_rec;
	msg->working_cluster_rec = NULL;	/* ownership transferred */

	working_cluster_rec->plugin_id_select =
		select_get_plugin_id_pos(
			working_cluster_rec->plugin_id_select);

	slurm_set_addr(&working_cluster_rec->control_addr,
		       working_cluster_rec->control_port,
		       working_cluster_rec->control_host);

	if (setenvf(NULL, "SLURM_CLUSTER_NAME", "%s",
		    working_cluster_rec->name) < 0)
		error("unable to set SLURM_CLUSTER_NAME in environment");

	add_remote_nodes_to_conf_tbls(msg->node_list, msg->node_addr);
}
/* Terminate the slurmctld/nonstop message thread: set the shutdown flag,
 * poke the listening socket so the accept() returns, then join the
 * thread and reset its state. Safe to call when the thread is not
 * running (no-op). */
extern void term_msg_thread(void)
{
	slurm_mutex_lock(&thread_flag_mutex);
	if (thread_running) {
		int fd;
		slurm_addr_t addr;

		thread_shutdown = true;

		/* Open and close a connection to the plugin listening port.
		 * Allows slurm_accept_msg_conn() to return in _msg_thread()
		 * so that it can check the thread_shutdown flag.
		 */
		slurm_set_addr(&addr, nonstop_comm_port, "localhost");
		fd = slurm_open_stream(&addr, true);
		if (fd != -1) {
			/* we don't care if the open failed */
			slurm_close(fd);
		}

		debug2("waiting for slurmctld/nonstop thread to exit");
		pthread_join(msg_thread_id, NULL);
		msg_thread_id = 0;
		thread_shutdown = false;
		thread_running = false;
		debug2("join of slurmctld/nonstop thread was successful");
	}
	slurm_mutex_unlock(&thread_flag_mutex);
}
/* Initialize the global tree_info structure for the srun side of the
 * PMI2 communication tree. The launcher acts as the tree root; if this
 * srun was itself spawned (PMI2_SPAWNER_PORT_ENV set), also record the
 * spawner's loopback address.
 * RET SLURM_SUCCESS always */
static int _setup_srun_tree_info(const mpi_plugin_client_info_t *job)
{
	char *p;
	uint16_t p_port;

	memset(&tree_info, 0, sizeof(tree_info));

	tree_info.this_node = "launcher";	/* not used */
	tree_info.parent_id = -2;		/* not used */
	tree_info.parent_node = NULL;		/* not used */
	tree_info.num_children = job_info.nnodes;
	tree_info.depth = 0;			/* not used */
	tree_info.max_depth = 0;		/* not used */
	/* pmi_port set in _setup_srun_sockets */
	p = getenv(PMI2_SPAWNER_PORT_ENV);
	if (p) {		/* spawned */
		p_port = atoi(p);
		tree_info.srun_addr = xmalloc(sizeof(slurm_addr_t));
		/* assume there is always a lo interface */
		slurm_set_addr(tree_info.srun_addr, p_port, "127.0.0.1");
	} else
		tree_info.srun_addr = NULL;

	/* NOTE: 128 is assumed to match the tree_sock_addr buffer size —
	 * verify against its declaration */
	snprintf(tree_sock_addr, 128, PMI2_SOCK_ADDR_FMT,
		 job->jobid, job->stepid);

	/* init kvs seq to 0. TODO: reduce array size */
	tree_info.children_kvs_seq = xmalloc(sizeof(uint32_t) *
					     job_info.nnodes);

	return SLURM_SUCCESS;
}
static void *_msg_thread(void *x) { struct msg_arg *msg_arg_ptr = (struct msg_arg *) x; int rc, success = 0, timeout; slurm_msg_t msg_send; slurm_msg_t_init(&msg_send); debug2("KVS_Barrier msg to %s:%u", msg_arg_ptr->bar_ptr->hostname, msg_arg_ptr->bar_ptr->port); msg_send.msg_type = PMI_KVS_GET_RESP; msg_send.data = (void *) msg_arg_ptr->kvs_ptr; slurm_set_addr(&msg_send.address, msg_arg_ptr->bar_ptr->port, msg_arg_ptr->bar_ptr->hostname); timeout = slurm_get_msg_timeout() * 10000; if (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, timeout) < 0) { error("slurm_send_recv_rc_msg_only_one: %m"); } else if (rc != SLURM_SUCCESS) { error("KVS_Barrier confirm from %s, rc=%d", msg_arg_ptr->bar_ptr->hostname, rc); } else { /* successfully transmitted KVS keypairs */ success = 1; } slurm_mutex_lock(&agent_mutex); agent_cnt--; pthread_cond_signal(&agent_cond); slurm_mutex_unlock(&agent_mutex); xfree(x); return NULL; }
/*****************************************************************************\
 * terminate message hander thread
\*****************************************************************************/
/* Set the shutdown flag, connect once to the scheduler listening port so
 * the blocked accept() in _msg_thread() wakes up and observes the flag,
 * then join the thread and clear its state. No-op if not running. */
extern void term_msg_thread(void)
{
	pthread_mutex_lock(&thread_flag_mutex);
	if (thread_running) {
		int fd;
		slurm_addr_t addr;

		thread_shutdown = true;

		/* Open and close a connection to the listening port.
		 * Allows slurm_accept_msg_conn() to return in
		 * _msg_thread() so that it can check the thread_shutdown
		 * flag.
		 */
		slurm_set_addr(&addr, sched_port, "localhost");
		fd = slurm_open_stream(&addr);
		if (fd != -1) {
			/* we don't care if the open failed */
			slurm_close_stream(fd);
		}

		debug2("waiting for dynalloc thread to exit");
		pthread_join(msg_thread_id, NULL);
		msg_thread_id = 0;
		thread_shutdown = false;
		thread_running = false;
		debug2("join of dynalloc thread successful");
	}
	pthread_mutex_unlock(&thread_flag_mutex);
}
/*
 * srun_timeout - notify srun of a job's imminent timeout
 * IN job_ptr - pointer to the slurmctld job record
 */
extern void srun_timeout (struct job_record *job_ptr)
{
	slurm_addr_t * addr;
	srun_timeout_msg_t *msg_arg;
	ListIterator step_iterator;
	struct step_record *step_ptr;

	xassert(job_ptr);
	if (!IS_JOB_RUNNING(job_ptr))
		return;

	/* Notify the allocation-level srun/salloc, if it registered a port */
	if (job_ptr->other_port && job_ptr->alloc_node && job_ptr->resp_host) {
		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, job_ptr->other_port, job_ptr->resp_host);
		msg_arg = xmalloc(sizeof(srun_timeout_msg_t));
		msg_arg->job_id  = job_ptr->job_id;
		msg_arg->step_id = NO_VAL;
		msg_arg->timeout = job_ptr->end_time;
		_srun_agent_launch(addr, job_ptr->alloc_node, SRUN_TIMEOUT,
				   msg_arg);
	}

	/* Notify the srun of each job step having a communication channel */
	step_iterator = list_iterator_create(job_ptr->step_list);
	while ((step_ptr = (struct step_record *) list_next(step_iterator))) {
		if ( (step_ptr->port == 0)    ||
		     (step_ptr->host == NULL) ||
		     (step_ptr->batch_step)   ||
		     (step_ptr->host[0] == '\0') )
			continue;
		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, step_ptr->port, step_ptr->host);
		msg_arg = xmalloc(sizeof(srun_timeout_msg_t));
		msg_arg->job_id  = job_ptr->job_id;
		msg_arg->step_id = step_ptr->step_id;
		msg_arg->timeout = job_ptr->end_time;
		_srun_agent_launch(addr, step_ptr->host, SRUN_TIMEOUT,
				   msg_arg);
	}
	list_iterator_destroy(step_iterator);
}
/* Open event_fd as needed
 * RET 0 on success, -1 on failure */
static int _open_fd(time_t now)
{
	if (event_fd != -1)
		return 0;	/* already open */

	/* Identify address for socket connection.
	 * Done only on first call, then cached. */
	if (event_addr_set == 0) {
		slurm_set_addr(&moab_event_addr, e_port, e_host);
		event_addr_set = 1;
		/* event_addr_set == 2 means a backup address exists too */
		if (e_host_bu[0] != '\0') {
			slurm_set_addr(&moab_event_addr_bu, e_port,
				       e_host_bu);
			event_addr_set = 2;
		}
	}

	/* Open the event port on moab as needed */
	if (event_fd == -1) {
		event_fd = slurm_open_msg_conn(&moab_event_addr);
		if (event_fd == -1) {
			error("Unable to open primary wiki "
			      "event port %s:%u: %m",
			      e_host, e_port);
		}
	}
	/* Primary failed; fall back to the backup host, if configured */
	if ((event_fd == -1) && (event_addr_set == 2)) {
		event_fd = slurm_open_msg_conn(&moab_event_addr_bu);
		if (event_fd == -1) {
			error("Unable to open backup wiki "
			      "event port %s:%u: %m",
			      e_host_bu, e_port);
		}
	}
	if (event_fd == -1)
		return -1;

	/* We can't have the controller block on the following write() */
	fd_set_nonblocking(event_fd);
	return 0;
}
/* Prepare the environment for a PMI2 spawn request and exec srun to run
 * the spawned command(s). On success the exec does not return; on
 * failure a spawn response carrying the error code is sent back to the
 * requesting srun and the process exits.
 * IN req - spawn request (commands, preput KVS pairs, sequence number) */
static void _setup_exec_srun(spawn_req_t *req)
{
	char **env, env_key[32];
	int i, rc;
	spawn_resp_t *resp;

	debug3("mpi/pmi2: in _setup_exec_srun");

	/* setup environments */
	env = env_array_copy((const char **)job_info.job_env);
	/* TODO: unset some env-vars */

	env_array_overwrite_fmt(&env, "SLURM_JOB_ID", "%u",
				job_info.jobid);
	env_array_overwrite_fmt(&env, PMI2_SPAWNER_JOBID_ENV, "%s",
				job_info.pmi_jobid);
	env_array_overwrite_fmt(&env, PMI2_PMI_JOBID_ENV, "%s-%u",
				job_info.pmi_jobid, req->seq);
	env_array_overwrite_fmt(&env, PMI2_SPAWN_SEQ_ENV, "%u", req->seq);
	env_array_overwrite_fmt(&env, PMI2_SPAWNER_PORT_ENV, "%hu",
				tree_info.pmi_port);
	/* preput kvs: pass the preput key/value pairs via numbered
	 * environment variables */
	env_array_overwrite_fmt(&env, PMI2_PREPUT_CNT_ENV, "%d",
				req->preput_cnt);
	for (i = 0; i < req->preput_cnt; i ++) {
		snprintf(env_key, 32, PMI2_PPKEY_ENV"%d", i);
		env_array_overwrite_fmt(&env, env_key, "%s",
					req->pp_keys[i]);
		snprintf(env_key, 32, PMI2_PPVAL_ENV"%d", i);
		env_array_overwrite_fmt(&env, env_key, "%s",
					req->pp_vals[i]);
	}

	if (req->subcmd_cnt == 1) {
		/* no return if success */
		rc = _exec_srun_single(req, env);
	} else {
		/* no return if success */
		rc = _exec_srun_multiple(req, env);
	}

	/* Reaching here means exec failed: report the error back */
	resp = spawn_resp_new();
	resp->seq = req->seq;
	xstrfmtcat(resp->jobid, "%s-%u", job_info.pmi_jobid, req->seq);
	resp->error_cnt = 0;
	resp->rc = rc;

	/* fake a srun address */
	tree_info.srun_addr = xmalloc(sizeof(slurm_addr_t));
	slurm_set_addr(tree_info.srun_addr, tree_info.pmi_port,
		       "127.0.0.1");
	spawn_resp_send_to_srun(resp);
	spawn_resp_free(resp);

	exit(errno);
}
/*
 * srun_user_message - Send arbitrary message to an srun job (no job steps)
 * IN job_ptr - job to notify
 * IN msg - message text
 * RET SLURM_SUCCESS if sent, ESLURM_ALREADY_DONE if the job finished,
 *	ESLURM_DISABLED if no delivery path exists
 */
extern int srun_user_message(struct job_record *job_ptr, char *msg)
{
	slurm_addr_t * addr;
	srun_user_msg_t *msg_arg;

	xassert(job_ptr);
	if (!IS_JOB_PENDING(job_ptr) && !IS_JOB_RUNNING(job_ptr))
		return ESLURM_ALREADY_DONE;

	if (job_ptr->other_port && job_ptr->resp_host &&
	    job_ptr->resp_host[0]) {
		/* Deliver directly to the waiting srun/salloc process */
		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, job_ptr->other_port, job_ptr->resp_host);
		msg_arg = xmalloc(sizeof(srun_user_msg_t));
		msg_arg->job_id = job_ptr->job_id;
		msg_arg->msg    = xstrdup(msg);
		_srun_agent_launch(addr, job_ptr->resp_host, SRUN_USER_MSG,
				   msg_arg);
		return SLURM_SUCCESS;
	} else if (job_ptr->batch_flag && IS_JOB_RUNNING(job_ptr)) {
		/* Batch job: notify via an RPC to a node running the job */
#ifndef HAVE_FRONT_END
		struct node_record *node_ptr;
#endif
		job_notify_msg_t *notify_msg_ptr;
		agent_arg_t *agent_arg_ptr;
#ifdef HAVE_FRONT_END
		if (job_ptr->batch_host == NULL)
			return ESLURM_DISABLED;	/* no allocated nodes */
		agent_arg_ptr = (agent_arg_t *) xmalloc(sizeof(agent_arg_t));
		agent_arg_ptr->hostlist =
			hostlist_create(job_ptr->batch_host);
#else
		node_ptr = find_first_node_record(job_ptr->node_bitmap);
		if (node_ptr == NULL)
			return ESLURM_DISABLED;	/* no allocated nodes */
		agent_arg_ptr = (agent_arg_t *) xmalloc(sizeof(agent_arg_t));
		agent_arg_ptr->hostlist = hostlist_create(node_ptr->name);
#endif
		if (agent_arg_ptr->hostlist == NULL)
			fatal("hostlist_create: malloc failure");
		notify_msg_ptr = (job_notify_msg_t *)
				 xmalloc(sizeof(job_notify_msg_t));
		notify_msg_ptr->job_id = job_ptr->job_id;
		notify_msg_ptr->message = xstrdup(msg);
		agent_arg_ptr->node_count = 1;
		agent_arg_ptr->retry = 0;
		agent_arg_ptr->msg_type = REQUEST_JOB_NOTIFY;
		agent_arg_ptr->msg_args = (void *) notify_msg_ptr;
		/* Launch the RPC via agent */
		agent_queue_request(agent_arg_ptr);
		return SLURM_SUCCESS;
	}
	return ESLURM_DISABLED;
}
static int _get_addr(void) { char *env_host, *env_port; if (srun_port) return SLURM_SUCCESS; env_host = getenv("SLURM_SRUN_COMM_HOST"); env_port = getenv("SLURM_SRUN_COMM_PORT"); if (!env_host || !env_port) return SLURM_ERROR; srun_port = (uint16_t) atol(env_port); slurm_set_addr(&srun_addr, srun_port, env_host); return SLURM_SUCCESS; }
/* * srun_step_complete - notify srun of a job step's termination * IN step_ptr - pointer to the slurmctld job step record */ extern void srun_step_complete (struct step_record *step_ptr) { slurm_addr_t * addr; srun_job_complete_msg_t *msg_arg; xassert(step_ptr); if (step_ptr->port && step_ptr->host && step_ptr->host[0]) { addr = xmalloc(sizeof(struct sockaddr_in)); slurm_set_addr(addr, step_ptr->port, step_ptr->host); msg_arg = xmalloc(sizeof(srun_job_complete_msg_t)); msg_arg->job_id = step_ptr->job_ptr->job_id; msg_arg->step_id = step_ptr->step_id; _srun_agent_launch(addr, step_ptr->host, SRUN_JOB_COMPLETE, msg_arg); } }
/* * srun_allocate_abort - notify srun of a resource allocation failure * IN job_id - id of the job allocated resource */ extern void srun_allocate_abort(struct job_record *job_ptr) { if (job_ptr && job_ptr->alloc_resp_port && job_ptr->alloc_node && job_ptr->resp_host) { slurm_addr_t * addr; srun_job_complete_msg_t *msg_arg; addr = xmalloc(sizeof(struct sockaddr_in)); slurm_set_addr(addr, job_ptr->alloc_resp_port, job_ptr->resp_host); msg_arg = xmalloc(sizeof(srun_timeout_msg_t)); msg_arg->job_id = job_ptr->job_id; msg_arg->step_id = NO_VAL; _srun_agent_launch(addr, job_ptr->alloc_node, SRUN_JOB_COMPLETE, msg_arg); } }
/*
 * srun_step_signal - notify srun that a job step should be signalled
 * NOTE: Needed on BlueGene/Q to signal runjob process
 * IN step_ptr - pointer to the slurmctld job step record
 * IN signal - signal number
 */
extern void srun_step_signal (struct step_record *step_ptr, uint16_t signal)
{
	slurm_addr_t *srun_addr;
	job_step_kill_msg_t *kill_msg;

	xassert(step_ptr);

	/* Nothing to do unless srun registered a communication channel */
	if (!step_ptr->port || !step_ptr->host ||
	    (step_ptr->host[0] == '\0'))
		return;

	srun_addr = xmalloc(sizeof(struct sockaddr_in));
	slurm_set_addr(srun_addr, step_ptr->port, step_ptr->host);

	kill_msg = xmalloc(sizeof(job_step_kill_msg_t));
	kill_msg->job_id      = step_ptr->job_ptr->job_id;
	kill_msg->job_step_id = step_ptr->step_id;
	kill_msg->signal      = signal;

	_srun_agent_launch(srun_addr, step_ptr->host, SRUN_STEP_SIGNAL,
			   kill_msg);
}
/*
 * srun_step_missing - notify srun that a job step is missing from
 *		       a node we expect to find it on
 * IN step_ptr - pointer to the slurmctld job step record
 * IN node_list - name of nodes we did not find the step on
 */
extern void srun_step_missing (struct step_record *step_ptr,
			       char *node_list)
{
	slurm_addr_t * addr;
	srun_step_missing_msg_t *msg_arg;

	xassert(step_ptr);

	/* Only notify if srun registered a communication channel */
	if (step_ptr->port && step_ptr->host && step_ptr->host[0]) {
		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, step_ptr->port, step_ptr->host);
		msg_arg = xmalloc(sizeof(srun_step_missing_msg_t));
		msg_arg->job_id   = step_ptr->job_ptr->job_id;
		msg_arg->step_id  = step_ptr->step_id;
		msg_arg->nodelist = xstrdup(node_list);
		_srun_agent_launch(addr, step_ptr->host, SRUN_STEP_MISSING,
				   msg_arg);
	}
}
/*
 * Tell the primary_controller to relinquish control, primary control_machine
 *	has to suspend operation
 * Based on _shutdown_backup_controller from controller.c
 * wait_time - How long to wait for primary controller to write state, seconds.
 * RET 0 or an error code
 * NOTE: READ lock_slurmctld config before entry (or be single-threaded)
 */
static int _shutdown_primary_controller(int wait_time)
{
	int rc;
	slurm_msg_t req;

	slurm_msg_t_init(&req);
	if ((slurmctld_conf.control_addr == NULL) ||
	    (slurmctld_conf.control_addr[0] == '\0')) {
		error("_shutdown_primary_controller: "
		      "no primary controller to shutdown");
		return SLURM_ERROR;
	}

	slurm_set_addr(&req.address, slurmctld_conf.slurmctld_port,
		       slurmctld_conf.control_addr);

	/* send request message */
	req.msg_type = REQUEST_CONTROL;

	if (slurm_send_recv_rc_msg_only_one(&req, &rc,
				(CONTROL_TIMEOUT * 1000)) < 0) {
		error("_shutdown_primary_controller:send/recv: %m");
		return SLURM_ERROR;
	}
	/* ESLURM_DISABLED means the primary declined to yield control */
	if (rc == ESLURM_DISABLED)
		debug("primary controller responding");
	else if (rc == 0) {
		debug("primary controller has relinquished control");
	} else {
		error("_shutdown_primary_controller: %s",
		      slurm_strerror(rc));
		return SLURM_ERROR;
	}

	/* FIXME: Ideally the REQUEST_CONTROL RPC does not return until all
	 * other activity has ceased and the state has been saved. That is
	 * not presently the case (it returns when no other work is pending,
	 * so the state save should occur right away). We sleep for a while
	 * here and give the primary controller time to shutdown */
	if (wait_time)
		sleep(wait_time);

	return SLURM_SUCCESS;
}
/*
 * srun_allocate - notify srun of a resource allocation
 * IN job_id - id of the job allocated resource
 */
extern void srun_allocate (uint32_t job_id)
{
	struct job_record *job_ptr = find_job_record (job_id);

	xassert(job_ptr);
	/* Only send if srun registered a response port/host and the
	 * allocation details are fully populated */
	if (job_ptr && job_ptr->alloc_resp_port && job_ptr->alloc_node &&
	    job_ptr->resp_host && job_ptr->job_resrcs &&
	    job_ptr->job_resrcs->cpu_array_cnt) {
		slurm_addr_t * addr;
		resource_allocation_response_msg_t *msg_arg;
		job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs;

		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, job_ptr->alloc_resp_port,
			       job_ptr->resp_host);
		/* Build the allocation response in-line */
		msg_arg = xmalloc(sizeof(resource_allocation_response_msg_t));
		msg_arg->job_id         = job_ptr->job_id;
		msg_arg->node_list      = xstrdup(job_ptr->nodes);
		msg_arg->alias_list     = xstrdup(job_ptr->alias_list);
		msg_arg->num_cpu_groups = job_resrcs_ptr->cpu_array_cnt;
		msg_arg->cpus_per_node  = xmalloc(sizeof(uint16_t) *
					  job_resrcs_ptr->cpu_array_cnt);
		if (job_ptr->details) {
			msg_arg->pn_min_memory = job_ptr->details->
						 pn_min_memory;
		}
		memcpy(msg_arg->cpus_per_node,
		       job_resrcs_ptr->cpu_array_value,
		       (sizeof(uint16_t) * job_resrcs_ptr->cpu_array_cnt));
		msg_arg->cpu_count_reps = xmalloc(sizeof(uint32_t) *
					  job_resrcs_ptr->cpu_array_cnt);
		memcpy(msg_arg->cpu_count_reps,
		       job_resrcs_ptr->cpu_array_reps,
		       (sizeof(uint32_t) * job_resrcs_ptr->cpu_array_cnt));
		msg_arg->node_cnt       = job_ptr->node_cnt;
		msg_arg->select_jobinfo = select_g_select_jobinfo_copy(
					  job_ptr->select_jobinfo);
		msg_arg->error_code     = SLURM_SUCCESS;
		_srun_agent_launch(addr, job_ptr->alloc_node,
				   RESPONSE_RESOURCE_ALLOCATION, msg_arg);
	}
}
/*
 * srun_step_timeout - notify srun of a job step's imminent timeout
 * IN step_ptr - pointer to the slurmctld step record
 * IN timeout_val - when it is going to time out
 */
extern void srun_step_timeout(struct step_record *step_ptr,
			      time_t timeout_val)
{
	slurm_addr_t *srun_addr;
	srun_timeout_msg_t *timeout_msg;

	xassert(step_ptr);

	/* Skip the batch script and steps without a registered channel */
	if (step_ptr->batch_step || !step_ptr->port ||
	    !step_ptr->host || (step_ptr->host[0] == '\0'))
		return;

	srun_addr = xmalloc(sizeof(struct sockaddr_in));
	slurm_set_addr(srun_addr, step_ptr->port, step_ptr->host);

	timeout_msg = xmalloc(sizeof(srun_timeout_msg_t));
	timeout_msg->job_id  = step_ptr->job_ptr->job_id;
	timeout_msg->step_id = step_ptr->step_id;
	timeout_msg->timeout = timeout_val;

	_srun_agent_launch(srun_addr, step_ptr->host, SRUN_TIMEOUT,
			   timeout_msg, step_ptr->start_protocol_ver);
}
/*
 * srun_job_suspend - notify salloc of suspend/resume operation
 * IN job_ptr - pointer to the slurmctld job record
 * IN op - SUSPEND_JOB or RESUME_JOB (enum suspend_opts from slurm.h)
 * RET - true if message send, otherwise false
 */
extern bool srun_job_suspend (struct job_record *job_ptr, uint16_t op)
{
	slurm_addr_t *srun_addr;
	suspend_msg_t *suspend_msg;

	xassert(job_ptr);

	/* No registered allocation-level communication channel */
	if (!job_ptr->other_port || !job_ptr->alloc_node ||
	    !job_ptr->resp_host)
		return false;

	srun_addr = xmalloc(sizeof(struct sockaddr_in));
	slurm_set_addr(srun_addr, job_ptr->other_port, job_ptr->resp_host);

	suspend_msg = xmalloc(sizeof(suspend_msg_t));
	suspend_msg->job_id = job_ptr->job_id;
	suspend_msg->op     = op;

	_srun_agent_launch(srun_addr, job_ptr->alloc_node,
			   SRUN_REQUEST_SUSPEND, suspend_msg);
	return true;
}
/* Initialize the message aggregation sender: set up the collection
 * structure, record the collector node address/window/limits, and start
 * the background aggregation thread (with pthread_create retries).
 * IN host/port - address of the collector node
 * IN window - aggregation time window
 * IN max_msg_cnt - flush threshold; aggregation disabled if <= 1 */
extern void msg_aggr_sender_init(char *host, uint16_t port, uint64_t window,
				 uint64_t max_msg_cnt)
{
	pthread_attr_t attr;
	int retries = 0;

	/* Already running, or aggregation effectively disabled */
	if (msg_collection.running || (max_msg_cnt <= 1))
		return;

	memset(&msg_collection, 0, sizeof(msg_collection_type_t));
	slurm_mutex_init(&msg_collection.aggr_mutex);
	slurm_mutex_init(&msg_collection.mutex);
	/* Hold both locks while populating the structure so the new
	 * thread cannot observe a partially-initialized state.
	 * NOTE: lock order is mutex then aggr_mutex */
	slurm_mutex_lock(&msg_collection.mutex);
	slurm_mutex_lock(&msg_collection.aggr_mutex);
	pthread_cond_init(&msg_collection.cond, NULL);
	slurm_set_addr(&msg_collection.node_addr, port, host);
	msg_collection.window = window;
	msg_collection.max_msg_cnt = max_msg_cnt;
	msg_collection.msg_aggr_list = list_create(_msg_aggr_free);
	msg_collection.msg_list = list_create(slurm_free_comp_msg_list);
	msg_collection.max_msgs = false;
	msg_collection.debug_flags = slurm_get_debug_flags();
	slurm_mutex_unlock(&msg_collection.aggr_mutex);
	slurm_mutex_unlock(&msg_collection.mutex);

	slurm_attr_init(&attr);
	/* Retry thread creation a few times before giving up */
	while (pthread_create(&msg_collection.thread_id, &attr,
			      &_msg_aggregation_sender, NULL)) {
		error("msg_aggr_sender_init: pthread_create: %m");
		if (++retries > 3)
			fatal("msg_aggr_sender_init: pthread_create: %m");
		usleep(10);	/* sleep and again */
	}

	return;
}
static void _setup_env_working_cluster(void) { char *working_env = NULL; if ((working_env = xstrdup(getenv("SLURM_WORKING_CLUSTER")))) { char *addr_ptr, *port_ptr, *rpc_ptr; if (!(addr_ptr = strchr(working_env, ':')) || !(port_ptr = strchr(addr_ptr + 1, ':')) || !(rpc_ptr = strchr(port_ptr + 1, ':'))) { error("malformed cluster addr and port in SLURM_WORKING_CLUSTER env var: '%s'", working_env); exit(1); } *addr_ptr++ = '\0'; *port_ptr++ = '\0'; *rpc_ptr++ = '\0'; if (strcmp(slurmctld_conf.cluster_name, working_env)) { working_cluster_rec = xmalloc(sizeof(slurmdb_cluster_rec_t)); slurmdb_init_cluster_rec(working_cluster_rec, false); working_cluster_rec->control_host = xstrdup(addr_ptr);; working_cluster_rec->control_port = strtol(port_ptr, NULL, 10); working_cluster_rec->rpc_version = strtol(rpc_ptr, NULL, 10); slurm_set_addr(&working_cluster_rec->control_addr, working_cluster_rec->control_port, working_cluster_rec->control_host); } xfree(working_env); } }
/*
 * _set_collectors call the split_hostlist API on the all nodes hostlist
 * to set the node to be used as a collector for unsolicited node aggregation.
 *
 * If this node is a forwarding node (first node in any hostlist),
 * then its collector and backup are the ControlMachine and it's backup.
 *
 * Otherwise, we find the hostlist containing this node.
 * The forwarding node in that hostlist becomes a collector, the next node
 * which is not this node becomes the backup.
 * That list is split, we iterate through it and searching for a list in
 * which this node is a forwarding node. If found, we set the collector and
 * backup, else this process is repeated.
 */
static void _set_collectors(char *this_node_name)
{
	slurm_ctl_conf_t *conf;
	hostlist_t  nodes;
	hostlist_t* hll = NULL;
	char *parent = NULL, *backup = NULL;
	char addrbuf[32];
	int i, j, f = -1;
	int hl_count = 0;
	uint16_t parent_port;
	uint16_t backup_port;
	bool found = false;
	bool ctldparent = true;

#ifdef HAVE_FRONT_END
	return; /* on a FrontEnd system this would never be useful. */
#endif

	if (!run_in_daemon("slurmd"))
		return; /* Only compute nodes have collectors */

	/* Set the initial iteration, collector is controller,
	 * full list is split */
	xassert(this_node_name);

	conf = slurm_conf_lock();
	nodes = _get_all_nodes();
	/* NOTE: parent/backup come from strdup/hostlist and are released
	 * with free(), while the msg_collect_* globals use xmalloc/xfree;
	 * do not mix the two allocators */
	parent = strdup(conf->control_addr);
	if (conf->backup_addr) {
		backup = strdup(conf->backup_addr);
	}
	parent_port = conf->slurmctld_port;
	backup_port = parent_port;
	slurm_conf_unlock();

	while (!found) {
		if ( route_g_split_hostlist(nodes, &hll, &hl_count) ) {
			error("unable to split forward hostlist");
			goto clean; /* collector addrs remains null */
		}
		/* Find which hostlist contains this node */
		for (i=0; i < hl_count; i++) {
			f = hostlist_find(hll[i], this_node_name);
			if (f != -1)
				break;
		}
		if (i == hl_count) {
			fatal("ROUTE -- %s not found in node_record_table",
			      this_node_name);
		}
		if (f == 0) {
			/* we are a forwarded to node,
			 * so our parent is parent */
			if (hostlist_count(hll[i]) > 1)
				this_is_collector = true;
			xfree(msg_collect_node);
			msg_collect_node = xmalloc(sizeof(slurm_addr_t));
			if (ctldparent)
				slurm_set_addr(msg_collect_node,
					       parent_port, parent);
			else {
				slurm_conf_get_addr(parent,
						    msg_collect_node);
				msg_collect_node->sin_port =
					htons(parent_port);
			}
			if (debug_flags & DEBUG_FLAG_ROUTE) {
				slurm_print_slurm_addr(msg_collect_node,
						       addrbuf, 32);
				info("ROUTE -- message collector address is %s",
				     addrbuf);
			}
			xfree(msg_collect_backup);
			if (backup) {
				msg_collect_backup =
					xmalloc(sizeof(slurm_addr_t));
				if (ctldparent) {
					slurm_set_addr(msg_collect_backup,
						       backup_port, backup);
				} else {
					slurm_conf_get_addr(backup,
							msg_collect_backup);
					msg_collect_backup->sin_port =
						htons(backup_port);
				}
				if (debug_flags & DEBUG_FLAG_ROUTE) {
					slurm_print_slurm_addr(
						msg_collect_backup,
						addrbuf, 32);
					info("ROUTE -- message collector backup"
					     " address is %s", addrbuf);
				}
			} else {
				if (debug_flags & DEBUG_FLAG_ROUTE) {
					info("ROUTE -- no message collector "
					     "backup");
				}
			}
			found = true;
			goto clean;
		}

		/* We are not a forwarding node, the first node in this list
		 * will split the forward_list.
		 * We also know that the forwarding node is not a controller.
		 *
		 * clean up parent context */
		ctldparent = false;
		hostlist_destroy(nodes);
		if (parent)
			free(parent);
		if (backup)
			free(backup);
		nodes = hostlist_copy(hll[i]);
		for (j=0; j < hl_count; j++) {
			hostlist_destroy(hll[j]);
		}
		xfree(hll);

		/* set our parent, backup, and continue search */
		parent = hostlist_shift(nodes);
		backup = hostlist_nth(nodes, 0);
		/* The backup must not be this node itself; pick the
		 * next node if it is */
		if (strcmp(backup, this_node_name) == 0) {
			free(backup);
			backup = NULL;
			if (hostlist_count(nodes) > 1)
				backup = hostlist_nth(nodes, 1);
		}
		parent_port =  slurm_conf_get_port(parent);
		if (backup) {
			backup_port = slurm_conf_get_port(backup);
		} else
			backup_port = 0;

	}
clean:
	if (debug_flags & DEBUG_FLAG_ROUTE) {
		if (this_is_collector)
			info("ROUTE -- %s is a collector node",
			     this_node_name);
		else
			info("ROUTE -- %s is a leaf node", this_node_name);
	}
	hostlist_destroy(nodes);
	if (parent)
		free(parent);
	if (backup)
		free(backup);
	for (i=0; i < hl_count; i++) {
		hostlist_destroy(hll[i]);
	}
	xfree(hll);
}