/*
 * Send a REQUEST_FORWARD_DATA RPC carrying `data` (length `len`) to the
 * slurmd on `nodename`, asking it to forward the payload to the local
 * address `address`.
 *
 * Returns SLURM_SUCCESS, or SLURM_ERROR / the first non-success return
 * code collected from the remote responses.
 *
 * FIX: `ret_list` was leaked on the "failed to send" error path; every
 * other exit from this function frees it.
 */
static int _pmix_p2p_send_core(const char *nodename, const char *address,
			       const char *data, uint32_t len)
{
	int rc, timeout;
	slurm_msg_t msg;
	forward_data_msg_t req;
	List ret_list;
	ret_data_info_t *ret_data_info = NULL;

	pmixp_debug_hang(0);

	slurm_msg_t_init(&msg);

	PMIXP_DEBUG("nodelist=%s, address=%s, len=%u", nodename, address, len);
	req.address = (char *)address;
	req.len = len;
	/* there is not much we can do - just cast) */
	req.data = (char *)data;

	msg.msg_type = REQUEST_FORWARD_DATA;
	msg.data = &req;

	if (slurm_conf_get_addr(nodename, &msg.address) == SLURM_ERROR) {
		PMIXP_ERROR("Can't find address for host "
			    "%s, check slurm.conf", nodename);
		return SLURM_ERROR;
	}

	timeout = slurm_get_msg_timeout() * 1000;
	msg.forward.timeout = timeout;
	msg.forward.cnt = 0;
	msg.forward.nodelist = NULL;
	ret_list = slurm_send_addr_recv_msgs(&msg, (char *)nodename, timeout);
	if (!ret_list) {
		/* This should never happen (when this was
		 * written slurm_send_addr_recv_msgs always
		 * returned a list */
		PMIXP_ERROR("No return list given from "
			    "slurm_send_addr_recv_msgs spawned for %s",
			    nodename);
		return SLURM_ERROR;
	} else if ((errno != SLURM_COMMUNICATIONS_CONNECTION_ERROR) &&
		   !list_count(ret_list)) {
		PMIXP_ERROR("failed to send to %s, errno=%d", nodename, errno);
		/* FIX: release the (empty) list before bailing out */
		FREE_NULL_LIST(ret_list);
		return SLURM_ERROR;
	}

	rc = SLURM_SUCCESS;
	/* Propagate the first failure seen; free each entry as we go */
	while ((ret_data_info = list_pop(ret_list))) {
		int temp_rc = slurm_get_return_code(ret_data_info->type,
						    ret_data_info->data);
		if (temp_rc != SLURM_SUCCESS)
			rc = temp_rc;
		destroy_data_info(ret_data_info);
	}

	/* Free the return list */
	FREE_NULL_LIST(ret_list);
	return rc;
}
/*
 * Send RPC `type` with payload `data` to every host in `nodelist` and
 * collapse the per-node responses into a single return code: the last
 * non-zero code seen, or 0 if every node succeeded.
 *
 * FIX: each popped ret_data_info and the response list itself were
 * leaked; free them as in the other response-collection helpers.
 */
static int _local_send_recv_rc_msgs(const char *nodelist,
				    slurm_msg_type_t type, void *data)
{
	List ret_list = NULL;
	int temp_rc = 0, rc = 0;
	ret_data_info_t *ret_data_info = NULL;
	slurm_msg_t *msg = xmalloc(sizeof(slurm_msg_t));

	slurm_msg_t_init(msg);
	msg->msg_type = type;
	msg->data = data;

	if ((ret_list = slurm_send_recv_msgs(nodelist, msg, 0, false))) {
		while ((ret_data_info = list_pop(ret_list))) {
			temp_rc = slurm_get_return_code(ret_data_info->type,
							ret_data_info->data);
			if (temp_rc)
				rc = temp_rc;
			/* FIX: popped entries must be destroyed here */
			destroy_data_info(ret_data_info);
		}
		/* FIX: the (now empty) response list was leaked */
		FREE_NULL_LIST(ret_list);
	} else {
		error("slurm_signal_job: no list was returned");
		rc = SLURM_ERROR;
	}

	slurm_free_msg(msg);
	return rc;
}
/*
 * Ping the slurmctld of a (federated) cluster with REQUEST_PING and
 * report the round-trip result.  The cluster lock is held across the
 * exchange; message members are always released before returning.
 */
static int _ping_controller(slurmdb_cluster_rec_t *cluster)
{
	slurm_msg_t ping_req;
	slurm_msg_t ping_resp;
	int rc = SLURM_SUCCESS;

	slurm_msg_t_init(&ping_req);
	slurm_msg_t_init(&ping_resp);
	ping_req.msg_type = REQUEST_PING;

	slurm_mutex_lock(&cluster->lock);

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR)
		info("pinging %s(%s:%d)", cluster->name,
		     cluster->control_host, cluster->control_port);

	rc = _send_recv_msg(cluster, &ping_req, &ping_resp, true);
	if (rc) {
		error("failed to ping %s(%s:%d)", cluster->name,
		      cluster->control_host, cluster->control_port);
	} else {
		/* transport OK; inspect the controller's return code */
		rc = slurm_get_return_code(ping_resp.msg_type, ping_resp.data);
		if (rc)
			error("ping returned error from %s(%s:%d)",
			      cluster->name, cluster->control_host,
			      cluster->control_port);
	}

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR)
		info("finished pinging %s(%s:%d)", cluster->name,
		     cluster->control_host, cluster->control_port);

	slurm_mutex_unlock(&cluster->lock);

	slurm_free_msg_members(&ping_req);
	slurm_free_msg_members(&ping_resp);

	return rc;
}
/*
 * checkpoint_tasks - issue REQUEST_CHECKPOINT_TASKS to the nodes of a
 * job step, waiting up to `wait` seconds per node for the responses.
 *
 * IN job_id/step_id  - step to checkpoint
 * IN begin_time      - checkpoint timestamp sent to the slurmds
 * IN image_dir       - directory for the checkpoint image
 * IN wait            - per-message timeout in seconds
 * IN nodelist        - nodes to contact
 * RET SLURM_SUCCESS, or the last non-zero node return code (also
 *     stored in errno via slurm_seterrno, matching original behavior)
 *
 * FIX: popped ret_data_info entries and the response list itself were
 * leaked; destroy/free them like the other response-collection loops.
 */
extern int checkpoint_tasks (uint32_t job_id, uint32_t step_id,
			     time_t begin_time, char *image_dir,
			     uint16_t wait, char *nodelist)
{
	int rc = SLURM_SUCCESS, temp_rc;
	checkpoint_tasks_msg_t ckpt_req;
	slurm_msg_t req_msg;
	List ret_list;
	ret_data_info_t *ret_data_info = NULL;

	slurm_msg_t_init(&req_msg);
	ckpt_req.job_id = job_id;
	ckpt_req.job_step_id = step_id;
	ckpt_req.timestamp = begin_time;
	ckpt_req.image_dir = image_dir;
	req_msg.msg_type = REQUEST_CHECKPOINT_TASKS;
	req_msg.data = &ckpt_req;

	if ((ret_list = slurm_send_recv_msgs(nodelist, &req_msg,
					     (wait * 1000), false))) {
		while ((ret_data_info = list_pop(ret_list))) {
			temp_rc = slurm_get_return_code(ret_data_info->type,
							ret_data_info->data);
			if (temp_rc)
				rc = temp_rc;
			/* FIX: popped entries must be destroyed here */
			destroy_data_info(ret_data_info);
		}
		/* FIX: the response list was leaked */
		FREE_NULL_LIST(ret_list);
	} else {
		error("slurm_checkpoint_tasks: no list was returned");
		rc = SLURM_ERROR;
	}

	slurm_seterrno(rc);
	return rc;
}
/*
 * Forward `data` (length `len`) to the stepds on every host in `hl`,
 * directing each to deliver it to local address `addr`.  Hosts that
 * acknowledge success are removed from `hl`, so on return `hl` holds
 * only the hosts that failed (for possible retry by the caller).
 *
 * RET 0 on success, otherwise the last non-zero node return code or
 *     SLURM_ERROR if no response list was produced.
 *
 * FIX: popped ret_data_info entries and the response list itself were
 * leaked; destroy each entry (after its node_name is used) and free
 * the list.
 */
static int _send_to_stepds(hostlist_t hl, const char *addr, uint32_t len,
			   char *data)
{
	List ret_list = NULL;
	int temp_rc = 0, rc = 0;
	ret_data_info_t *ret_data_info = NULL;
	slurm_msg_t *msg = xmalloc(sizeof(slurm_msg_t));
	forward_data_msg_t req;
	char *nodelist = NULL;

	slurm_msg_t_init(msg);

	req.address = xstrdup(addr);
	req.len = len;
	req.data = data;

	msg->msg_type = REQUEST_FORWARD_DATA;
	msg->data = &req;

	nodelist = hostlist_ranged_string_xmalloc(hl);

	if ((ret_list = slurm_send_recv_msgs(nodelist, msg, 0, false))) {
		while ((ret_data_info = list_pop(ret_list))) {
			temp_rc = slurm_get_return_code(ret_data_info->type,
							ret_data_info->data);
			if (temp_rc) {
				rc = temp_rc;
			} else {
				/* node succeeded: drop it from the
				 * caller's retry list */
				hostlist_delete_host(
					hl, ret_data_info->node_name);
			}
			/* FIX: destroy only after node_name was used */
			destroy_data_info(ret_data_info);
		}
		/* FIX: the response list was leaked */
		FREE_NULL_LIST(ret_list);
	} else {
		error("tree_msg_to_stepds: no list was returned");
		rc = SLURM_ERROR;
	}

	slurm_free_msg(msg);
	xfree(nodelist);
	xfree(req.address);
	return rc;
}
/* Issue the RPC to transfer the file's data.
 * Broadcasts `bcast_msg` to every node in the sbcast credential and
 * returns 0 on full success, otherwise the highest error code reported
 * by any node.  Exits the program if no response list is produced. */
static int _file_bcast(struct bcast_parameters *params,
		       file_bcast_msg_t *bcast_msg,
		       job_sbcast_cred_msg_t *sbcast_cred)
{
	slurm_msg_t bcast_req;
	List resp_list;
	ListIterator resp_iter;
	ret_data_info_t *resp;
	int worst_rc = 0;

	slurm_msg_t_init(&bcast_req);
	bcast_req.msg_type = REQUEST_FILE_BCAST;
	bcast_req.data = bcast_msg;

	resp_list = slurm_send_recv_msgs(sbcast_cred->node_list, &bcast_req,
					 params->timeout, true);
	if (!resp_list) {
		error("slurm_send_recv_msgs: %m");
		exit(1);
	}

	/* Scan every per-node response, remembering the worst failure */
	resp_iter = list_iterator_create(resp_list);
	while ((resp = list_next(resp_iter))) {
		int node_rc = slurm_get_return_code(resp->type, resp->data);
		if (node_rc != SLURM_SUCCESS) {
			error("REQUEST_FILE_BCAST(%s): %s",
			      resp->node_name, slurm_strerror(node_rc));
			worst_rc = MAX(worst_rc, node_rc);
		}
	}
	list_iterator_destroy(resp_iter);
	FREE_NULL_LIST(resp_list);

	return worst_rc;
}
/*
 * Send `req` to the controller selected by `dest` (primary or backup)
 * over a fresh connection and wait for its RESPONSE_SLURM_RC reply.
 *
 * RET SLURM_PROTOCOL_SUCCESS, or sets errno and returns the matching
 *     SLURMCTLD_COMMUNICATIONS_* / RPC error code on failure.
 *
 * FIX: the xmalloc'd resp_msg was leaked when slurm_receive_msg
 * failed; free it on that path too.
 */
int _send_message_controller (enum controller_id dest, slurm_msg_t *req)
{
	int rc = SLURM_PROTOCOL_SUCCESS;
	slurm_fd_t fd = -1;
	slurm_msg_t *resp_msg = NULL;

	/* always going to one node (primary or backup per value of "dest") */
	if ((fd = slurm_open_controller_conn_spec(dest)) < 0)
		slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_CONNECTION_ERROR);

	if (slurm_send_node_msg(fd, req) < 0) {
		slurm_shutdown_msg_conn(fd);
		slurm_seterrno_ret(SLURMCTLD_COMMUNICATIONS_SEND_ERROR);
	}

	resp_msg = xmalloc(sizeof(slurm_msg_t));
	slurm_msg_t_init(resp_msg);

	if ((rc = slurm_receive_msg(fd, resp_msg, 0)) != 0) {
		slurm_shutdown_msg_conn(fd);
		/* FIX: resp_msg was leaked on this error path */
		slurm_free_msg(resp_msg);
		return SLURMCTLD_COMMUNICATIONS_RECEIVE_ERROR;
	}

	if (slurm_shutdown_msg_conn(fd) != SLURM_SUCCESS)
		rc = SLURMCTLD_COMMUNICATIONS_SHUTDOWN_ERROR;
	else if (resp_msg->msg_type != RESPONSE_SLURM_RC)
		rc = SLURM_UNEXPECTED_MSG_ERROR;
	else
		rc = slurm_get_return_code(resp_msg->msg_type,
					   resp_msg->data);
	slurm_free_msg(resp_msg);

	if (rc)
		slurm_seterrno_ret(rc);
	return rc;
}
/*
 * slurm_job_step_get_pids - get the complete list of pids for a given
 *	job step
 *
 * IN job_id
 * IN step_id
 * IN node_list, optional, if NULL then all nodes in step are returned.
 * OUT resp
 * RET SLURM_SUCCESS on success SLURM_ERROR else
 *
 * NOTE: if *resp is NULL on entry, a response structure is allocated
 * here; ownership passes to the caller on success, but it is freed
 * again (and *resp reset to NULL) if the send/recv fails.
 */
extern int slurm_job_step_get_pids(uint32_t job_id, uint32_t step_id,
				   char *node_list,
				   job_step_pids_response_msg_t **resp)
{
	int rc = SLURM_SUCCESS;
	slurm_msg_t req_msg;
	job_step_id_msg_t req;
	ListIterator itr;
	List ret_list = NULL;
	ret_data_info_t *ret_data_info = NULL;
	slurm_step_layout_t *step_layout = NULL;
	job_step_pids_response_msg_t *resp_out;
	bool created = 0;	/* true if we allocated *resp ourselves */

	xassert(resp);

	/* With no explicit node list, query every node in the step */
	if(!node_list) {
		if(!(step_layout =
		     slurm_job_step_layout_get(job_id, step_id))) {
			rc = errno;
			error("slurm_job_step_get_pids: "
			      "problem getting step_layout for %u.%u: %s",
			      job_id, step_id, slurm_strerror(rc));
			return rc;
		}
		node_list = step_layout->node_list;
	}

	if(!*resp) {
		resp_out = xmalloc(sizeof(job_step_pids_response_msg_t));
		*resp = resp_out;
		created = 1;
	} else
		resp_out = *resp;

	debug("slurm_job_step_get_pids: "
	      "getting pid information of job %u.%u on nodes %s",
	      job_id, step_id, node_list);

	slurm_msg_t_init(&req_msg);

	memset(&req, 0, sizeof(job_step_id_msg_t));
	resp_out->job_id = req.job_id = job_id;
	resp_out->step_id = req.step_id = step_id;

	req_msg.msg_type = REQUEST_JOB_STEP_PIDS;
	req_msg.data = &req;

	if(!(ret_list = slurm_send_recv_msgs(node_list,
					     &req_msg, 0, false))) {
		error("slurm_job_step_get_pids: got an error no list returned");
		rc = SLURM_ERROR;
		/* only free the response if we created it above */
		if(created) {
			slurm_job_step_pids_response_msg_free(resp_out);
			*resp = NULL;
		}
		goto cleanup;
	}

	itr = list_iterator_create(ret_list);
	while((ret_data_info = list_next(itr))) {
		switch (ret_data_info->type) {
		case RESPONSE_JOB_STEP_PIDS:
			if(!resp_out->pid_list)
				resp_out->pid_list = list_create(
					slurm_free_job_step_pids);
			list_push(resp_out->pid_list,
				  ret_data_info->data);
			/* data ownership moved into pid_list; clear the
			 * pointer so list_destroy below won't free it */
			ret_data_info->data = NULL;
			break;
		case RESPONSE_SLURM_RC:
			rc = slurm_get_return_code(ret_data_info->type,
						   ret_data_info->data);
			error("slurm_job_step_get_pids: "
			      "there was an error with the "
			      "list pid request rc = %s",
			      slurm_strerror(rc));
			break;
		default:
			rc = slurm_get_return_code(ret_data_info->type,
						   ret_data_info->data);
			error("slurm_job_step_get_pids: "
			      "unknown return given %d rc = %s",
			      ret_data_info->type, slurm_strerror(rc));
			break;
		}
	}
	list_iterator_destroy(itr);
	list_destroy(ret_list);

	if(resp_out->pid_list)
		list_sort(resp_out->pid_list, (ListCmpF)_sort_pids_by_name);
cleanup:
	/* NULL-safe; only non-NULL when we fetched the layout above */
	slurm_step_layout_destroy(step_layout);

	return rc;
}
/*
 * _thread_per_group_rpc - thread to issue an RPC for a group of nodes
 *	sending message out to one and forwarding it to
 *	others if necessary.
 * IN/OUT args - pointer to task_info_t, xfree'd on completion
 */
static void *_thread_per_group_rpc(void *args)
{
	int rc = SLURM_SUCCESS;
	slurm_msg_t msg;
	task_info_t *task_ptr = (task_info_t *) args;
	/* we cache some pointers from task_info_t because we need
	 * to xfree args before being finished with their use. xfree
	 * is required for timely termination of this pthread because
	 * xfree could lock it at the end, preventing a timely
	 * thread_exit */
	pthread_mutex_t *thread_mutex_ptr = task_ptr->thread_mutex_ptr;
	pthread_cond_t *thread_cond_ptr = task_ptr->thread_cond_ptr;
	uint32_t *threads_active_ptr = task_ptr->threads_active_ptr;
	thd_t *thread_ptr = task_ptr->thread_struct_ptr;
	state_t thread_state = DSH_NO_RESP;
	slurm_msg_type_t msg_type = task_ptr->msg_type;
	bool is_kill_msg, srun_agent;
	List ret_list = NULL;
	ListIterator itr;
	ret_data_info_t *ret_data_info = NULL;
	int found = 0;
	int sig_array[2] = {SIGUSR1, 0};
	/* Locks: Write job, write node */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };

	xassert(args != NULL);
	/* SIGUSR1 is used (by the agent, presumably — confirm) to
	 * interrupt a thread blocked in communication */
	xsignal(SIGUSR1, _sig_handler);
	xsignal_unblock(sig_array);
	is_kill_msg = (	(msg_type == REQUEST_KILL_TIMELIMIT) ||
			(msg_type == REQUEST_TERMINATE_JOB) );
	srun_agent = (	(msg_type == SRUN_PING)		||
			(msg_type == SRUN_EXEC)		||
			(msg_type == SRUN_JOB_COMPLETE)	||
			(msg_type == SRUN_STEP_MISSING)	||
			(msg_type == SRUN_TIMEOUT)	||
			(msg_type == SRUN_USER_MSG)	||
			(msg_type == RESPONSE_RESOURCE_ALLOCATION) ||
			(msg_type == SRUN_NODE_FAIL) );

	thread_ptr->start_time = time(NULL);

	/* publish that this thread is now actively communicating */
	slurm_mutex_lock(thread_mutex_ptr);
	thread_ptr->state = DSH_ACTIVE;
	thread_ptr->end_time = thread_ptr->start_time + COMMAND_TIMEOUT;
	slurm_mutex_unlock(thread_mutex_ptr);

	/* send request message */
	slurm_msg_t_init(&msg);
	msg.msg_type = msg_type;
	msg.data = task_ptr->msg_args_ptr;
#if 0
	info("sending message type %u to %s", msg_type, thread_ptr->nodelist);
#endif
	if (task_ptr->get_reply) {
		/* reply expected: send and collect per-node responses */
		if(thread_ptr->addr) {
			msg.address = *thread_ptr->addr;

			if(!(ret_list = slurm_send_addr_recv_msgs(
				     &msg, thread_ptr->nodelist, 0))) {
				error("_thread_per_group_rpc: "
				      "no ret_list given");
				goto cleanup;
			}
		} else {
			if(!(ret_list = slurm_send_recv_msgs(
				     thread_ptr->nodelist, &msg, 0, true))) {
				error("_thread_per_group_rpc: "
				      "no ret_list given");
				goto cleanup;
			}
		}
	} else {
		/* fire-and-forget: resolve an address and send once */
		if(thread_ptr->addr) {
			//info("got the address");
			msg.address = *thread_ptr->addr;
		} else {
			//info("no address given");
			if(slurm_conf_get_addr(thread_ptr->nodelist,
					       &msg.address) == SLURM_ERROR) {
				error("_thread_per_group_rpc: "
				      "can't find address for host %s, "
				      "check slurm.conf",
				      thread_ptr->nodelist);
				goto cleanup;
			}
		}
		//info("sending %u to %s", msg_type, thread_ptr->nodelist);
		if (slurm_send_only_node_msg(&msg) == SLURM_SUCCESS) {
			thread_state = DSH_DONE;
		} else {
			if (!srun_agent)
				_comm_err(thread_ptr->nodelist, msg_type);
		}
		goto cleanup;
	}

	//info("got %d messages back", list_count(ret_list));
	found = 0;
	itr = list_iterator_create(ret_list);
	while ((ret_data_info = list_next(itr)) != NULL) {
		rc = slurm_get_return_code(ret_data_info->type,
					   ret_data_info->data);
		/* SPECIAL CASE: Mark node as IDLE if job already complete */
		if (is_kill_msg &&
		    (rc == ESLURMD_KILL_JOB_ALREADY_COMPLETE)) {
			kill_job_msg_t *kill_job;
			kill_job = (kill_job_msg_t *)
				task_ptr->msg_args_ptr;
			rc = SLURM_SUCCESS;
			lock_slurmctld(job_write_lock);
			if (job_epilog_complete(kill_job->job_id,
						ret_data_info->
						node_name, rc))
				run_scheduler = true;
			unlock_slurmctld(job_write_lock);
		}

		/* SPECIAL CASE: Kill non-startable batch job,
		 * Requeue the job on ESLURMD_PROLOG_FAILED */
		if ((msg_type == REQUEST_BATCH_JOB_LAUNCH) &&
		    (rc != SLURM_SUCCESS) && (rc != ESLURMD_PROLOG_FAILED) &&
		    (ret_data_info->type != RESPONSE_FORWARD_FAILED)) {
			batch_job_launch_msg_t *launch_msg_ptr =
				task_ptr->msg_args_ptr;
			uint32_t job_id = launch_msg_ptr->job_id;
			info("Killing non-startable batch job %u: %s",
			     job_id, slurm_strerror(rc));
			thread_state = DSH_DONE;
			ret_data_info->err = thread_state;
			lock_slurmctld(job_write_lock);
			job_complete(job_id, 0, false, false, _wif_status());
			unlock_slurmctld(job_write_lock);
			continue;
		}

		if (((msg_type == REQUEST_SIGNAL_TASKS) ||
		     (msg_type == REQUEST_TERMINATE_TASKS)) &&
		     (rc == ESRCH)) {
			/* process is already dead, not a real error */
			rc = SLURM_SUCCESS;
		}

		/* map the node's return code onto a dsh thread state */
		switch (rc) {
		case SLURM_SUCCESS:
			/* debug("agent processed RPC to node %s", */
			/*       ret_data_info->node_name); */
			thread_state = DSH_DONE;
			break;
		case SLURM_UNKNOWN_FORWARD_ADDR:
			error("We were unable to forward message to '%s'. "
			      "Make sure the slurm.conf for each slurmd "
			      "contain all other nodes in your system.",
			      ret_data_info->node_name);
			thread_state = DSH_NO_RESP;
			break;
		case ESLURMD_EPILOG_FAILED:
			error("Epilog failure on host %s, "
			      "setting DOWN",
			      ret_data_info->node_name);
			thread_state = DSH_FAILED;
			break;
		case ESLURMD_PROLOG_FAILED:
			thread_state = DSH_FAILED;
			break;
		case ESLURM_INVALID_JOB_ID:
			/* Not indicative of a real error */
		case ESLURMD_JOB_NOTRUNNING:
			/* Not indicative of a real error */
			debug2("agent processed RPC to node %s: %s",
			       ret_data_info->node_name,
			       slurm_strerror(rc));
			thread_state = DSH_DONE;
			break;
		default:
			if (!srun_agent) {
				if (ret_data_info->err)
					errno = ret_data_info->err;
				else
					errno = rc;
				rc = _comm_err(ret_data_info->node_name,
					       msg_type);
			}
			if (srun_agent)
				thread_state = DSH_FAILED;
			else if(ret_data_info->type == RESPONSE_FORWARD_FAILED)
				/* check if a forward failed */
				thread_state = DSH_NO_RESP;
			else {	/* some will fail that don't mean anything went
				 * bad like a job term request on a job that is
				 * already finished, we will just exit on those
				 * cases */
				thread_state = DSH_DONE;
			}
		}
		ret_data_info->err = thread_state;
	}
	list_iterator_destroy(itr);

cleanup:
	xfree(args);

	/* handled at end of thread just in case resend is needed */
	destroy_forward(&msg.forward);
	slurm_mutex_lock(thread_mutex_ptr);
	/* ownership of ret_list transfers to thread_ptr for the
	 * agent's retry/cleanup logic */
	thread_ptr->ret_list = ret_list;
	thread_ptr->state = thread_state;
	thread_ptr->end_time = (time_t) difftime(time(NULL),
						 thread_ptr->start_time);
	/* Signal completion so another thread can replace us */
	(*threads_active_ptr)--;
	pthread_cond_signal(thread_cond_ptr);
	slurm_mutex_unlock(thread_mutex_ptr);
	return (void *) NULL;
}