/*
 * slurm_create_reservation - create a new reservation, only usable by
 *	user root
 * IN resv_msg - description of reservation
 * RET name of reservation on success (caller must free the memory),
 *	otherwise return NULL and set errno to indicate the error
 */
char *slurm_create_reservation(resv_desc_msg_t *resv_msg)
{
	int rc;
	char *resv_name = NULL;
	slurm_msg_t req_msg;
	slurm_msg_t resp_msg;
	reservation_name_msg_t *resp;

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);
	req_msg.msg_type = REQUEST_CREATE_RESERVATION;
	req_msg.data     = resv_msg;

	rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg);
	switch (resp_msg.msg_type) {
	case RESPONSE_CREATE_RESERVATION:
		resp = (reservation_name_msg_t *) resp_msg.data;
		resv_name = strdup(resp->name);
		break;
	case RESPONSE_SLURM_RC:
		rc = ((return_code_msg_t *) resp_msg.data)->return_code;
		if (rc)
			slurm_seterrno(rc);
		break;
	default:
		slurm_seterrno(SLURM_UNEXPECTED_MSG_ERROR);
	}
	slurm_free_msg_data(resp_msg.msg_type, resp_msg.data);
	return resv_name;
}
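/*
 * Usage sketch (added for illustration, not part of the source above):
 * create a one-hour reservation and release the returned name. Assumes the
 * public slurm.h behavior of slurm_init_resv_desc_msg() and the
 * resv_desc_msg_t fields shown; the field values are hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <slurm/slurm.h>

static void _create_resv_example(void)
{
	resv_desc_msg_t resv_msg;
	char *resv_name;

	slurm_init_resv_desc_msg(&resv_msg);
	resv_msg.start_time = time(NULL) + 3600;	/* start in one hour */
	resv_msg.duration   = 60;			/* minutes */
	resv_msg.users      = "root";

	resv_name = slurm_create_reservation(&resv_msg);
	if (!resv_name) {
		fprintf(stderr, "create failed: %s\n",
			slurm_strerror(slurm_get_errno()));
		return;
	}
	printf("created reservation %s\n", resv_name);
	free(resv_name);	/* name came from strdup(), so plain free() */
}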
/*
 * _suspend_op2 - perform a suspend/resume operation for some job.
 * IN op - operation to perform
 * IN job_id_str - job on which to perform operation in string format or NULL
 * OUT resp - slurm error codes by job array task ID
 * RET 0 or a slurm error code
 * NOTE: Supply either job_id as NO_VAL or job_id_str as NULL, not both
 */
static int _suspend_op2(uint16_t op, char *job_id_str,
			job_array_resp_msg_t **resp)
{
	int rc = SLURM_SUCCESS;
	suspend_msg_t sus_req;
	slurm_msg_t req_msg, resp_msg;

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);
	sus_req.op         = op;
	sus_req.job_id     = NO_VAL;
	sus_req.job_id_str = job_id_str;
	req_msg.msg_type   = REQUEST_SUSPEND;
	req_msg.data       = &sus_req;

	rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg);
	switch (resp_msg.msg_type) {
	case RESPONSE_JOB_ARRAY_ERRORS:
		*resp = (job_array_resp_msg_t *) resp_msg.data;
		break;
	case RESPONSE_SLURM_RC:
		rc = ((return_code_msg_t *) resp_msg.data)->return_code;
		if (rc)
			slurm_seterrno(rc);
		break;
	default:
		slurm_seterrno(SLURM_UNEXPECTED_MSG_ERROR);
	}

	return rc;
}
/*
 * slurm_update_job2 - issue RPC to update a job's configuration per request,
 *	only usable by user root or (for some parameters) the job's owner
 * IN job_msg - description of job updates
 * OUT resp - per task response to the request,
 *	      free using slurm_free_job_array_resp()
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
extern int slurm_update_job2(job_desc_msg_t *job_msg,
			     job_array_resp_msg_t **resp)
{
	int rc = SLURM_SUCCESS;
	slurm_msg_t req_msg, resp_msg;

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);
	req_msg.msg_type = REQUEST_UPDATE_JOB;
	req_msg.data     = job_msg;

	rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg);
	switch (resp_msg.msg_type) {
	case RESPONSE_JOB_ARRAY_ERRORS:
		*resp = (job_array_resp_msg_t *) resp_msg.data;
		break;
	case RESPONSE_SLURM_RC:
		rc = ((return_code_msg_t *) resp_msg.data)->return_code;
		if (rc)
			slurm_seterrno(rc);
		break;
	default:
		slurm_seterrno(SLURM_UNEXPECTED_MSG_ERROR);
	}

	return rc;
}
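/*
 * Usage sketch (added for illustration): hold every task of a job array via
 * slurm_update_job2() and report per-task status. The job_id_str value is
 * hypothetical; priority 0 marks a job held, as in scontrol_hold() below.
 */
#include <stdio.h>
#include <slurm/slurm.h>

static int _hold_array_example(char *job_id_str)
{
	job_desc_msg_t job_msg;
	job_array_resp_msg_t *resp = NULL;
	uint32_t i;

	slurm_init_job_desc_msg(&job_msg);
	job_msg.job_id_str = job_id_str;	/* e.g. "1234_[1-10]" */
	job_msg.priority   = 0;			/* priority 0 == held */

	if (slurm_update_job2(&job_msg, &resp) != SLURM_SUCCESS)
		return slurm_get_errno();

	for (i = 0; resp && (i < resp->job_array_count); i++)
		fprintf(stderr, "%s: %s\n", resp->job_array_id[i],
			slurm_strerror(resp->error_code[i]));
	slurm_free_job_array_resp(resp);
	return SLURM_SUCCESS;
}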
/*
 * slurm_requeue2 - re-queue a batch job, if already running
 *	then terminate it first
 * IN job_id_str - job on which to perform operation in string format or NULL
 * IN state - state in which to place the job
 * OUT resp - per task response to the request,
 *	      free using slurm_free_job_array_resp()
 * RET 0 or a slurm error code
 */
extern int slurm_requeue2(char *job_id_str, uint32_t state,
			  job_array_resp_msg_t **resp)
{
	int rc = SLURM_SUCCESS;
	requeue_msg_t requeue_req;
	slurm_msg_t req_msg, resp_msg;

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);
	requeue_req.job_id     = NO_VAL;
	requeue_req.job_id_str = job_id_str;
	requeue_req.state      = state;
	req_msg.msg_type       = REQUEST_JOB_REQUEUE;
	req_msg.data           = &requeue_req;

	rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg);
	switch (resp_msg.msg_type) {
	case RESPONSE_JOB_ARRAY_ERRORS:
		*resp = (job_array_resp_msg_t *) resp_msg.data;
		break;
	case RESPONSE_SLURM_RC:
		rc = ((return_code_msg_t *) resp_msg.data)->return_code;
		if (rc)
			slurm_seterrno(rc);
		break;
	default:
		slurm_seterrno(SLURM_UNEXPECTED_MSG_ERROR);
	}

	return rc;
}
/*
 * Returns 0 if the gid is invalid, otherwise returns 1. Sets *gid to the
 * correct gid if root launched the job. Also sets *user_name if not
 * already set.
 */
static int _valid_uid_gid(uid_t uid, gid_t *gid, char **user_name)
{
	struct passwd *pwd;
	struct group *grp;
	int i;

#ifdef HAVE_NATIVE_CRAY
	/* already verified */
	if (*user_name)
		return 1;
#endif

	pwd = getpwuid(uid);
	if (!pwd) {
		error("uid %ld not found on system", (long) uid);
		slurm_seterrno(ESLURMD_UID_NOT_FOUND);
		return 0;
	}
	if (!*user_name)
		*user_name = xstrdup(pwd->pw_name);

	if (pwd->pw_gid == *gid)
		return 1;

	grp = getgrgid(*gid);
	if (!grp) {
		error("gid %ld not found on system", (long) *gid);
		slurm_seterrno(ESLURMD_GID_NOT_FOUND);
		return 0;
	}

	/* Allow user root to use any valid gid */
	if (pwd->pw_uid == 0) {
		pwd->pw_gid = *gid;
		return 1;
	}
	for (i = 0; grp->gr_mem[i]; i++) {
		if (!strcmp(pwd->pw_name, grp->gr_mem[i])) {
			pwd->pw_gid = *gid;
			return 1;
		}
	}

	/* root user may have launched this job for this user, but
	 * root did not explicitly set the gid. This would set the
	 * gid to 0. In this case we should set the appropriate
	 * default gid for the user (from the passwd struct). */
	if (*gid == 0) {
		*gid = pwd->pw_gid;
		return 1;
	}
	error("uid %ld is not a member of gid %ld",
	      (long) pwd->pw_uid, (long) *gid);
	slurm_seterrno(ESLURMD_GID_NOT_FOUND);
	return 0;
}
slurm_fd_t _slurm_open_stream(slurm_addr_t *addr, bool retry)
{
	int retry_cnt;
	slurm_fd_t fd;
	uint16_t port;
	char ip[32];

	if ((addr->sin_family == 0) || (addr->sin_port == 0)) {
		error("Error connecting, bad data: family = %u, port = %u",
		      addr->sin_family, addr->sin_port);
		return SLURM_SOCKET_ERROR;
	}

	for (retry_cnt = 0; ; retry_cnt++) {
		int rc;

		if ((fd = _slurm_create_socket(SLURM_STREAM)) < 0) {
			error("Error creating slurm stream socket: %m");
			slurm_seterrno(errno);
			return SLURM_SOCKET_ERROR;
		}

		if (retry_cnt) {
			if (retry_cnt == 1) {
				debug3("Error connecting, "
				       "picking new stream port");
			}
			_sock_bind_wild(fd);
		}

		rc = _slurm_connect(fd, (struct sockaddr const *) addr,
				    sizeof(*addr));
		if (rc >= 0)	/* success */
			break;
		if (((errno != ECONNREFUSED) && (errno != ETIMEDOUT)) ||
		    (!retry) || (retry_cnt >= PORT_RETRIES)) {
			slurm_seterrno(errno);
			goto error;
		}

		if ((_slurm_close_stream(fd) < 0) && (errno == EINTR))
			_slurm_close_stream(fd);	/* try again */
	}

	return fd;

error:
	slurm_get_ip_str(addr, &port, ip, sizeof(ip));
	debug2("Error connecting slurm stream socket at %s:%d: %m",
	       ip, ntohs(port));
	if ((_slurm_close_stream(fd) < 0) && (errno == EINTR))
		_slurm_close_stream(fd);	/* try again */
	return SLURM_SOCKET_ERROR;
}
/*
 * slurm_update_job2 - issue RPC to update a job's configuration per request,
 *	only usable by user root or (for some parameters) the job's owner
 * IN job_msg - description of job updates
 * OUT resp - per task response to the request,
 *	      free using slurm_free_job_array_resp()
 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set
 */
extern int slurm_update_job2(job_desc_msg_t *job_msg,
			     job_array_resp_msg_t **resp)
{
	int rc = SLURM_SUCCESS;
	slurm_msg_t req_msg, resp_msg;
	slurmdb_cluster_rec_t *save_working_cluster_rec = working_cluster_rec;

	slurm_msg_t_init(&req_msg);
	req_msg.msg_type = REQUEST_UPDATE_JOB;
	req_msg.data     = job_msg;

tryagain:
	slurm_msg_t_init(&resp_msg);
	rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg,
					    working_cluster_rec);

	switch (resp_msg.msg_type) {
	case RESPONSE_SLURM_REROUTE_MSG:
	{
		reroute_msg_t *rr_msg = (reroute_msg_t *) resp_msg.data;

		/* Don't expect multiple hops, but in case it does
		 * happen, free the previous rerouted cluster_rec. */
		if (working_cluster_rec &&
		    (working_cluster_rec != save_working_cluster_rec))
			slurmdb_destroy_cluster_rec(working_cluster_rec);

		working_cluster_rec = rr_msg->working_cluster_rec;
		slurmdb_setup_cluster_rec(working_cluster_rec);
		rr_msg->working_cluster_rec = NULL;
		goto tryagain;
	}
	case RESPONSE_JOB_ARRAY_ERRORS:
		*resp = (job_array_resp_msg_t *) resp_msg.data;
		break;
	case RESPONSE_SLURM_RC:
		rc = ((return_code_msg_t *) resp_msg.data)->return_code;
		if (rc)
			slurm_seterrno(rc);
		break;
	default:
		slurm_seterrno(SLURM_UNEXPECTED_MSG_ERROR);
	}

	if (working_cluster_rec != save_working_cluster_rec) {
		slurmdb_destroy_cluster_rec(working_cluster_rec);
		working_cluster_rec = save_working_cluster_rec;
	}

	return rc;
}
/* slurm_load_assoc_mgr_info()
 *
 * Load requested controller assoc_mgr state.
 */
extern int slurm_load_assoc_mgr_info(assoc_mgr_info_request_msg_t *req,
				     assoc_mgr_info_msg_t **resp)
{
	int cc;
	slurm_msg_t msg_request;
	slurm_msg_t msg_reply;

	slurm_msg_t_init(&msg_request);
	slurm_msg_t_init(&msg_reply);

	msg_request.msg_type = REQUEST_ASSOC_MGR_INFO;
	msg_request.data     = req;

	cc = slurm_send_recv_controller_msg(&msg_request, &msg_reply);
	if (cc < 0)
		return SLURM_ERROR;

	switch (msg_reply.msg_type) {
	case RESPONSE_ASSOC_MGR_INFO:
		*resp = msg_reply.data;
		break;
	case RESPONSE_SLURM_RC:
		cc = ((return_code_msg_t *) msg_reply.data)->return_code;
		slurm_free_return_code_msg(msg_reply.data);
		if (cc)	/* slurm_seterrno_ret() is a macro ... sigh */
			slurm_seterrno(cc);
		return -1;
	default:
		slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
		break;
	}

	return SLURM_PROTOCOL_SUCCESS;
}
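/*
 * Usage sketch (added for illustration): fetch the controller's assoc_mgr
 * state with an empty request and free the reply. Assumes a zeroed
 * assoc_mgr_info_request_msg_t requests everything and that
 * slurm_free_assoc_mgr_info_msg() is the matching free routine from the
 * public slurm.h.
 */
#include <stdio.h>
#include <string.h>
#include <slurm/slurm.h>

static void _load_assoc_mgr_example(void)
{
	assoc_mgr_info_request_msg_t req;
	assoc_mgr_info_msg_t *resp = NULL;

	memset(&req, 0, sizeof(req));
	if (slurm_load_assoc_mgr_info(&req, &resp) != SLURM_PROTOCOL_SUCCESS) {
		fprintf(stderr, "load failed: %s\n",
			slurm_strerror(slurm_get_errno()));
		return;
	}
	/* resp->assoc_list, resp->qos_list and resp->user_list hold state */
	slurm_free_assoc_mgr_info_msg(resp);
}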
extern int proctrack_p_wait(uint64_t cont_id)
{
	pid_t pgid = (pid_t) cont_id;
	int delay = 1;

	if (cont_id == 0 || cont_id == 1) {
		slurm_seterrno(EINVAL);
		return SLURM_ERROR;
	}

	/* Spin until the process group is gone. */
	while (killpg(pgid, 0) == 0) {
		proctrack_p_signal(cont_id, SIGKILL);
		sleep(delay);
		if (delay < 120) {
			delay *= 2;
		} else {
			error("%s: Unable to destroy container %"PRIu64" "
			      "in pgid plugin, giving up after %d sec",
			      __func__, cont_id, delay);
			break;
		}
	}

	return SLURM_SUCCESS;
}
extern int checkpoint_tasks(uint32_t job_id, uint32_t step_id,
			    time_t begin_time, char *image_dir,
			    uint16_t wait, char *nodelist)
{
	int rc = SLURM_SUCCESS, temp_rc;
	checkpoint_tasks_msg_t ckpt_req;
	slurm_msg_t req_msg;
	List ret_list;
	ret_data_info_t *ret_data_info = NULL;

	slurm_msg_t_init(&req_msg);
	ckpt_req.job_id      = job_id;
	ckpt_req.job_step_id = step_id;
	ckpt_req.timestamp   = begin_time;
	ckpt_req.image_dir   = image_dir;
	req_msg.msg_type     = REQUEST_CHECKPOINT_TASKS;
	req_msg.data         = &ckpt_req;

	if ((ret_list = slurm_send_recv_msgs(nodelist, &req_msg,
					     (wait * 1000), false))) {
		while ((ret_data_info = list_pop(ret_list))) {
			temp_rc = slurm_get_return_code(ret_data_info->type,
							ret_data_info->data);
			if (temp_rc)
				rc = temp_rc;
		}
	} else {
		error("slurm_checkpoint_tasks: no list was returned");
		rc = SLURM_ERROR;
	}

	slurm_seterrno(rc);
	return rc;
}
/*
 * Handle a return code message type.
 * Sets errno to the return code and returns it.
 */
static int _handle_rc_msg(slurm_msg_t *msg)
{
	int rc = ((return_code_msg_t *) msg->data)->return_code;

	slurm_free_return_code_msg(msg->data);
	slurm_seterrno(rc);
	return rc;
}
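/*
 * Illustrative sketch (not from the source above): the response-dispatch
 * pattern in which a helper like _handle_rc_msg() is typically invoked,
 * mirroring the switch statements used throughout this section.
 */
static int _dispatch_example(slurm_msg_t *resp_msg)
{
	switch (resp_msg->msg_type) {
	case RESPONSE_SLURM_RC:
		/* sets errno from the embedded return code and frees data */
		return _handle_rc_msg(resp_msg);
	default:
		slurm_seterrno(SLURM_UNEXPECTED_MSG_ERROR);
		return SLURM_ERROR;
	}
}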
extern int switch_p_get_jobinfo(switch_jobinfo_t *switch_job,
				int key, void *resulting_data)
{
	if (debug_flags & DEBUG_FLAG_SWITCH)
		info("switch_p_get_jobinfo() starting");
	slurm_seterrno(EINVAL);
	return SLURM_ERROR;
}
extern int acct_gather_check_acct_freq_task(uint64_t job_mem_lim,
					    char *acctg_freq)
{
	int task_freq;
	static uint32_t acct_freq_task = NO_VAL;

	if (acct_freq_task == NO_VAL) {
		char *acct_freq = slurm_get_jobacct_gather_freq();
		int i = acct_gather_parse_freq(PROFILE_TASK, acct_freq);

		xfree(acct_freq);

		/* If the value is -1, set the frequency to something
		 * really high so we don't check this again. */
		if (i == -1)
			acct_freq_task = (uint16_t) NO_VAL;
		else
			acct_freq_task = i;
	}

	if (!job_mem_lim || !acct_freq_task)
		return 0;

	task_freq = acct_gather_parse_freq(PROFILE_TASK, acctg_freq);

	if (task_freq == -1)
		return 0;

	if (task_freq == 0) {
		error("Can't turn accounting frequency off. "
		      "We need it to monitor memory usage.");
		slurm_seterrno(ESLURMD_INVALID_ACCT_FREQ);
		return 1;
	} else if (task_freq > acct_freq_task) {
		error("Can't set frequency to %d, it is higher than %u. "
		      "We need it to be at least at this level to "
		      "monitor memory usage.",
		      task_freq, acct_freq_task);
		slurm_seterrno(ESLURMD_INVALID_ACCT_FREQ);
		return 1;
	}

	return 0;
}
/*
 * scontrol_requeue - requeue a pending or running batch job
 * IN job_str - a job id
 */
extern void scontrol_requeue(char *job_str)
{
	char *job_id_str;
	int rc, i;
	job_array_resp_msg_t *resp = NULL;

	if (!job_str[0]) {
		exit_code = 1;
		return;
	}

	if (xstrncasecmp(job_str, "jobid=", 6) == 0)
		job_str += 6;
	if (xstrncasecmp(job_str, "job=", 4) == 0)
		job_str += 4;

	if (_is_job_id(job_str)) {
		job_id_str = _next_job_id();
		while (job_id_str) {
			rc = slurm_requeue2(job_id_str, 0, &resp);
			if (rc != SLURM_SUCCESS) {
				exit_code = 1;
				if (quiet_flag != 1) {
					fprintf(stderr, "%s for job %s\n",
						slurm_strerror(slurm_get_errno()),
						job_id_str);
				}
			} else if (resp) {
				for (i = 0; i < resp->job_array_count; i++) {
					if ((resp->error_code[i] ==
					     SLURM_SUCCESS) &&
					    (resp->job_array_count == 1))
						continue;
					exit_code = 1;
					if (quiet_flag == 1)
						continue;
					fprintf(stderr, "%s: %s\n",
						resp->job_array_id[i],
						slurm_strerror(
							resp->error_code[i]));
				}
				slurm_free_job_array_resp(resp);
				resp = NULL;
			}
			job_id_str = _next_job_id();
		}
	} else {
		exit_code = 1;
		rc = ESLURM_INVALID_JOB_ID;
		slurm_seterrno(rc);
		if (quiet_flag != 1) {
			fprintf(stderr, "%s for job %s\n",
				slurm_strerror(rc), job_str);
		}
	}
}
extern int proctrack_p_signal(uint64_t id, int signal)
{
	pid_t pid = (pid_t) id;

	if (!id) {
		/* no container ID */
	} else if (pid == getpid() || pid == getpgid(0)) {
		error("slurm_signal_container would kill caller!");
	} else {
		return killpg(pid, signal);
	}

	slurm_seterrno(ESRCH);
	return SLURM_ERROR;
}
/*
 * scontrol_hold - perform some job hold/release operation
 * IN op - hold/release operation
 * IN job_id_str - a job id
 * RET 0 if no slurm error, errno otherwise. A parsing error prints an
 *	error message and returns 0.
 */
extern int scontrol_hold(char *op, char *job_id_str)
{
	int rc = SLURM_SUCCESS;
	char *next_str;
	job_desc_msg_t job_msg;
	uint16_t job_state;

	slurm_init_job_desc_msg(&job_msg);

	/* set current user, needed e.g., for AllowGroups checks */
	job_msg.user_id = getuid();

	if (job_id_str) {
		job_msg.job_id = (uint32_t) strtol(job_id_str, &next_str, 10);
		if ((job_msg.job_id == 0) || (next_str[0] != '\0')) {
			fprintf(stderr, "Invalid job id specified\n");
			exit_code = 1;
			return 0;
		}
	} else {
		fprintf(stderr, "Invalid job id specified\n");
		exit_code = 1;
		return 0;
	}

	job_state = scontrol_get_job_state(job_msg.job_id);
	if (job_state == (uint16_t) NO_VAL)
		return SLURM_ERROR;
	if ((job_state & JOB_STATE_BASE) != JOB_PENDING) {
		slurm_seterrno(ESLURM_JOB_NOT_PENDING);
		return ESLURM_JOB_NOT_PENDING;
	}

	if ((strncasecmp(op, "holdu", 5) == 0) ||
	    (strncasecmp(op, "uhold", 5) == 0)) {
		job_msg.priority  = 0;
		job_msg.alloc_sid = ALLOC_SID_USER_HOLD;
	} else if (strncasecmp(op, "hold", 4) == 0) {
		job_msg.priority  = 0;
		job_msg.alloc_sid = 0;
	} else
		job_msg.priority = INFINITE;

	if (slurm_update_job(&job_msg))
		return slurm_get_errno();

	return rc;
}
extern void scontrol_requeue_hold(uint32_t state_flag, char *job_str)
{
	int rc, i;
	char *job_id_str;
	job_array_resp_msg_t *resp = NULL;

	state_flag |= JOB_REQUEUE_HOLD;

	if (_is_job_id(job_str)) {
		job_id_str = _next_job_id();
		while (job_id_str) {
			rc = slurm_requeue2(job_id_str, state_flag, &resp);
			if (rc != SLURM_SUCCESS) {
				exit_code = 1;
				if (quiet_flag != 1) {
					fprintf(stderr, "%s for job %s\n",
						slurm_strerror(slurm_get_errno()),
						job_id_str);
				}
			} else if (resp) {
				for (i = 0; i < resp->job_array_count; i++) {
					if ((resp->error_code[i] ==
					     SLURM_SUCCESS) &&
					    (resp->job_array_count == 1))
						continue;
					exit_code = 1;
					if (quiet_flag == 1)
						continue;
					fprintf(stderr, "%s: %s\n",
						resp->job_array_id[i],
						slurm_strerror(
							resp->error_code[i]));
				}
				slurm_free_job_array_resp(resp);
				resp = NULL;
			}
			job_id_str = _next_job_id();
		}
	} else {
		exit_code = 1;
		rc = ESLURM_INVALID_JOB_ID;
		slurm_seterrno(rc);
		if (quiet_flag != 1) {
			fprintf(stderr, "%s for job %s\n",
				slurm_strerror(rc), job_str);
		}
	}
}
/*
 * slurm_requeue - re-queue a batch job, if already running
 *	then terminate it first
 * IN job_id - job on which to perform operation
 * RET 0 or a slurm error code
 */
extern int slurm_requeue(uint32_t job_id)
{
	int rc;
	job_id_msg_t requeue_req;
	slurm_msg_t req_msg;

	slurm_msg_t_init(&req_msg);
	requeue_req.job_id = job_id;
	req_msg.msg_type   = REQUEST_JOB_REQUEUE;
	req_msg.data       = &requeue_req;

	if (slurm_send_recv_controller_rc_msg(&req_msg, &rc) < 0)
		return SLURM_ERROR;

	slurm_seterrno(rc);
	return rc;
}
/*
 * _suspend_op - perform a suspend/resume operation for some job.
 * IN op - operation to perform
 * IN job_id - job on which to perform operation
 * RET 0 or a slurm error code
 */
static int _suspend_op(uint16_t op, uint32_t job_id)
{
	int rc;
	suspend_msg_t sus_req;
	slurm_msg_t req_msg;

	slurm_msg_t_init(&req_msg);
	sus_req.op       = op;
	sus_req.job_id   = job_id;
	req_msg.msg_type = REQUEST_SUSPEND;
	req_msg.data     = &sus_req;

	if (slurm_send_recv_controller_rc_msg(&req_msg, &rc) < 0)
		return SLURM_ERROR;

	slurm_seterrno(rc);
	return rc;
}
/*
 * Move the specified job ID to the top of the queue for a given user ID,
 * partition, account, and QOS.
 * IN job_id_str - a job id
 * RET 0 or a slurm error code
 */
extern int slurm_top_job(char *job_id_str)
{
	int rc = SLURM_SUCCESS;
	top_job_msg_t top_job_req;
	slurm_msg_t req_msg;

	slurm_msg_t_init(&req_msg);
	top_job_req.job_id_str = job_id_str;
	req_msg.msg_type       = REQUEST_TOP_JOB;
	req_msg.data           = &top_job_req;

	if (slurm_send_recv_controller_rc_msg(&req_msg, &rc) < 0)
		return SLURM_ERROR;

	slurm_seterrno(rc);
	return rc;
}
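/*
 * Usage sketch (added for illustration): promote a job within the caller's
 * own queue. The job id string is hypothetical, and the caller is assumed
 * to be permitted to reorder its own jobs.
 */
#include <stdio.h>
#include <slurm/slurm.h>

static void _top_job_example(void)
{
	if (slurm_top_job("1234") != SLURM_SUCCESS)
		fprintf(stderr, "top failed: %s\n",
			slurm_strerror(slurm_get_errno()));
}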
/* slurm_load_licenses()
 *
 * Load requested licenses from the controller.
 */
extern int slurm_load_licenses(time_t t, license_info_msg_t **lic_info,
			       uint16_t show_flags)
{
	int cc;
	slurm_msg_t msg_request;
	slurm_msg_t msg_reply;
	struct license_info_request_msg req;

	memset(&req, 0, sizeof(struct license_info_request_msg));
	slurm_msg_t_init(&msg_request);
	slurm_msg_t_init(&msg_reply);

	msg_request.msg_type = REQUEST_LICENSE_INFO;
	req.last_update      = t;
	req.show_flags       = show_flags;
	msg_request.data     = &req;

	cc = slurm_send_recv_controller_msg(&msg_request, &msg_reply,
					    working_cluster_rec);
	if (cc < 0)
		return SLURM_ERROR;

	switch (msg_reply.msg_type) {
	case RESPONSE_LICENSE_INFO:
		*lic_info = msg_reply.data;
		break;
	case RESPONSE_SLURM_RC:
		cc = ((return_code_msg_t *) msg_reply.data)->return_code;
		slurm_free_return_code_msg(msg_reply.data);
		if (cc)	/* slurm_seterrno_ret() is a macro ... sigh */
			slurm_seterrno(cc);
		*lic_info = NULL;
		return -1;
	default:
		slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
		break;
	}

	return SLURM_PROTOCOL_SUCCESS;
}
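/*
 * Usage sketch (added for illustration): list license usage. Passing a
 * last_update time of zero forces a full reload; assumes the
 * license_info_msg_t layout and slurm_free_license_info_msg() from the
 * public slurm.h.
 */
#include <stdio.h>
#include <slurm/slurm.h>

static void _print_licenses_example(void)
{
	license_info_msg_t *lic_info = NULL;
	uint32_t i;

	if (slurm_load_licenses((time_t) 0, &lic_info, 0) !=
	    SLURM_PROTOCOL_SUCCESS) {
		fprintf(stderr, "load failed: %s\n",
			slurm_strerror(slurm_get_errno()));
		return;
	}
	for (i = 0; i < lic_info->num_lic; i++)
		printf("%s: %u of %u in use\n",
		       lic_info->lic_array[i].name,
		       lic_info->lic_array[i].in_use,
		       lic_info->lic_array[i].total);
	slurm_free_license_info_msg(lic_info);
}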
/*
 * slurm_requeue - re-queue a batch job, if already running
 *	then terminate it first
 * IN job_id - job on which to perform operation
 * IN state - state in which to place the job
 * RET 0 or a slurm error code
 */
extern int slurm_requeue(uint32_t job_id, uint32_t state)
{
	int rc = SLURM_SUCCESS;
	requeue_msg_t requeue_req;
	slurm_msg_t req_msg;

	slurm_msg_t_init(&req_msg);
	requeue_req.job_id     = job_id;
	requeue_req.job_id_str = NULL;
	requeue_req.state      = state;
	req_msg.msg_type       = REQUEST_JOB_REQUEUE;
	req_msg.data           = &requeue_req;

	if (slurm_send_recv_controller_rc_msg(&req_msg, &rc) < 0)
		return SLURM_ERROR;

	slurm_seterrno(rc);
	return rc;
}
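/*
 * Usage sketch (added for illustration): requeue a running batch job and
 * leave it held afterwards, using the JOB_REQUEUE_HOLD flag also applied in
 * scontrol_requeue_hold() above. The job id is hypothetical.
 */
#include <stdio.h>
#include <slurm/slurm.h>

static void _requeue_hold_example(void)
{
	int rc = slurm_requeue(1234, JOB_REQUEUE_HOLD);

	if (rc != SLURM_SUCCESS)
		fprintf(stderr, "requeue failed: %s\n", slurm_strerror(rc));
}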
static int _check_acct_freq_task(uint32_t job_mem_lim, char *acctg_freq)
{
	int task_freq;

	if (!job_mem_lim || !conf->acct_freq_task)
		return 0;

	task_freq = acct_gather_parse_freq(PROFILE_TASK, acctg_freq);

	if (task_freq == -1)
		return 0;

	if ((task_freq == 0) || (task_freq > conf->acct_freq_task)) {
		error("Can't set frequency to %d, it is higher than %u. "
		      "We need it to be at least at this level to "
		      "monitor memory usage.",
		      task_freq, conf->acct_freq_task);
		slurm_seterrno(ESLURMD_INVALID_ACCT_FREQ);
		return 1;
	}

	return 0;
}
extern int slurm_container_plugin_wait(uint64_t cont_id)
{
	pid_t pgid = (pid_t) cont_id;
	int delay = 1;

	if (cont_id == 0 || cont_id == 1) {
		slurm_seterrno(EINVAL);
		return SLURM_ERROR;
	}

	/* Spin until the process group is gone. */
	while (killpg(pgid, 0) == 0) {
		slurm_container_plugin_signal(cont_id, SIGKILL);
		sleep(delay);
		if (delay < 120) {
			delay *= 2;
		} else {
			error("Unable to destroy container %"PRIu64"",
			      cont_id);
		}
	}

	return SLURM_SUCCESS;
}
/*
 * _checkpoint_op - perform a checkpoint operation for some job step.
 * IN op - operation to perform
 * IN data - operation-specific data
 * IN job_id - job on which to perform operation
 * IN step_id - job step on which to perform operation
 * IN image_dir - directory used to get/put checkpoint images
 * RET 0 or a slurm error code
 */
static int _checkpoint_op(uint16_t op, uint16_t data,
			  uint32_t job_id, uint32_t step_id,
			  char *image_dir)
{
	int rc;
	checkpoint_msg_t ckp_req;
	slurm_msg_t req_msg;

	slurm_msg_t_init(&req_msg);
	ckp_req.op        = op;
	ckp_req.data      = data;
	ckp_req.job_id    = job_id;
	ckp_req.step_id   = step_id;
	ckp_req.image_dir = image_dir;
	req_msg.msg_type  = REQUEST_CHECKPOINT;
	req_msg.data      = &ckp_req;

	if (slurm_send_recv_controller_rc_msg(&req_msg, &rc,
					      working_cluster_rec) < 0)
		return SLURM_ERROR;

	slurm_seterrno(rc);
	return rc;
}
extern int switch_p_get_jobinfo(switch_jobinfo_t *switch_job,
				int key, void *resulting_data)
{
	slurm_seterrno(EINVAL);
	return SLURM_ERROR;
}
/*
 * Current process is running as the user when this is called.
 */
extern void exec_task(stepd_step_rec_t *job, int local_proc_id)
{
	uint32_t *gtids;	/* pointer to array of ranks */
	int fd, j;
	stepd_step_task_info_t *task = job->task[local_proc_id];
	char **tmp_env;
	int saved_errno;
	uint32_t node_offset = 0, task_offset = 0;

	if (job->node_offset != NO_VAL)
		node_offset = job->node_offset;
	if (job->pack_task_offset != NO_VAL)
		task_offset = job->pack_task_offset;

	gtids = xmalloc(job->node_tasks * sizeof(uint32_t));
	for (j = 0; j < job->node_tasks; j++)
		gtids[j] = job->task[j]->gtid + task_offset;
	job->envtp->sgtids = _uint32_array_to_str(job->node_tasks, gtids);
	xfree(gtids);

	if (job->pack_jobid != NO_VAL)
		job->envtp->jobid = job->pack_jobid;
	else
		job->envtp->jobid = job->jobid;
	job->envtp->stepid = job->stepid;
	job->envtp->nodeid = job->nodeid + node_offset;
	job->envtp->cpus_on_node = job->cpus;
	job->envtp->procid = task->gtid + task_offset;
	job->envtp->localid = task->id;
	job->envtp->task_pid = getpid();
	job->envtp->distribution = job->task_dist;
	job->envtp->cpu_bind = xstrdup(job->cpu_bind);
	job->envtp->cpu_bind_type = job->cpu_bind_type;
	job->envtp->cpu_freq_min = job->cpu_freq_min;
	job->envtp->cpu_freq_max = job->cpu_freq_max;
	job->envtp->cpu_freq_gov = job->cpu_freq_gov;
	job->envtp->mem_bind = xstrdup(job->mem_bind);
	job->envtp->mem_bind_type = job->mem_bind_type;
	job->envtp->distribution = -1;
	job->envtp->ckpt_dir = xstrdup(job->ckpt_dir);
	job->envtp->batch_flag = job->batch;
	job->envtp->uid = job->uid;
	job->envtp->user_name = xstrdup(job->user_name);

	/*
	 * Modify copy of job's environment. Do not alter in place or
	 * concurrent searches of the environment can generate invalid memory
	 * references.
	 */
	job->envtp->env = env_array_copy((const char **) job->env);
	setup_env(job->envtp, false);
	setenvf(&job->envtp->env, "SLURM_JOB_GID", "%d", job->gid);
	setenvf(&job->envtp->env, "SLURMD_NODENAME", "%s", conf->node_name);
	if (job->tres_bind) {
		setenvf(&job->envtp->env, "SLURMD_TRES_BIND", "%s",
			job->tres_bind);
	}
	if (job->tres_freq) {
		setenvf(&job->envtp->env, "SLURMD_TRES_FREQ", "%s",
			job->tres_freq);
	}
	tmp_env = job->env;
	job->env = job->envtp->env;
	env_array_free(tmp_env);
	job->envtp->env = NULL;

	xfree(job->envtp->task_count);

	if (task->argv[0] && *task->argv[0] != '/') {
		/*
		 * Normally the client (srun) expands the command name
		 * to a fully qualified path, but in --multi-prog mode it
		 * is left up to the server to search the PATH for the
		 * executable.
		 */
		task->argv[0] = _build_path(task->argv[0], job->env, NULL);
	}

	if (!job->batch && (job->stepid != SLURM_EXTERN_CONT)) {
		if (switch_g_job_attach(job->switch_job, &job->env,
					job->nodeid, (uint32_t) local_proc_id,
					job->nnodes, job->ntasks,
					task->gtid) < 0) {
			error("Unable to attach to interconnect: %m");
			log_fini();
			exit(1);
		}

		if (_setup_mpi(job, local_proc_id) != SLURM_SUCCESS) {
			error("Unable to configure MPI plugin: %m");
			log_fini();
			exit(1);
		}
	}

	/* task-specific pre-launch activities */

	/* task plugin hook */
	if (task_g_pre_launch(job)) {
		error("Failed to invoke task plugins: task_p_pre_launch error");
		exit(1);
	}

	if (!job->batch &&
	    (job->accel_bind_type || job->tres_bind || job->tres_freq)) {
		/*
		 * Modify copy of job's environment. Do not alter in place or
		 * concurrent searches of the environment can generate invalid
		 * memory references.
		 *
		 * Also sets GRES frequency as needed.
		 */
		job->envtp->env = env_array_copy((const char **) job->env);
		gres_plugin_step_set_env(&job->envtp->env, job->step_gres_list,
					 job->accel_bind_type, job->tres_bind,
					 job->tres_freq, local_proc_id);
		tmp_env = job->env;
		job->env = job->envtp->env;
		env_array_free(tmp_env);
	}

	if (spank_user_task(job, local_proc_id) < 0) {
		error("Failed to invoke spank plugin stack");
		exit(1);
	}

	if (conf->task_prolog) {
		char *my_prolog;
		slurm_mutex_lock(&conf->config_mutex);
		my_prolog = xstrdup(conf->task_prolog);
		slurm_mutex_unlock(&conf->config_mutex);
		_run_script_and_set_env("slurm task_prolog", my_prolog, job);
		xfree(my_prolog);
	}
	if (job->task_prolog) {
		_run_script_and_set_env("user task_prolog",
					job->task_prolog, job);
	}

	/*
	 * Set TMPDIR after running prolog scripts, since TMPDIR
	 * might be set or changed in one of the prolog scripts.
	 */
	if (local_proc_id == 0)
		_make_tmpdir(job);

	if (!job->batch)
		pdebug_stop_current(job);
	if (job->env == NULL) {
		debug("job->env is NULL");
		job->env = (char **) xmalloc(sizeof(char *));
		job->env[0] = (char *) NULL;
	}

	if (job->restart_dir) {
		info("restart from %s", job->restart_dir);
		/* no return on success */
		checkpoint_restart_task(job, job->restart_dir, task->gtid);
		error("Restart task failed: %m");
		exit(errno);
	}

	if (task->argv[0] == NULL) {
		error("No executable program specified for this task");
		exit(2);
	}

	/* Do this last so you don't worry too much about the users
	 * limits including the slurmstepd in with it. */
	if (set_user_limits(job) < 0) {
		debug("Unable to set user limits");
		log_fini();
		exit(5);
	}

	execve(task->argv[0], task->argv, job->env);
	saved_errno = errno;

	/*
	 * print error message and clean up if execve() returns:
	 */
	if ((errno == ENOENT) &&
	    ((fd = open(task->argv[0], O_RDONLY)) >= 0)) {
		char buf[256], *eol;
		int sz;
		sz = read(fd, buf, sizeof(buf));
		if ((sz >= 3) && (xstrncmp(buf, "#!", 2) == 0)) {
			buf[sizeof(buf) - 1] = '\0';
			eol = strchr(buf, '\n');
			if (eol)
				eol[0] = '\0';
			slurm_seterrno(saved_errno);
			error("execve(): bad interpreter(%s): %m", buf + 2);
			exit(errno);
		}
	}
	slurm_seterrno(saved_errno);
	error("execve(): %s: %m", task->argv[0]);
	exit(errno);
}
static int _fed_job_will_run(job_desc_msg_t *req,
			     will_run_response_msg_t **will_run_resp,
			     slurmdb_federation_rec_t *fed)
{
	List resp_msg_list;
	int pthread_count = 0, i;
	pthread_t *load_thread = 0;
	load_willrun_req_struct_t *load_args;
	pthread_attr_t load_attr;
	ListIterator iter;
	will_run_response_msg_t *earliest_resp = NULL;
	load_willrun_resp_struct_t *tmp_resp;
	slurmdb_cluster_rec_t *cluster;

	xassert(req);
	xassert(will_run_resp);

	slurm_attr_init(&load_attr);

	*will_run_resp = NULL;

	/* Spawn one pthread per cluster to collect job information */
	resp_msg_list = list_create(NULL);
	load_thread = xmalloc(sizeof(pthread_t) *
			      list_count(fed->cluster_list));
	iter = list_iterator_create(fed->cluster_list);
	while ((cluster = (slurmdb_cluster_rec_t *) list_next(iter))) {
		int retries = 0;
		if ((cluster->control_host == NULL) ||
		    (cluster->control_host[0] == '\0'))
			continue;	/* Cluster down */

		load_args = xmalloc(sizeof(load_willrun_req_struct_t));
		load_args->cluster       = cluster;
		load_args->req           = req;
		load_args->resp_msg_list = resp_msg_list;
		while (pthread_create(&load_thread[pthread_count], &load_attr,
				      _load_willrun_thread,
				      (void *) load_args)) {
			error("pthread_create error %m");
			if (++retries > MAX_RETRIES)
				fatal("Can't create pthread");
			usleep(10000);	/* sleep and retry */
		}
		pthread_count++;
	}
	list_iterator_destroy(iter);
	slurm_attr_destroy(&load_attr);

	/* Wait for all pthreads to complete */
	for (i = 0; i < pthread_count; i++)
		pthread_join(load_thread[i], NULL);
	xfree(load_thread);

	iter = list_iterator_create(resp_msg_list);
	while ((tmp_resp = (load_willrun_resp_struct_t *) list_next(iter))) {
		if (!tmp_resp->willrun_resp_msg)
			slurm_seterrno(tmp_resp->rc);
		else if ((!earliest_resp) ||
			 (tmp_resp->willrun_resp_msg->start_time <
			  earliest_resp->start_time)) {
			slurm_free_will_run_response_msg(earliest_resp);
			earliest_resp = tmp_resp->willrun_resp_msg;
			tmp_resp->willrun_resp_msg = NULL;
		}

		slurm_free_will_run_response_msg(tmp_resp->willrun_resp_msg);
		xfree(tmp_resp);
	}
	list_iterator_destroy(iter);
	FREE_NULL_LIST(resp_msg_list);

	*will_run_resp = earliest_resp;

	if (!earliest_resp)
		return SLURM_FAILURE;

	return SLURM_SUCCESS;
}
/* Send slurm message with timeout
 * RET message size (as specified in argument) or SLURM_ERROR on error */
int _slurm_send_timeout(slurm_fd_t fd, char *buf, size_t size,
			uint32_t flags, int timeout)
{
	int rc;
	int sent = 0;
	int fd_flags;
	struct pollfd ufds;
	struct timeval tstart;
	int timeleft = timeout;
	char temp[2];

	ufds.fd     = fd;
	ufds.events = POLLOUT;
	fd_flags    = _slurm_fcntl(fd, F_GETFL);
	fd_set_nonblocking(fd);

	gettimeofday(&tstart, NULL);

	while (sent < size) {
		timeleft = timeout - _tot_wait(&tstart);
		if (timeleft <= 0) {
			debug("_slurm_send_timeout at %d of %zd, timeout",
			      sent, size);
			slurm_seterrno(SLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT);
			sent = SLURM_ERROR;
			goto done;
		}

		if ((rc = poll(&ufds, 1, timeleft)) <= 0) {
			if ((rc == 0) || (errno == EINTR) ||
			    (errno == EAGAIN))
				continue;
			else {
				debug("_slurm_send_timeout at %d of %zd, "
				      "poll error: %s",
				      sent, size, strerror(errno));
				slurm_seterrno(
					SLURM_COMMUNICATIONS_SEND_ERROR);
				sent = SLURM_ERROR;
				goto done;
			}
		}

		/*
		 * Check here to make sure the socket really is there.
		 * If not then exit out and notify the sender. This
		 * is here since a write doesn't always tell you the
		 * socket is gone, but getting 0 back from a
		 * nonblocking read means just that.
		 */
		if (ufds.revents & POLLERR) {
			debug("_slurm_send_timeout: Socket POLLERR");
			slurm_seterrno(ENOTCONN);
			sent = SLURM_ERROR;
			goto done;
		}
		if ((ufds.revents & POLLHUP) || (ufds.revents & POLLNVAL) ||
		    (_slurm_recv(fd, &temp, 1, flags) == 0)) {
			debug2("_slurm_send_timeout: Socket no longer there");
			slurm_seterrno(ENOTCONN);
			sent = SLURM_ERROR;
			goto done;
		}
		if ((ufds.revents & POLLOUT) != POLLOUT) {
			error("_slurm_send_timeout: Poll failure, revents:%d",
			      ufds.revents);
		}

		rc = _slurm_send(fd, &buf[sent], (size - sent), flags);
		if (rc < 0) {
			if (errno == EINTR)
				continue;
			debug("_slurm_send_timeout at %d of %zd, "
			      "send error: %s",
			      sent, size, strerror(errno));
			if (errno == EAGAIN) {	/* poll() lied to us */
				usleep(10000);
				continue;
			}
			slurm_seterrno(SLURM_COMMUNICATIONS_SEND_ERROR);
			sent = SLURM_ERROR;
			goto done;
		}
		if (rc == 0) {
			debug("_slurm_send_timeout at %d of %zd, "
			      "sent zero bytes", sent, size);
			slurm_seterrno(SLURM_PROTOCOL_SOCKET_ZERO_BYTES_SENT);
			sent = SLURM_ERROR;
			goto done;
		}

		sent += rc;
	}

done:
	/* Reset fd flags to prior state, preserve errno */
	if (fd_flags != SLURM_PROTOCOL_ERROR) {
		int slurm_err = slurm_get_errno();
		_slurm_fcntl(fd, F_SETFL, fd_flags);
		slurm_seterrno(slurm_err);
	}

	return sent;
}
/* Get slurm message with timeout
 * RET message size (as specified in argument) or SLURM_ERROR on error */
int _slurm_recv_timeout(slurm_fd_t fd, char *buffer, size_t size,
			uint32_t flags, int timeout)
{
	int rc;
	int recvlen = 0;
	int fd_flags;
	struct pollfd ufds;
	struct timeval tstart;
	int timeleft = timeout;

	ufds.fd     = fd;
	ufds.events = POLLIN;
	fd_flags    = _slurm_fcntl(fd, F_GETFL);
	fd_set_nonblocking(fd);

	gettimeofday(&tstart, NULL);

	while (recvlen < size) {
		timeleft = timeout - _tot_wait(&tstart);
		if (timeleft <= 0) {
			debug("_slurm_recv_timeout at %d of %zd, timeout",
			      recvlen, size);
			slurm_seterrno(SLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT);
			recvlen = SLURM_ERROR;
			goto done;
		}

		if ((rc = poll(&ufds, 1, timeleft)) <= 0) {
			if ((errno == EINTR) || (errno == EAGAIN) ||
			    (rc == 0))
				continue;
			else {
				debug("_slurm_recv_timeout at %d of %zd, "
				      "poll error: %s",
				      recvlen, size, strerror(errno));
				slurm_seterrno(
					SLURM_COMMUNICATIONS_RECEIVE_ERROR);
				recvlen = SLURM_ERROR;
				goto done;
			}
		}

		if (ufds.revents & POLLERR) {
			debug("_slurm_recv_timeout: Socket POLLERR");
			slurm_seterrno(ENOTCONN);
			recvlen = SLURM_ERROR;
			goto done;
		}
		if ((ufds.revents & POLLNVAL) ||
		    ((ufds.revents & POLLHUP) &&
		     ((ufds.revents & POLLIN) == 0))) {
			debug2("_slurm_recv_timeout: Socket no longer there");
			slurm_seterrno(ENOTCONN);
			recvlen = SLURM_ERROR;
			goto done;
		}
		if ((ufds.revents & POLLIN) != POLLIN) {
			error("_slurm_recv_timeout: Poll failure, revents:%d",
			      ufds.revents);
			continue;
		}

		rc = _slurm_recv(fd, &buffer[recvlen], (size - recvlen),
				 flags);
		if (rc < 0) {
			if (errno == EINTR)
				continue;
			else {
				debug("_slurm_recv_timeout at %d of %zd, "
				      "recv error: %s",
				      recvlen, size, strerror(errno));
				slurm_seterrno(
					SLURM_COMMUNICATIONS_RECEIVE_ERROR);
				recvlen = SLURM_ERROR;
				goto done;
			}
		}
		if (rc == 0) {
			debug("_slurm_recv_timeout at %d of %zd, "
			      "recv zero bytes", recvlen, size);
			slurm_seterrno(SLURM_PROTOCOL_SOCKET_ZERO_BYTES_SENT);
			recvlen = SLURM_ERROR;
			goto done;
		}

		recvlen += rc;
	}

done:
	/* Reset fd flags to prior state, preserve errno */
	if (fd_flags != SLURM_PROTOCOL_ERROR) {
		int slurm_err = slurm_get_errno();
		_slurm_fcntl(fd, F_SETFL, fd_flags);
		slurm_seterrno(slurm_err);
	}

	return recvlen;
}