/*
 * Get details about this slurm job: jobid and allocated nodes.
 * Looks up the sbcast credential for params.job_id / params.step_id and
 * stores it in the global sbcast_cred.  Exits the program on lookup
 * failure; params.job_id must already be set (asserted below).
 */
static void _get_job_info(void)
{
	xassert(params.job_id != NO_VAL);

	if (slurm_sbcast_lookup(params.job_id, params.step_id, &sbcast_cred)
	    != SLURM_SUCCESS) {
		/* distinct messages for job vs. step lookups */
		if (params.step_id == NO_VAL) {
			error("Slurm job ID %u lookup error: %s",
			      params.job_id,
			      slurm_strerror(slurm_get_errno()));
		} else {
			error("Slurm step ID %u.%u lookup error: %s",
			      params.job_id, params.step_id,
			      slurm_strerror(slurm_get_errno()));
		}
		exit(1);
	}

	if (params.step_id == NO_VAL)
		verbose("jobid = %u", params.job_id);
	else
		verbose("jobid = %u.%u", params.job_id, params.step_id);
	verbose("node_cnt = %u", sbcast_cred->node_cnt);
	verbose("node_list = %s", sbcast_cred->node_list);
	/* also see sbcast_cred->node_addr (array) */

	if (params.verbose)
		print_sbcast_cred(sbcast_cred->sbcast_cred);

	/* do not bother to release the return message,
	 * we need to preserve and use most of the information later */
}
/* * Test if any BG blocks are in deallocating state since they are * probably related to this job we will want to sleep longer * RET 1: deallocate in progress * 0: no deallocate in progress * -1: error occurred */ static int _blocks_dealloc(void) { static block_info_msg_t *bg_info_ptr = NULL, *new_bg_ptr = NULL; int rc = 0, error_code = 0, i; if (bg_info_ptr) { error_code = slurm_load_block_info(bg_info_ptr->last_update, &new_bg_ptr, SHOW_ALL); if (error_code == SLURM_SUCCESS) slurm_free_block_info_msg(bg_info_ptr); else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) { error_code = SLURM_SUCCESS; new_bg_ptr = bg_info_ptr; } } else { error_code = slurm_load_block_info((time_t) NULL, &new_bg_ptr, SHOW_ALL); } if (error_code) { error("slurm_load_partitions: %s", slurm_strerror(slurm_get_errno())); return -1; } for (i=0; i<new_bg_ptr->record_count; i++) { if(new_bg_ptr->block_array[i].state == BG_BLOCK_TERM) { rc = 1; break; } } bg_info_ptr = new_bg_ptr; return rc; }
/*
 * Get details about this slurm job: jobid and allocated nodes.
 * Looks up the sbcast credential for params->job_id / params->step_id
 * and stores it in the global sbcast_cred.
 * RET SLURM_SUCCESS or the slurm_sbcast_lookup() error code.
 */
static int _get_job_info(struct bcast_parameters *params)
{
	int rc;

	xassert(params->job_id != NO_VAL);

	rc = slurm_sbcast_lookup(params->job_id, params->step_id,
				 &sbcast_cred);
	if (rc != SLURM_SUCCESS) {
		/* distinct messages for job vs. step lookups */
		if (params->step_id == NO_VAL) {
			error("Slurm job ID %u lookup error: %s",
			      params->job_id,
			      slurm_strerror(slurm_get_errno()));
		} else {
			error("Slurm step ID %u.%u lookup error: %s",
			      params->job_id, params->step_id,
			      slurm_strerror(slurm_get_errno()));
		}
		return rc;
	}

	if (params->step_id == NO_VAL)
		verbose("jobid = %u", params->job_id);
	else
		verbose("stepid = %u.%u", params->job_id, params->step_id);
	verbose("node_cnt = %u", sbcast_cred->node_cnt);
	verbose("node_list = %s", sbcast_cred->node_list);
	/* also see sbcast_cred->node_addr (array) */

	if (params->verbose)
		print_sbcast_cred(sbcast_cred->sbcast_cred);

	/* do not bother to release the return message,
	 * we need to preserve and use most of the information later */
	return rc;
}
/*
 * scontrol_requeue - requeue a pending or running batch job
 * IN job_id_str - a job id
 * RET 0 if no slurm error, errno otherwise. parsing error prints
 *		error message and returns 0
 */
extern int scontrol_requeue(int argc, char **argv)
{
	int rc = SLURM_SUCCESS;
	int i;
	uint32_t *ids;
	uint32_t num_ids;

	/* NOTE(review): this tests the argv[0] POINTER, not an empty
	 * string — confirm callers never pass argv[0] == "" */
	if (! argv[0]) {
		exit_code = 1;
		return 0;
	}

	/* expand the argument (possibly a job array spec) into a list
	 * of individual job ids; caller of _get_job_ids owns the array */
	ids = _get_job_ids(argv[0], &num_ids);
	if (ids == NULL) {
		exit_code = 1;
		return 0;
	}

	/* requeue each id; stop at the first failure */
	for (i = 0; i < num_ids; i++) {
		rc = slurm_requeue(ids[i], 0);
		if (rc != SLURM_SUCCESS) {
			fprintf(stderr, "%s array job_id %u\n",
				slurm_strerror(slurm_get_errno()), ids[i]);
			exit_code = 1;
			break;
		}
	}

	xfree(ids);

	return rc;
}
/************
 * Functions *
 ************/

/*
 * Load BlueGene block information into *block_ptr, caching the last
 * response in static storage and requesting only changes since the
 * previous load.  No-op (returns SLURM_NO_CHANGE_IN_DATA) unless
 * compiled with BlueGene support (HAVE_BG).
 * RET SLURM_SUCCESS when *block_ptr now points at different data,
 *     SLURM_NO_CHANGE_IN_DATA when unchanged, else a slurm error code.
 */
static int _get_new_info_block(block_info_msg_t **block_ptr)
{
	int error_code = SLURM_NO_CHANGE_IN_DATA;
#ifdef HAVE_BG
	static block_info_msg_t *bg_info_ptr = NULL;
	static block_info_msg_t *new_bg_ptr = NULL;

	if (bg_info_ptr) {
		/* incremental load since last_update */
		error_code = slurm_load_block_info(bg_info_ptr->last_update,
						   &new_bg_ptr, SHOW_ALL);
		if (error_code == SLURM_SUCCESS) {
			slurm_free_block_info_msg(bg_info_ptr);
		} else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			/* cached copy is still current */
			error_code = SLURM_NO_CHANGE_IN_DATA;
			new_bg_ptr = bg_info_ptr;
		}
	} else {
		error_code = slurm_load_block_info((time_t) NULL,
						   &new_bg_ptr, SHOW_ALL);
	}

	bg_info_ptr = new_bg_ptr;

	/* report success when the caller's pointer actually changes */
	if (*block_ptr != bg_info_ptr)
		error_code = SLURM_SUCCESS;

	*block_ptr = new_bg_ptr;
#endif
	return error_code;
}
/*
 * slurm_signal_job - send the specified signal to all steps of an existing job
 * IN job_id     - the job's id
 * IN signal     - signal number
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
extern int slurm_signal_job (uint32_t job_id, uint16_t signal)
{
	int rc = SLURM_SUCCESS;
	resource_allocation_response_msg_t *alloc_info = NULL;
	signal_job_msg_t rpc;

	/* resolve the job's allocated node list */
	if (slurm_allocation_lookup_lite(job_id, &alloc_info)) {
		rc = slurm_get_errno();
		goto fail1;
	}

	/* same remote procedure call for each node */
	rpc.job_id = job_id;
	rpc.signal = (uint32_t)signal;

	rc = _local_send_recv_rc_msgs(alloc_info->node_list,
				      REQUEST_SIGNAL_JOB, &rpc);
	slurm_free_resource_allocation_response_msg(alloc_info);
fail1:
	/* slurm_seterrno_ret() sets errno and returns -1 */
	if (rc) {
		slurm_seterrno_ret(rc);
	} else {
		return SLURM_SUCCESS;
	}
}
/*
 * Load current node table information into *node_buffer_pptr.
 * Caches the previous response in old_node_info_ptr and requests only
 * changes since last_update; a show_flags change forces a full reload.
 * RET SLURM_SUCCESS or a slurm error code from slurm_load_node().
 */
extern int
scontrol_load_nodes (node_info_msg_t ** node_buffer_pptr, uint16_t show_flags)
{
	int error_code;
	static int last_show_flags = 0xffff;	/* forces mismatch 1st call */
	node_info_msg_t *node_info_ptr = NULL;

	if (old_node_info_ptr) {
		/* changed flags invalidate the cached copy */
		if (last_show_flags != show_flags)
			old_node_info_ptr->last_update = (time_t) 0;
		error_code = slurm_load_node (old_node_info_ptr->last_update,
					      &node_info_ptr, show_flags);
		if (error_code == SLURM_SUCCESS)
			slurm_free_node_info_msg (old_node_info_ptr);
		else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) {
			/* cached copy is still current */
			node_info_ptr = old_node_info_ptr;
			error_code = SLURM_SUCCESS;
			if (quiet_flag == -1)
				printf ("slurm_load_node no change in data\n");
		}
	} else
		error_code = slurm_load_node ((time_t) NULL, &node_info_ptr,
					      show_flags);

	if (error_code == SLURM_SUCCESS) {
		old_node_info_ptr = node_info_ptr;
		last_show_flags = show_flags;
		*node_buffer_pptr = node_info_ptr;
	}

	return error_code;
}
/*
 * Wait up to "timeout" for the resource allocation response RPC for
 * job_id on the given listen socket.  If the RPC never arrives, poll
 * the controller directly in case the response was lost in transit.
 * RET the allocation response, or NULL (errno restored to the wait
 * failure when the job is still pending).
 */
static resource_allocation_response_msg_t *
_wait_for_allocation_response(uint32_t job_id, const listen_t *listen,
			      int timeout)
{
	resource_allocation_response_msg_t *resp = NULL;
	int errnum;

	info("job %u queued and waiting for resources", job_id);
	if (_wait_for_alloc_rpc(listen, timeout, &resp) <= 0) {
		errnum = errno;	/* save before later calls clobber errno */
		/* Maybe the resource allocation response RPC got lost
		 * in the mail; surely it should have arrived by now.
		 * Let's see if the controller thinks that the allocation
		 * has been granted. */
		if (slurm_allocation_lookup_lite(job_id, &resp) >= 0) {
			return resp;
		}
		if (slurm_get_errno() == ESLURM_JOB_PENDING) {
			debug3("Still waiting for allocation");
			errno = errnum;
			return NULL;
		} else {
			debug3("Unable to confirm allocation for job %u: %m",
			       job_id);
			return NULL;
		}
	}
	info("job %u has been allocated resources", job_id);
	return resp;
}
/*
 * Load the slurm control configuration into *info_ptr, caching the
 * last response in the global g_ctl_info_ptr and requesting only
 * changes since last_update.
 * RET SLURM_SUCCESS when *info_ptr now points at new/different data,
 *     SLURM_NO_CHANGE_IN_DATA when unchanged, else a slurm error code.
 */
extern int get_new_info_config(slurm_ctl_conf_info_msg_t **info_ptr)
{
	static slurm_ctl_conf_info_msg_t *new_ctl_ptr = NULL;
	int error_code = SLURM_NO_CHANGE_IN_DATA;

	if (g_ctl_info_ptr) {
		/* incremental load since last_update */
		error_code = slurm_load_ctl_conf(g_ctl_info_ptr->last_update,
						 &new_ctl_ptr);
		if (error_code == SLURM_SUCCESS)
			slurm_free_ctl_conf(g_ctl_info_ptr);
		else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) {
			/* cached copy is still current */
			error_code = SLURM_NO_CHANGE_IN_DATA;
			new_ctl_ptr = g_ctl_info_ptr;
		}
	} else {
		new_ctl_ptr = NULL;
		error_code = slurm_load_ctl_conf((time_t) NULL, &new_ctl_ptr);
	}

	g_ctl_info_ptr = new_ctl_ptr;

	/* report success when the caller's pointer actually changes */
	if (g_ctl_info_ptr && (*info_ptr != g_ctl_info_ptr))
		error_code = SLURM_SUCCESS;

	*info_ptr = new_ctl_ptr;

	return error_code;
}
/*
 * Send message to stdout of specified job
 * argv[0] == jobid
 * argv[1]++ the message
 * RET 0 on success, 1 on invalid job id, else the slurm errno from
 * slurm_notify_job()
 */
extern int scontrol_job_notify(int argc, char *argv[])
{
	int i;
	long tmp_id;
	char *endptr = NULL;
	uint32_t job_id;
	char *message = NULL;

	/* Bug fix: job_id is uint32_t, so atoi() of a negative string
	 * (e.g. "-5") wrapped to a huge value and passed the "<= 0"
	 * test.  strtol() lets us validate sign and trailing garbage. */
	tmp_id = strtol(argv[0], &endptr, 10);
	if ((tmp_id <= 0) || (endptr == argv[0]) || (*endptr != '\0')) {
		fprintf(stderr, "Invalid job_id %s\n", argv[0]);
		return 1;
	}
	job_id = (uint32_t) tmp_id;

	/* concatenate the remaining arguments into one space-separated
	 * message string (xstrfmtcat/xstrcat allocate as needed) */
	for (i = 1; i < argc; i++) {
		if (message)
			xstrfmtcat(message, " %s", argv[i]);
		else
			xstrcat(message, argv[i]);
	}

	i = slurm_notify_job(job_id, message);
	xfree(message);

	if (i)
		return slurm_get_errno();
	return 0;
}
/*
 * scontrol_create_part - create a slurm partition configuration per the
 *	supplied arguments
 * IN argc - count of arguments
 * IN argv - list of arguments
 * RET 0 if no slurm error, errno otherwise. parsing error prints
 *	error message and returns 0
 */
extern int scontrol_create_part (int argc, char *argv[])
{
	int update_cnt = 0;
	update_part_msg_t part_msg;

	slurm_init_part_desc_msg ( &part_msg );
	scontrol_parse_part_options (argc, argv, &update_cnt, &part_msg);

	/* a name is mandatory and "DEFAULT" is reserved */
	if (part_msg.name == NULL) {
		exit_code = 1;
		error("PartitionName must be given.");
		return 0;
	} else if (xstrcasecmp(part_msg.name, "default") == 0) {
		exit_code = 1;
		error("PartitionName cannot be \"DEFAULT\".");
		return 0;
	}

	if (update_cnt == 0) {
		exit_code = 1;
		error("No parameters specified");
		return 0;
	}

	if (slurm_create_partition(&part_msg)) {
		exit_code = 1;
		slurm_perror("Error creating the partition");
		return slurm_get_errno ();
	} else
		return 0;
}
/*
 * Load current front_end table information into *front_end_buffer_pptr.
 * Caches the previous response in old_front_end_info_ptr and requests
 * only changes since last_update.
 * RET SLURM_SUCCESS or a slurm error code from slurm_load_front_end().
 */
extern int
scontrol_load_front_end(front_end_info_msg_t ** front_end_buffer_pptr)
{
	int error_code;
	front_end_info_msg_t *front_end_info_ptr = NULL;

	if (old_front_end_info_ptr) {
		/* incremental load since last_update */
		error_code = slurm_load_front_end (
				old_front_end_info_ptr->last_update,
				&front_end_info_ptr);
		if (error_code == SLURM_SUCCESS)
			slurm_free_front_end_info_msg (old_front_end_info_ptr);
		else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) {
			/* cached copy is still current */
			front_end_info_ptr = old_front_end_info_ptr;
			error_code = SLURM_SUCCESS;
			if (quiet_flag == -1) {
				printf("slurm_load_front_end no change in "
				       "data\n");
			}
		}
	} else
		error_code = slurm_load_front_end((time_t) NULL,
						  &front_end_info_ptr);

	if (error_code == SLURM_SUCCESS) {
		old_front_end_info_ptr = front_end_info_ptr;
		*front_end_buffer_pptr = front_end_info_ptr;
	}

	return error_code;
}
/*
 * Load current partition (block) table information into *block_info_pptr.
 * Caches the previous response in old_block_info_ptr and requests only
 * changes since last_update.
 * RET SLURM_SUCCESS or a slurm error code from slurm_load_block_info().
 */
extern int
scontrol_load_block (block_info_msg_t **block_info_pptr)
{
	int error_code;
	block_info_msg_t *info_ptr = NULL;
	uint16_t show_flags = 0;

	if (all_flag)
		show_flags |= SHOW_ALL;

	if (old_block_info_ptr) {
		/* incremental load since last_update */
		error_code = slurm_load_block_info(
					old_block_info_ptr->last_update,
					&info_ptr, show_flags);
		if (error_code == SLURM_SUCCESS)
			slurm_free_block_info_msg(old_block_info_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			/* cached copy is still current */
			info_ptr = old_block_info_ptr;
			error_code = SLURM_SUCCESS;
			if (quiet_flag == -1)
				printf ("slurm_load_block no "
					"change in data\n");
		}
	} else
		error_code = slurm_load_block_info((time_t)NULL, &info_ptr,
						   show_flags);

	if (error_code == SLURM_SUCCESS) {
		old_block_info_ptr = info_ptr;
		*block_info_pptr = info_ptr;
	}

	return error_code;
}
/*
 * scontrol_update_part - update the slurm partition configuration per the
 *	supplied arguments
 * IN argc - count of arguments
 * IN argv - list of arguments
 * RET 0 if no slurm error, errno otherwise. parsing error prints
 *	error message and returns 0
 */
extern int scontrol_update_part (int argc, char *argv[])
{
	int update_cnt = 0;
	update_part_msg_t part_msg;

	slurm_init_part_desc_msg ( &part_msg );
	scontrol_parse_part_options (argc, argv, &update_cnt, &part_msg);

	if (part_msg.name == NULL) {
		exit_code = 1;
		error("PartitionName must be given.");
		return 0;
	}
	/* update_cnt includes the PartitionName option itself, so <= 1
	 * means nothing else was specified */
	if (update_cnt <= 1) {
		exit_code = 1;
		error("No changes specified");
		return 0;
	}

	if (slurm_update_partition(&part_msg)) {
		exit_code = 1;
		return slurm_get_errno ();
	} else
		return 0;
}
/*
 * Get details about this slurm job: jobid and allocated nodes.
 * Reads the job ID from the SLURM_JOB_ID environment variable, then
 * looks up the sbcast credential (stored in global sbcast_cred).
 * Exits the program if not inside a job or on lookup failure.
 */
static void _get_job_info(void)
{
	char *jobid_str;
	uint32_t jobid;

	jobid_str = getenv("SLURM_JOB_ID");
	if (!jobid_str) {
		error("Command only valid from within SLURM job");
		exit(1);
	}
	/* NOTE(review): no validation of the parsed value — assumes the
	 * environment variable is a well-formed job id */
	jobid = (uint32_t) atol(jobid_str);
	verbose("jobid = %u", jobid);

	if (slurm_sbcast_lookup(jobid, &sbcast_cred) != SLURM_SUCCESS) {
		error("SLURM jobid %u lookup error: %s",
		      jobid, slurm_strerror(slurm_get_errno()));
		exit(1);
	}

	verbose("node_cnt = %u", sbcast_cred->node_cnt);
	verbose("node_list = %s", sbcast_cred->node_list);
	/* also see sbcast_cred->node_addr (array) */

	if (params.verbose)
		print_sbcast_cred(sbcast_cred->sbcast_cred);

	/* do not bother to release the return message,
	 * we need to preserve and use most of the information later */
}
/*
 * Load current job table information into *job_buffer_pptr.
 * IN job_id - specific job to load, or 0 to load all jobs
 * Caches the previous response in old_job_info_ptr; a show_flags change
 * or a single-job response forces a full reload next time.
 * RET SLURM_SUCCESS or a slurm error code.
 */
extern int scontrol_load_job(job_info_msg_t ** job_buffer_pptr, uint32_t job_id)
{
	int error_code;
	static uint16_t last_show_flags = 0xffff;	/* mismatch 1st call */
	uint16_t show_flags = 0;
	job_info_msg_t * job_info_ptr = NULL;

	/* build show_flags from the scontrol option globals */
	if (all_flag)
		show_flags |= SHOW_ALL;
	if (detail_flag) {
		show_flags |= SHOW_DETAIL;
		if (detail_flag > 1)
			show_flags |= SHOW_DETAIL2;
	}
	if (federation_flag)
		show_flags |= SHOW_FEDERATION;
	if (local_flag)
		show_flags |= SHOW_LOCAL;
	if (sibling_flag)
		show_flags |= SHOW_FEDERATION | SHOW_SIBLING;

	if (old_job_info_ptr) {
		/* changed flags invalidate the cached copy */
		if (last_show_flags != show_flags)
			old_job_info_ptr->last_update = (time_t) 0;
		if (job_id) {
			error_code = slurm_load_job(&job_info_ptr, job_id,
						    show_flags);
		} else {
			error_code = slurm_load_jobs(
					old_job_info_ptr->last_update,
					&job_info_ptr, show_flags);
		}
		if (error_code == SLURM_SUCCESS)
			slurm_free_job_info_msg (old_job_info_ptr);
		else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) {
			/* cached copy is still current */
			job_info_ptr = old_job_info_ptr;
			error_code = SLURM_SUCCESS;
			if (quiet_flag == -1)
				printf ("slurm_load_jobs no change in data\n");
		}
	} else if (job_id) {
		error_code = slurm_load_job(&job_info_ptr, job_id, show_flags);
	} else {
		error_code = slurm_load_jobs((time_t) NULL, &job_info_ptr,
					     show_flags);
	}

	if (error_code == SLURM_SUCCESS) {
		old_job_info_ptr = job_info_ptr;
		/* a single-job response is partial; force a full reload
		 * on the next call */
		if (job_id)
			old_job_info_ptr->last_update = (time_t) 0;
		last_show_flags = show_flags;
		*job_buffer_pptr = job_info_ptr;
	}

	return error_code;
}
/*
 * Return the current slurm errno if it falls within the switch plugin's
 * reserved error range, otherwise SLURM_SUCCESS.
 */
extern int switch_p_get_errno(void)
{
	int errnum = slurm_get_errno();

	return ((errnum >= ESLURM_SWITCH_MIN) &&
		(errnum <= ESLURM_SWITCH_MAX)) ? errnum : SLURM_SUCCESS;
}
/*
 * scontrol_requeue - requeue a pending or running batch job
 * IN job_id_str - a job id
 * Iterates over all job ids produced by _next_job_id(), requeueing
 * each via slurm_requeue2(); sets exit_code on any failure and prints
 * per-array-task errors from the response.
 */
extern void scontrol_requeue(char *job_str)
{
	char *job_id_str;
	int rc, i;
	job_array_resp_msg_t *resp = NULL;

	if (!job_str[0]) {
		exit_code = 1;
		return;
	}

	/* strip an optional "jobid="/"job=" prefix */
	if (xstrncasecmp(job_str, "jobid=", 6) == 0)
		job_str += 6;
	if (xstrncasecmp(job_str, "job=", 4) == 0)
		job_str += 4;

	if (_is_job_id(job_str)) {
		job_id_str = _next_job_id();
		while (job_id_str) {
			rc = slurm_requeue2(job_id_str, 0, &resp);
			if (rc != SLURM_SUCCESS) {
				exit_code = 1;
				if (quiet_flag != 1) {
					fprintf(stderr, "%s for job %s\n",
						slurm_strerror(
							slurm_get_errno()),
						job_id_str);
				}
			} else if (resp) {
				/* report per-task errors; a lone success
				 * in a single-element response is silent */
				for (i = 0; i < resp->job_array_count; i++) {
					if ((resp->error_code[i] ==
					     SLURM_SUCCESS) &&
					    (resp->job_array_count == 1))
						continue;
					exit_code = 1;
					if (quiet_flag == 1)
						continue;
					fprintf(stderr, "%s: %s\n",
						resp->job_array_id[i],
						slurm_strerror(resp->
							       error_code[i]));
				}
				slurm_free_job_array_resp(resp);
				resp = NULL;
			}
			job_id_str = _next_job_id();
		}
	} else {
		exit_code = 1;
		rc = ESLURM_INVALID_JOB_ID;
		slurm_seterrno(rc);
		if (quiet_flag != 1) {
			fprintf(stderr, "%s for job %s\n",
				slurm_strerror(rc), job_str);
		}
	}
}
/*
 * _print_job_step - print the specified job step's information
 * IN clear_old - if true, invalidate the cached response so run_time
 *	values are refreshed rather than reused
 * RET SLURM_SUCCESS or SLURM_ERROR on RPC failure
 */
static int _print_job_steps( bool clear_old )
{
	int error_code;
	static job_step_info_response_msg_t * old_step_ptr = NULL;
	static job_step_info_response_msg_t * new_step_ptr;
	uint16_t show_flags = 0;

	if (params.all_flag)
		show_flags |= SHOW_ALL;

	if (old_step_ptr) {
		if (clear_old)
			old_step_ptr->last_update = 0;
		/* Use a last_update time of 0 so that we can get an updated
		 * run_time for jobs rather than just its start_time */
		error_code = slurm_get_job_steps((time_t) 0, NO_VAL, NO_VAL,
						 &new_step_ptr, show_flags);
		if (error_code == SLURM_SUCCESS)
			slurm_free_job_step_info_response_msg( old_step_ptr );
		else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) {
			/* cached copy is still current */
			error_code = SLURM_SUCCESS;
			new_step_ptr = old_step_ptr;
		}
	} else {
		error_code = slurm_get_job_steps((time_t) 0, NO_VAL, NO_VAL,
						 &new_step_ptr, show_flags);
	}

	if (error_code) {
		slurm_perror ("slurm_get_job_steps error");
		return SLURM_ERROR;
	}
	old_step_ptr = new_step_ptr;

	if (params.verbose) {
		printf ("last_update_time=%ld records=%u\n",
			(long) new_step_ptr->last_update,
			new_step_ptr->job_step_count);
	}

	/* apply the default short format when none was requested */
	if (!params.format && !params.format_long)
		params.format = "%.15i %.8j %.9P %.8u %.9M %N";
	if (!params.format_list) {
		if (params.format)
			parse_format(params.format);
		else if (params.format_long)
			parse_long_format(params.format_long);
	}

	print_steps_array( new_step_ptr->job_steps,
			   new_step_ptr->job_step_count, params.format_list );

	return SLURM_SUCCESS;
}
static s_p_hashtbl_t *_config_make_tbl(char *filename) { s_p_hashtbl_t *tbl = NULL; xassert(filename); if (!(tbl = s_p_hashtbl_create(knl_conf_file_options))) { error("%s: s_p_hashtbl_create error: %s", prog_name, slurm_strerror(slurm_get_errno())); return tbl; } if (s_p_parse_file(tbl, NULL, filename, false) == SLURM_ERROR) { error("%s: s_p_parse_file error: %s", prog_name, slurm_strerror(slurm_get_errno())); s_p_hashtbl_destroy(tbl); tbl = NULL; } return tbl; }
/*
 * scontrol_update_powercap - update the slurm powercapping configuration
 *	per the supplied arguments
 * IN argc - count of arguments
 * IN argv - list of arguments
 * RET 0 if no slurm error, errno otherwise. parsing error prints
 *	error message and returns 0
 */
extern int scontrol_update_powercap (int argc, char *argv[])
{
	update_powercap_msg_t powercap_msg;
	int i;
	char *tag, *val;
	int taglen, vallen;

	/* NO_VAL marks fields the user did not set */
	memset(&powercap_msg, 0, sizeof(update_powercap_msg_t));
	powercap_msg.powercap = (uint32_t) NO_VAL;
	powercap_msg.min_watts = (uint32_t) NO_VAL;
	powercap_msg.cur_max_watts = (uint32_t) NO_VAL;
	powercap_msg.adj_max_watts = (uint32_t) NO_VAL;
	powercap_msg.max_watts = (uint32_t) NO_VAL;

	/* parse "tag=value" arguments */
	for (i = 0; i < argc; i++) {
		tag = argv[i];
		val = strchr(argv[i], '=');
		if (val) {
			taglen = val - argv[i];
			val++;
			vallen = strlen(val);
		} else {
			exit_code = 1;
			error("Invalid input: %s  Request aborted", argv[i]);
			return -1;
		}

		if (strncasecmp(tag, "PowerCap", MAX(taglen, 8)) == 0) {
			if (strncasecmp(val, "INFINITE", MAX(vallen, 8))
			    == 0) {
				powercap_msg.powercap = (uint32_t) INFINITE;
			} else if (parse_uint32(val,
						&(powercap_msg.powercap))) {
				error("Invalid PowerCap value: %s", val);
				return -1;
			}
			/* for now, we can break as we do not have
			 * other args */
			break;
		}
	}

	if (powercap_msg.powercap == (uint32_t) NO_VAL) {
		exit_code = 1;
		error("Invalid PowerCap value.");
		return 0;
	}

	if (slurm_update_powercap(&powercap_msg)) {
		exit_code = 1;
		return slurm_get_errno ();
	} else
		return 0;
}
/*
 * Load BlueGene block information into *block_ptr for sview, caching
 * the last response in g_block_info_ptr.  Within refresh_delay seconds
 * of the previous load (and without "force"), the cached data is
 * returned as-is.
 * RET SLURM_SUCCESS when *block_ptr points at new/different data,
 *     SLURM_NO_CHANGE_IN_DATA when unchanged, else a slurm error code.
 */
extern int get_new_info_block(block_info_msg_t **block_ptr, int force)
{
	int error_code = SLURM_NO_CHANGE_IN_DATA;
	block_info_msg_t *new_bg_ptr = NULL;
	time_t now = time(NULL);
	static time_t last;
	static bool changed = 0;
	uint16_t show_flags = 0;

	/* nothing to do on non-BlueGene clusters */
	if (!(cluster_flags & CLUSTER_FLAG_BG))
		return error_code;

	/* throttle: reuse the cache inside the refresh window */
	if (g_block_info_ptr && !force &&
	    ((now - last) < working_sview_config.refresh_delay)) {
		if (*block_ptr != g_block_info_ptr)
			error_code = SLURM_SUCCESS;
		*block_ptr = g_block_info_ptr;
		if (changed)
			error_code = SLURM_SUCCESS;
		goto end_it;
	}
	last = now;

	if (working_sview_config.show_hidden)
		show_flags |= SHOW_ALL;

	if (g_block_info_ptr) {
		/* incremental load since last_update */
		error_code = slurm_load_block_info(
					g_block_info_ptr->last_update,
					&new_bg_ptr, show_flags);
		if (error_code == SLURM_SUCCESS) {
			slurm_free_block_info_msg(g_block_info_ptr);
			changed = 1;
		} else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_NO_CHANGE_IN_DATA;
			new_bg_ptr = g_block_info_ptr;
			changed = 0;
		}
	} else {
		new_bg_ptr = NULL;
		error_code = slurm_load_block_info((time_t) NULL,
						   &new_bg_ptr, show_flags);
		changed = 1;
	}

	g_block_info_ptr = new_bg_ptr;

	if (block_ptr) {
		/* report success when the caller's pointer changes */
		if (g_block_info_ptr && (*block_ptr != g_block_info_ptr))
			error_code = SLURM_SUCCESS;
		*block_ptr = g_block_info_ptr;
	}
end_it:
	return error_code;
}
/*
 * Requeue job(s) into a held state.
 * argv[0] - state flags spec when argc == 2, else the job id spec
 * argv[argc-1] - the job id spec
 * RET 0 if no slurm error, errno otherwise. parsing error prints
 *	error message and returns 0
 */
extern int scontrol_requeue_hold(int argc, char **argv)
{
	int rc = SLURM_SUCCESS;
	int i;
	uint32_t state_flag;
	uint32_t *ids;
	uint32_t num_ids;
	char *job_id_str;

	state_flag = 0;

	/* with two args the first is the state spec, the last is the
	 * job id spec */
	if (argc == 1)
		job_id_str = argv[0];
	else
		job_id_str = argv[1];

	ids = _get_job_ids(job_id_str, &num_ids);
	if (ids == NULL) {
		exit_code = 1;
		return 0;
	}

	if (argc == 2) {
		rc = _parse_requeue_flags(argv[0], &state_flag);
		if (rc < 0) {
			error("Invalid state specification %s", argv[0]);
			exit_code = 1;
			xfree(ids);
			return 0;
		}
	}
	state_flag |= JOB_REQUEUE_HOLD;

	/* Go and requeue the state either in
	 * JOB_SPECIAL_EXIT or HELD state.
	 */
	for (i = 0; i < num_ids; i++) {
		rc = slurm_requeue(ids[i], state_flag);
		if (rc != SLURM_SUCCESS) {
			fprintf(stderr, "%s array job_id %u\n",
				slurm_strerror(slurm_get_errno()), ids[i]);
			exit_code = 1;
			break;
		}
	}

	xfree(ids);

	return rc;
}
/*
 * scontrol_hold - perform some job hold/release operation
 * IN op - hold/holdu/uhold/release operation name
 * IN job_id_str - a job id
 * RET 0 if no slurm error, errno otherwise. parsing error prints
 *	error message and returns 0
 */
extern int scontrol_hold(char *op, char *job_id_str)
{
	int rc = SLURM_SUCCESS;
	char *next_str;
	job_desc_msg_t job_msg;
	uint16_t job_state;

	slurm_init_job_desc_msg (&job_msg);
	/* set current user, needed e.g., for AllowGroups checks */
	job_msg.user_id = getuid();

	if (job_id_str) {
		job_msg.job_id = (uint32_t) strtol(job_id_str, &next_str, 10);
		if ((job_msg.job_id == 0) || (next_str[0] != '\0')) {
			fprintf(stderr, "Invalid job id specified\n");
			exit_code = 1;
			return 0;
		}
	} else {
		fprintf(stderr, "Invalid job id specified\n");
		exit_code = 1;
		return 0;
	}

	/* only pending jobs may be held */
	job_state = scontrol_get_job_state(job_msg.job_id);
	if (job_state == (uint16_t) NO_VAL)
		return SLURM_ERROR;
	if ((job_state & JOB_STATE_BASE) != JOB_PENDING) {
		slurm_seterrno(ESLURM_JOB_NOT_PENDING);
		return ESLURM_JOB_NOT_PENDING;
	}

	/* priority 0 == held; INFINITE restores the normal priority.
	 * "holdu"/"uhold" mark a user-level hold via alloc_sid. */
	if ((strncasecmp(op, "holdu", 5) == 0) ||
	    (strncasecmp(op, "uhold", 5) == 0)) {
		job_msg.priority = 0;
		job_msg.alloc_sid = ALLOC_SID_USER_HOLD;
	} else if (strncasecmp(op, "hold", 4) == 0) {
		job_msg.priority = 0;
		job_msg.alloc_sid = 0;
	} else
		job_msg.priority = INFINITE;

	if (slurm_update_job(&job_msg))
		return slurm_get_errno();

	return rc;
}
/*
 * Requeue job(s) into a held state (JOB_REQUEUE_HOLD is OR'd into
 * state_flag).  Iterates over all job ids produced by _next_job_id(),
 * requeueing each via slurm_requeue2(); sets exit_code on any failure
 * and prints per-array-task errors from the response.
 */
extern void scontrol_requeue_hold(uint32_t state_flag, char *job_str)
{
	int rc, i;
	char *job_id_str;
	job_array_resp_msg_t *resp = NULL;

	state_flag |= JOB_REQUEUE_HOLD;

	if (_is_job_id(job_str)) {
		job_id_str = _next_job_id();
		while (job_id_str) {
			rc = slurm_requeue2(job_id_str, state_flag, &resp);
			if (rc != SLURM_SUCCESS) {
				exit_code = 1;
				if (quiet_flag != 1) {
					fprintf(stderr, "%s for job %s\n",
						slurm_strerror(
							slurm_get_errno()),
						job_id_str);
				}
			} else if (resp) {
				/* report per-task errors; a lone success
				 * in a single-element response is silent */
				for (i = 0; i < resp->job_array_count; i++) {
					if ((resp->error_code[i] ==
					     SLURM_SUCCESS) &&
					    (resp->job_array_count == 1))
						continue;
					exit_code = 1;
					if (quiet_flag == 1)
						continue;
					fprintf(stderr, "%s: %s\n",
						resp->job_array_id[i],
						slurm_strerror(resp->
							       error_code[i]));
				}
				slurm_free_job_array_resp(resp);
				resp = NULL;
			}
			job_id_str = _next_job_id();
		}
	} else {
		exit_code = 1;
		rc = ESLURM_INVALID_JOB_ID;
		slurm_seterrno(rc);
		if (quiet_flag != 1) {
			fprintf(stderr, "%s for job %s\n",
				slurm_strerror(rc), job_str);
		}
	}
}
/*
 * scontrol_update_res - update the slurm reservation configuration per the
 *	supplied arguments
 * IN argc - count of arguments
 * IN argv - list of arguments
 * RET 0 if no slurm error, errno otherwise. parsing error prints
 *	error message and returns 0.
 */
extern int scontrol_update_res(int argc, char *argv[])
{
	resv_desc_msg_t resv_msg;
	int err, ret = 0;
	/* flags tracking which resv_msg strings the parser allocated
	 * and we must free on exit */
	int free_user_str = 0, free_acct_str = 0, free_tres_license = 0,
	    free_tres_bb = 0, free_tres_corecnt = 0, free_tres_nodecnt = 0;

	slurm_init_resv_desc_msg (&resv_msg);
	err = scontrol_parse_res_options(argc, argv, "No reservation update.",
					 &resv_msg, &free_user_str,
					 &free_acct_str, &free_tres_license,
					 &free_tres_bb, &free_tres_corecnt,
					 &free_tres_nodecnt);
	if (err)
		goto SCONTROL_UPDATE_RES_CLEANUP;

	if (resv_msg.name == NULL) {
		exit_code = 1;
		error("Reservation must be given. No reservation update.");
		goto SCONTROL_UPDATE_RES_CLEANUP;
	}

	err = slurm_update_reservation(&resv_msg);
	if (err) {
		exit_code = 1;
		slurm_perror("Error updating the reservation");
		ret = slurm_get_errno();
	} else {
		printf("Reservation updated.\n");
	}

SCONTROL_UPDATE_RES_CLEANUP:
	/* free only the strings the parser allocated */
	if (free_user_str)
		xfree(resv_msg.users);
	if (free_acct_str)
		xfree(resv_msg.accounts);
	if (free_tres_license)
		xfree(resv_msg.licenses);
	if (free_tres_bb)
		xfree(resv_msg.burst_buffer);
	if (free_tres_corecnt)
		xfree(resv_msg.core_cnt);
	if (free_tres_nodecnt)
		xfree(resv_msg.node_cnt);

	return ret;
}
/*
 * Load reservation information into *info_ptr for sview, caching the
 * last response in g_resv_info_ptr.  Within refresh_delay seconds of
 * the previous load (and without "force"), the cached data is returned
 * as-is.
 * RET SLURM_SUCCESS when *info_ptr points at new/different data,
 *     SLURM_NO_CHANGE_IN_DATA when unchanged, else a slurm error code.
 */
extern int get_new_info_resv(reserve_info_msg_t **info_ptr, int force)
{
	static reserve_info_msg_t *new_resv_ptr = NULL;
	int error_code = SLURM_NO_CHANGE_IN_DATA;
	time_t now = time(NULL);
	static time_t last;
	static bool changed = 0;

	/* throttle: reuse the cache inside the refresh window */
	if (g_resv_info_ptr && !force &&
	    ((now - last) < working_sview_config.refresh_delay)) {
		if (*info_ptr != g_resv_info_ptr)
			error_code = SLURM_SUCCESS;
		*info_ptr = g_resv_info_ptr;
		if (changed)
			error_code = SLURM_SUCCESS;
		goto end_it;
	}
	last = now;

	if (g_resv_info_ptr) {
		/* incremental load since last_update */
		error_code = slurm_load_reservations(
					g_resv_info_ptr->last_update,
					&new_resv_ptr);
		if (error_code == SLURM_SUCCESS) {
			slurm_free_reservation_info_msg(g_resv_info_ptr);
			changed = 1;
		} else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_NO_CHANGE_IN_DATA;
			new_resv_ptr = g_resv_info_ptr;
			changed = 0;
		}
	} else {
		new_resv_ptr = NULL;
		error_code = slurm_load_reservations((time_t) NULL,
						     &new_resv_ptr);
		changed = 1;
	}

	g_resv_info_ptr = new_resv_ptr;

	/* report success when the caller's pointer changes */
	if (g_resv_info_ptr && (*info_ptr != g_resv_info_ptr))
		error_code = SLURM_SUCCESS;

	*info_ptr = g_resv_info_ptr;
end_it:
	return error_code;
}
/* * scontrol_top_job - Move the specified job ID to the top of the queue for * a given user ID, partition, account, and QOS. * IN job_str - a job id */ extern void scontrol_top_job(char *job_id_str) { int rc; if (xstrncasecmp(job_id_str, "jobid=", 6) == 0) job_id_str += 6; if (xstrncasecmp(job_id_str, "job=", 4) == 0) job_id_str += 4; rc = slurm_top_job(job_id_str); if (rc != SLURM_SUCCESS) { exit_code = 1; if (quiet_flag != 1) { fprintf(stderr, "%s for job %s\n", slurm_strerror(slurm_get_errno()), job_id_str); } } }
/*
 * Return the DRM system identification string, e.g. "SLURM 2.x".
 * Lazily queries the slurm controller configuration for the version on
 * the first call and caches it in slurmdrmaa_version; falls back to a
 * plain "SLURM" string if the config cannot be loaded.
 */
static const char *
slurmdrmaa_get_DRM_system( fsd_drmaa_singletone_t *self )
{
	/* no locks as drmaa_get_drm_system is usually called only once */
	if (slurmdrmaa_version[0] == '\0')
	{
		slurm_ctl_conf_t * conf_info_msg_ptr = NULL;
		if ( slurm_load_ctl_conf ((time_t) NULL,
					  &conf_info_msg_ptr ) == -1 )
		{
			fsd_log_error(("slurm_load_ctl_conf error: %s",
				slurm_strerror(slurm_get_errno())));
			fsd_snprintf(NULL, slurmdrmaa_version,
				sizeof(slurmdrmaa_version)-1, "SLURM");
		}
		else
		{
			fsd_snprintf(NULL, slurmdrmaa_version,
				sizeof(slurmdrmaa_version)-1, "SLURM %s",
				conf_info_msg_ptr->version);
			slurm_free_ctl_conf (conf_info_msg_ptr);
		}
	}

	return slurmdrmaa_version;
}
/*
 * Wait up to "timeout" for an allocation response RPC of the given
 * msg_type for job_id on the listen socket, storing the result in
 * *resp.  If the RPC never arrives, poll the controller directly in
 * case the response was lost in transit.  On failure *resp stays NULL
 * (errno restored to the wait failure when the job is still pending).
 */
static void _wait_for_allocation_response(uint32_t job_id,
					  const listen_t *listen,
					  uint16_t msg_type, int timeout,
					  void **resp)
{
	int errnum, rc;

	info("job %u queued and waiting for resources", job_id);
	*resp = NULL;
	if ((rc = _wait_for_alloc_rpc(listen, timeout)) == 1)
		rc = _accept_msg_connection(listen->fd, msg_type, resp);
	if (rc <= 0) {
		errnum = errno;	/* save before later calls clobber errno */
		/* Maybe the resource allocation response RPC got lost
		 * in the mail; surely it should have arrived by now.
		 * Let's see if the controller thinks that the allocation
		 * has been granted. */
		if (msg_type == RESPONSE_RESOURCE_ALLOCATION) {
			if (slurm_allocation_lookup(job_id,
				(resource_allocation_response_msg_t **)
				resp) >= 0)
				return;
		} else if (msg_type == RESPONSE_JOB_PACK_ALLOCATION) {
			if (slurm_pack_job_lookup(job_id, (List *) resp) >= 0)
				return;
		} else {
			error("%s: Invalid msg_type (%u)", __func__,
			      msg_type);
		}
		if (slurm_get_errno() == ESLURM_JOB_PENDING) {
			debug3("Still waiting for allocation");
			errno = errnum;
			return;
		} else {
			debug3("Unable to confirm allocation for job %u: %m",
			       job_id);
			return;
		}
	}
	info("job %u has been allocated resources", job_id);
	return;
}