/* Convert an array of task IDs into a list of host names * RET: the string, caller must xfree() this value */ static char *_task_ids_to_host_list(int ntasks, uint32_t taskids[]) { int i; hostset_t hs; char *hosts; slurm_step_layout_t *sl; if ((sl = launch_common_get_slurm_step_layout(local_srun_job)) == NULL) return (xstrdup("Unknown")); hs = hostset_create(NULL); for (i = 0; i < ntasks; i++) { char *host = slurm_step_layout_host_name(sl, taskids[i]); if (host) { hostset_insert(hs, host); free(host); } else { error("Could not identify host name for task %u", taskids[i]); } } hosts = _hostset_to_string(hs); hostset_destroy(hs); return (hosts); }
/* Convert an array of task IDs into a list of host names * RET: the string, caller must xfree() this value */ static char *_task_ids_to_host_list(int ntasks, uint32_t *taskids) { int i, task_cnt = 0; hostset_t hs; char *hosts; slurm_step_layout_t *sl; if ((sl = launch_common_get_slurm_step_layout(local_srun_job)) == NULL) return (xstrdup("Unknown")); /* If overhead of determining the hostlist is too high then srun * communications will timeout and fail, so return "Unknown" instead. * * See slurm_step_layout_host_id() in src/common/slurm_step_layout.c * for details. */ for (i = 0; i < sl->node_cnt; i++) { task_cnt += sl->tasks[i]; } if (task_cnt > 100000) return (xstrdup("Unknown")); hs = hostset_create(NULL); for (i = 0; i < ntasks; i++) { char *host = slurm_step_layout_host_name(sl, taskids[i]); if (host) { hostset_insert(hs, host); free(host); } else { error("Could not identify host name for task %u", taskids[i]); } } hosts = _hostset_to_string(hs); hostset_destroy(hs); return (hosts); }
/*
 * slurm_pack_job_will_run - determine if a heterogeneous job would execute
 *	immediately if submitted now
 * IN job_req_list - List of job_desc_msg_t structures describing the resource
 *		allocation request
 * RET SLURM_SUCCESS on success; SLURM_ERROR if no job descriptors were given;
 *	otherwise the non-success code returned by slurm_job_will_run2() for
 *	the first component that failed
 *
 * On success a summary line (job id, start time, processor count, node list)
 * is logged, followed by the list of jobs to be preempted, if any.
 */
extern int slurm_pack_job_will_run(List job_req_list)
{
	job_desc_msg_t *req;
	will_run_response_msg_t *will_run_resp;
	char buf[64], *sep = "";
	int rc = SLURM_SUCCESS, inx = 0;
	ListIterator iter, itr;
	time_t first_start = (time_t) 0;
	uint32_t first_job_id = 0, tot_proc_count = 0, *job_id_ptr;
	hostset_t hs = NULL;
	char *job_list = NULL;

	if (!job_req_list || (list_count(job_req_list) == 0)) {
		error("No job descriptors input");
		return SLURM_ERROR;
	}

	iter = list_iterator_create(job_req_list);
	while ((req = (job_desc_msg_t *) list_next(iter))) {
		will_run_resp = NULL;
		rc = slurm_job_will_run2(req, &will_run_resp);

		/* Relay any job_submit plugin message, whatever the result */
		if (will_run_resp)
			print_multi_line_string(
				will_run_resp->job_submit_user_msg,
				inx, LOG_LEVEL_INFO);

		if ((rc == SLURM_SUCCESS) && will_run_resp) {
			/* The first component's id is reported for the job */
			if (first_job_id == 0)
				first_job_id = will_run_resp->job_id;
			/*
			 * Track the latest start time over all components,
			 * since that is the value reported for the whole job
			 */
			if ((first_start == 0) ||
			    (first_start < will_run_resp->start_time))
				first_start = will_run_resp->start_time;
			tot_proc_count += will_run_resp->proc_cnt;

			/* Accumulate the nodes of every component */
			if (hs)
				hostset_insert(hs, will_run_resp->node_list);
			else
				hs = hostset_create(will_run_resp->node_list);

			/* Append preempted job ids as a comma separated list */
			if (will_run_resp->preemptee_job_id) {
				itr = list_iterator_create(
					will_run_resp->preemptee_job_id);
				while ((job_id_ptr = list_next(itr))) {
					if (job_list)
						sep = ",";
					xstrfmtcat(job_list, "%s%u", sep,
						   *job_id_ptr);
				}
				list_iterator_destroy(itr);
			}
		}

		/*
		 * Release the response on every path, not only on success,
		 * so that a response accompanying a failure is not leaked.
		 */
		if (will_run_resp) {
			slurm_free_will_run_response_msg(will_run_resp);
			will_run_resp = NULL;
		}

		if (rc != SLURM_SUCCESS)
			break;
		inx++;
	}
	list_iterator_destroy(iter);

	if (rc == SLURM_SUCCESS) {
		char node_list[1028] = "";

		if (hs)
			hostset_ranged_string(hs, sizeof(node_list), node_list);
		slurm_make_time_str(&first_start, buf, sizeof(buf));
		info("Job %u to start at %s using %u processors on %s",
		     first_job_id, buf, tot_proc_count, node_list);
		if (job_list)
			info(" Preempts: %s", job_list);
	}

	if (hs)
		hostset_destroy(hs);
	xfree(job_list);

	return rc;
}
/*
 * slurm_pack_job_will_run - determine if a heterogeneous job would execute
 *	immediately if submitted now
 * IN job_req_list - List of job_desc_msg_t structures describing the resource
 *		allocation request
 * RET SLURM_SUCCESS on success; SLURM_ERROR if no job descriptors were given;
 *	otherwise the non-success code returned by slurm_job_will_run2() for
 *	the first component that failed
 *
 * On success a summary line (job id, start time, processor/cnode count, node
 * list) is logged, followed by the list of jobs to be preempted, if any.
 */
extern int slurm_pack_job_will_run(List job_req_list)
{
	job_desc_msg_t *req;
	will_run_response_msg_t *will_run_resp;
	char buf[64], local_hostname[64] = "", *sep = "";
	int rc = SLURM_SUCCESS;
	char *type = "processors";
	ListIterator iter, itr;
	time_t first_start = (time_t) 0;
	uint32_t first_job_id = 0, tot_proc_count = 0, *job_id_ptr;
	hostset_t hs = NULL;
	char *job_list = NULL;

	if (!job_req_list || (list_count(job_req_list) == 0)) {
		error("No job descriptors input");
		return SLURM_ERROR;
	}

	/* Best effort: on failure local_hostname stays empty and is unused */
	(void) gethostname_short(local_hostname, sizeof(local_hostname));

	iter = list_iterator_create(job_req_list);
	while ((req = (job_desc_msg_t *) list_next(iter))) {
		/*
		 * Temporarily point alloc_node at our stack buffer when the
		 * caller left it unset; it is reset to NULL below.
		 */
		if ((req->alloc_node == NULL) && local_hostname[0])
			req->alloc_node = local_hostname;

		will_run_resp = NULL;
		rc = slurm_job_will_run2(req, &will_run_resp);

		if ((rc == SLURM_SUCCESS) && will_run_resp) {
			/* The first component's id is reported for the job */
			if (first_job_id == 0)
				first_job_id = will_run_resp->job_id;
			/*
			 * Track the latest start time over all components,
			 * since that is the value reported for the whole job
			 */
			if ((first_start == 0) ||
			    (first_start < will_run_resp->start_time))
				first_start = will_run_resp->start_time;
			tot_proc_count += will_run_resp->proc_cnt;

			/* Accumulate the nodes of every component */
			if (hs)
				hostset_insert(hs, will_run_resp->node_list);
			else
				hs = hostset_create(will_run_resp->node_list);

			/* Append preempted job ids as a comma separated list */
			if (will_run_resp->preemptee_job_id) {
				itr = list_iterator_create(
					will_run_resp->preemptee_job_id);
				while ((job_id_ptr = list_next(itr))) {
					if (job_list)
						sep = ",";
					xstrfmtcat(job_list, "%s%u", sep,
						   *job_id_ptr);
				}
				list_iterator_destroy(itr);
			}
		}

		/*
		 * Release the response on every path, not only on success,
		 * so that a response accompanying a failure is not leaked.
		 */
		if (will_run_resp) {
			slurm_free_will_run_response_msg(will_run_resp);
			will_run_resp = NULL;
		}

		/* Undo the temporary alloc_node before anything can break out */
		if (req->alloc_node == local_hostname)
			req->alloc_node = NULL;

		if (rc != SLURM_SUCCESS)
			break;
	}
	list_iterator_destroy(iter);

	if (rc == SLURM_SUCCESS) {
		uint32_t cluster_flags = slurmdb_setup_cluster_flags();
		char node_list[1028] = "";

		if (cluster_flags & CLUSTER_FLAG_BG)
			type = "cnodes";
		if (hs)
			hostset_ranged_string(hs, sizeof(node_list), node_list);
		slurm_make_time_str(&first_start, buf, sizeof(buf));
		info("Job %u to start at %s using %u %s on %s",
		     first_job_id, buf, tot_proc_count, type, node_list);
		if (job_list)
			info(" Preempts: %s", job_list);
	}

	if (hs)
		hostset_destroy(hs);
	xfree(job_list);

	return rc;
}
/* * parse_command_line */ extern void parse_command_line( int argc, char* argv[] ) { char *env_val = NULL; bool override_format_env = false; int opt_char; int option_index; static struct option long_options[] = { {"accounts", required_argument, 0, 'A'}, {"all", no_argument, 0, 'a'}, {"format", required_argument, 0, 'o'}, {"help", no_argument, 0, OPT_LONG_HELP}, {"hide", no_argument, 0, OPT_LONG_HIDE}, {"iterate", required_argument, 0, 'i'}, {"jobs", optional_argument, 0, 'j'}, {"long", no_argument, 0, 'l'}, {"cluster", required_argument, 0, 'M'}, {"clusters", required_argument, 0, 'M'}, {"node", required_argument, 0, 'n'}, {"nodes", required_argument, 0, 'n'}, {"noheader", no_argument, 0, 'h'}, {"partitions", required_argument, 0, 'p'}, {"qos", required_argument, 0, 'q'}, {"reservation",required_argument, 0, 'R'}, {"sort", required_argument, 0, 'S'}, {"start", no_argument, 0, OPT_LONG_START}, {"steps", optional_argument, 0, 's'}, {"states", required_argument, 0, 't'}, {"usage", no_argument, 0, OPT_LONG_USAGE}, {"user", required_argument, 0, 'u'}, {"users", required_argument, 0, 'u'}, {"verbose", no_argument, 0, 'v'}, {"version", no_argument, 0, 'V'}, {NULL, 0, 0, 0} }; if (getenv("SQUEUE_ALL")) params.all_flag = true; if ( ( env_val = getenv("SQUEUE_SORT") ) ) params.sort = xstrdup(env_val); if ( ( env_val = getenv("SLURM_CLUSTERS") ) ) { if (!(params.clusters = slurmdb_get_info_cluster(env_val))) { error("'%s' can't be reached now, " "or it is an invalid entry for " "SLURM_CLUSTERS. 
Use 'sacctmgr --list " "cluster' to see avaliable clusters.", env_val); exit(1); } working_cluster_rec = list_peek(params.clusters); } while ((opt_char = getopt_long(argc, argv, "A:ahi:j::ln:M:o:p:q:R:s::S:t:u:U:vV", long_options, &option_index)) != -1) { switch (opt_char) { case (int)'?': fprintf(stderr, "Try \"squeue --help\" " "for more information\n"); exit(1); case (int) 'A': case (int) 'U': /* backwards compatibility */ xfree(params.accounts); params.accounts = xstrdup(optarg); params.account_list = _build_str_list( params.accounts ); break; case (int)'a': params.all_flag = true; break; case (int)'h': params.no_header = true; break; case (int) 'i': params.iterate= atoi(optarg); if (params.iterate <= 0) { error ("--iterate=%s\n", optarg); exit(1); } break; case (int) 'j': if (optarg) { params.jobs = xstrdup(optarg); params.job_list = _build_job_list(params.jobs); } params.job_flag = true; break; case (int) 'l': params.long_list = true; override_format_env = true; break; case (int) 'M': if (params.clusters) list_destroy(params.clusters); if (!(params.clusters = slurmdb_get_info_cluster(optarg))) { error("'%s' can't be reached now, " "or it is an invalid entry for " "--cluster. 
Use 'sacctmgr --list " "cluster' to see avaliable clusters.", optarg); exit(1); } working_cluster_rec = list_peek(params.clusters); break; case (int) 'n': if (params.nodes) hostset_destroy(params.nodes); params.nodes = hostset_create(optarg); if (params.nodes == NULL) { error("'%s' invalid entry for --nodes", optarg); exit(1); } break; case (int) 'o': xfree(params.format); params.format = xstrdup(optarg); override_format_env = true; break; case (int) 'p': xfree(params.partitions); params.partitions = xstrdup(optarg); params.part_list = _build_str_list( params.partitions ); params.all_flag = true; break; case (int) 'q': xfree(params.qoss); params.qoss = xstrdup(optarg); params.qos_list = _build_str_list( params.qoss ); break; case (int) 'R': xfree(params.reservation); params.reservation = xstrdup(optarg); break; case (int) 's': if (optarg) { params.steps = xstrdup(optarg); params.step_list = _build_step_list(params.steps); } params.step_flag = true; override_format_env = true; break; case (int) 'S': xfree(params.sort); params.sort = xstrdup(optarg); break; case (int) 't': xfree(params.states); params.states = xstrdup(optarg); params.state_list = _build_state_list( params.states ); break; case (int) 'u': xfree(params.users); params.users = xstrdup(optarg); params.user_list = _build_user_list( params.users ); break; case (int) 'v': params.verbose++; break; case (int) 'V': print_slurm_version(); exit(0); case OPT_LONG_HELP: _help(); exit(0); case OPT_LONG_HIDE: params.all_flag = false; break; case OPT_LONG_START: params.start_flag = true; break; case OPT_LONG_USAGE: _usage(); exit(0); } } if ( override_format_env == false ) { if ( ( env_val = getenv("SQUEUE_FORMAT") ) ) params.format = xstrdup(env_val); } params.cluster_flags = slurmdb_setup_cluster_flags(); if (optind < argc) { if (params.job_flag) { params.jobs = xstrdup(argv[optind++]); params.job_list = _build_job_list(params.jobs); } else if (params.step_flag) { params.steps = xstrdup(argv[optind++]); 
params.step_list = _build_step_list(params.steps); } if (optind < argc) { error("Unrecognized option: %s",argv[optind]); _usage(); exit(1); } } if ( params.job_flag && params.step_flag) { if (params.job_list) { verbose("Printing job steps with job filter"); params.job_flag = false; } else { error("Incompatible options --jobs and --steps"); exit(1); } } if ( params.nodes ) { char *name1 = NULL; char *name2 = NULL; hostset_t nodenames = hostset_create(NULL); if (nodenames == NULL) fatal("malloc failure"); while ( hostset_count(params.nodes) > 0 ) { name1 = hostset_pop(params.nodes); /* localhost = use current host name */ if ( strcasecmp("localhost", name1) == 0 ) { name2 = xmalloc(128); gethostname_short(name2, 128); } else { /* translate NodeHostName to NodeName */ name2 = slurm_conf_get_nodename(name1); /* use NodeName if translation failed */ if ( name2 == NULL ) name2 = xstrdup(name1); } hostset_insert(nodenames, name2); free(name1); xfree(name2); } /* Replace params.nodename with the new one */ hostset_destroy(params.nodes); params.nodes = nodenames; } if ( ( params.accounts == NULL ) && ( env_val = getenv("SQUEUE_ACCOUNT") ) ) { params.accounts = xstrdup(env_val); params.account_list = _build_str_list( params.accounts ); } if ( ( params.partitions == NULL ) && ( env_val = getenv("SQUEUE_PARTITION") ) ) { params.partitions = xstrdup(env_val); params.part_list = _build_str_list( params.partitions ); params.all_flag = true; } if ( ( params.qoss == NULL ) && ( env_val = getenv("SQUEUE_QOS") ) ) { params.qoss = xstrdup(env_val); params.qos_list = _build_str_list( params.qoss ); } if ( ( params.states == NULL ) && ( env_val = getenv("SQUEUE_STATES") ) ) { params.states = xstrdup(env_val); params.state_list = _build_state_list( params.states ); } if ( ( params.users == NULL ) && ( env_val = getenv("SQUEUE_USERS") ) ) { params.users = xstrdup(env_val); params.user_list = _build_user_list( params.users ); } if ( params.start_flag && !params.step_flag ) { /* Set more 
defaults */ if (params.format == NULL) params.format = xstrdup("%.7i %.9P %.8j %.8u %.2t %.19S %.6D %R"); if (params.sort == NULL) params.sort = xstrdup("S"); if (params.states == NULL) { params.states = xstrdup("PD"); params.state_list = _build_state_list( params.states ); } } params.max_cpus = _max_cpus_per_node(); if ( params.verbose ) _print_options(); }