/* * _parse_state - convert job state name string to numeric value * IN str - state name * OUT states - enum job_states value corresponding to str * RET 0 or error code */ static int _parse_state( char* str, uint16_t* states ) { int i; char *state_names; if ((i = job_state_num(str)) >= 0) { *states = (uint16_t) i; return SLURM_SUCCESS; } error ("Invalid job state specified: %s", str); state_names = xstrdup(job_state_string(0)); for (i=1; i<JOB_END; i++) { xstrcat(state_names, ","); xstrcat(state_names, job_state_string(i)); } xstrcat(state_names, ","); xstrcat(state_names, job_state_string(JOB_COMPLETING)); xstrcat(state_names, ","); xstrcat(state_names, job_state_string(JOB_CONFIGURING)); error ("Valid job states include: %s\n", state_names); xfree (state_names); return SLURM_ERROR; }
/*
 * _sort_job_by_state - list comparator ordering jobs by the text of their
 *	state name (strcmp order)
 * IN void1, void2 - opaque handles decoded by _get_job_info_from_void()
 * RET strcmp() result for the two state names, negated when the global
 *	reverse_order is set
 */
static int _sort_job_by_state(void *void1, void *void2)
{
	job_info_t *lhs;
	job_info_t *rhs;
	int cmp;

	_get_job_info_from_void(&lhs, &rhs, void1, void2);

	cmp = strcmp(job_state_string(lhs->job_state),
		     job_state_string(rhs->job_state));

	return reverse_order ? -cmp : cmp;
}
/*
 * _process_terminated - fold one JOB_TERMINATED record into its job record
 * IN job_list - list searched for the job the record refers to
 * IN f - fields of the record, handed to _parse_line()
 * IN lc - line number of the record, used only in diagnostics
 * IN show_full - copied into the job record's show_full field
 * IN len - passed through to _parse_line()
 */
static void _process_terminated(List job_list, char *f[], int lc,
				int show_full, int len)
{
	filetxt_job_rec_t *parsed = NULL;
	filetxt_job_rec_t *rec;

	_parse_line(f, (void **)&parsed, len);
	if (parsed == NULL) {
		error("Unknown proccess terminated");
		return;
	}

	rec = _find_job_record(job_list, parsed->header, JOB_TERMINATED);

	/* A termination was already recorded for this job: never overwrite
	 * the whole record, at most upgrade its status. */
	if (rec && rec->job_terminated_seen) {
		if (parsed->status == JOB_NODE_FAIL) {
			/* multiple node failures - extra TERMINATED records */
			debug("Note: Duplicate JOB_TERMINATED "
			      "record (nf) for job %u at "
			      "line %d\n",
			      parsed->header.jobnum, lc);
			/* JOB_TERMINATED/NF records may be preceded
			 * by a JOB_TERMINATED/CA record; NF is much
			 * more interesting.
			 */
			rec->status = parsed->status;
		} else {
			fprintf(stderr,
				"Conflicting JOB_TERMINATED record (%s) for "
				"job %u at line %d -- ignoring it\n",
				job_state_string(parsed->status),
				rec->header.jobnum, lc);
		}
		_destroy_filetxt_job_rec(parsed);
		return;
	}

	if (rec == NULL) {
		/* No earlier record for this job: fake one for now.
		 * NOTE(review): the placeholder is not appended to job_list
		 * in this function - confirm that is intended. */
		rec = _create_filetxt_job_rec(parsed->header);
		rec->jobname = xstrdup("(unknown)");
		debug("Note: JOB_TERMINATED record for job "
		      "%u preceded "
		      "other job records at line %d\n",
		      parsed->header.jobnum, lc);
	}

	/* First termination seen for this job: copy the final accounting */
	rec->job_terminated_seen = 1;
	rec->elapsed = parsed->elapsed;
	rec->end = parsed->header.timestamp;
	rec->status = parsed->status;
	rec->requid = parsed->requid;
	rec->exitcode = parsed->exitcode;
	if (list_count(rec->steps) > 1)
		rec->track_steps = 1;
	rec->show_full = show_full;

	_destroy_filetxt_job_rec(parsed);
}
/*
 * _print_job_job_state - print the state column for one job
 * IN job - job to report, or NULL to print the column header "STATE"
 * IN width, right - forwarded to _print_str()
 * IN suffix - text written to stdout after the field; skipped when NULL
 * RET SLURM_SUCCESS always
 */
int _print_job_job_state(job_info_t * job, int width, bool right, char* suffix)
{
	if (job != NULL) {
		_print_str(job_state_string(job->job_state), width, right,
			   true);
	} else {
		/* Print the Header instead */
		_print_str("STATE", width, right, true);
	}

	if (suffix != NULL)
		fputs(suffix, stdout);

	return SLURM_SUCCESS;
}
/* Log all SICP job records */ static void _log_sicp_recs(void) { ListIterator sicp_iterator; sicp_job_t *sicp_ptr; sicp_iterator = list_iterator_create(sicp_job_list); while ((sicp_ptr = (sicp_job_t *) list_next(sicp_iterator))) { info("SICP: Job_ID:%u State:%s", sicp_ptr->job_id, job_state_string(sicp_ptr->job_state)); } list_iterator_destroy(sicp_iterator); }
static void _opt_list(void) { int i; info("account : %s", opt.account); info("batch : %s", tf_(opt.batch)); info("ctld : %s", tf_(opt.ctld)); info("full : %s", tf_(opt.full)); info("interactive : %s", tf_(opt.interactive)); info("job_name : %s", opt.job_name); info("nodelist : %s", opt.nodelist); info("partition : %s", opt.partition); info("qos : %s", opt.qos); info("reservation : %s", opt.reservation); if (opt.signal != (uint16_t) NO_VAL) info("signal : %u", opt.signal); info("state : %s", job_state_string(opt.state)); info("user_id : %u", opt.user_id); info("user_name : %s", opt.user_name); info("verbose : %d", opt.verbose); info("wckey : %s", opt.wckey); for (i = 0; i < opt.job_cnt; i++) { if (opt.step_id[i] == SLURM_BATCH_SCRIPT) { if (opt.array_id[i] == NO_VAL) { info("job_id[%d] : %u", i, opt.job_id[i]); } else if (opt.array_id[i] == INFINITE) { info("job_id[%d] : %u_*", i, opt.job_id[i]); } else { info("job_id[%d] : %u_%u", i, opt.job_id[i], opt.array_id[i]); } } else { if (opt.array_id[i] == NO_VAL) { info("job_step_id[%d] : %u.%u", i, opt.job_id[i], opt.step_id[i]); } else if (opt.array_id[i] == INFINITE) { info("job_step_id[%d] : %u_*.%u", i, opt.job_id[i], opt.step_id[i]); } else { info("job_step_id[%d] : %u_%u.%u", i, opt.job_id[i], opt.array_id[i], opt.step_id[i]); } } } }
/*
 * _opt_list - log the contents of the global "opt" via info(): every scalar
 *	option first, then one "job.step" line per requested entry
 */
static void _opt_list(void)
{
	int inx = 0;

	info("account : %s", opt.account);
	info("batch : %s", tf_(opt.batch));
	info("ctld : %s", tf_(opt.ctld));
	info("interactive : %s", tf_(opt.interactive));
	info("job_name : %s", opt.job_name);
	info("nodelist : %s", opt.nodelist);
	info("partition : %s", opt.partition);
	info("qos : %s", opt.qos);
	info("reservation : %s", opt.reservation);
	info("signal : %u", opt.signal);
	info("state : %s", job_state_string(opt.state));
	info("user_id : %u", opt.user_id);
	info("user_name : %s", opt.user_name);
	info("verbose : %d", opt.verbose);
	info("wckey : %s", opt.wckey);

	while (inx < opt.job_cnt) {
		info("job_steps : %u.%u ",
		     opt.job_id[inx], opt.step_id[inx]);
		inx++;
	}
}
extern int slurm_jobcomp_log_record(struct job_record *job_ptr) { int nwritten, B_SIZE = 1024; char usr_str[32], grp_str[32], start_str[32], end_str[32]; char submit_str[32], *cluster = NULL, *qos, *state_string; time_t elapsed_time, submit_time, eligible_time; enum job_states job_state; uint32_t time_limit; uint16_t ntasks_per_node; int i; char *buffer, tmp_str[256], *script_str, *script; struct job_node *jnode; if (list_count(jobslist) > MAX_JOBS) { error("%s: Limit of %d enqueued jobs in memory waiting to be " "indexed reached. Job %lu discarded", plugin_type, MAX_JOBS, (unsigned long)job_ptr->job_id); return SLURM_ERROR; } _get_user_name(job_ptr->user_id, usr_str, sizeof(usr_str)); _get_group_name(job_ptr->group_id, grp_str, sizeof(grp_str)); if ((job_ptr->time_limit == NO_VAL) && job_ptr->part_ptr) time_limit = job_ptr->part_ptr->max_time; else time_limit = job_ptr->time_limit; if (job_ptr->job_state & JOB_RESIZING) { time_t now = time(NULL); state_string = job_state_string(job_ptr->job_state); if (job_ptr->resize_time) { _make_time_str(&job_ptr->resize_time, start_str, sizeof(start_str)); } else { _make_time_str(&job_ptr->start_time, start_str, sizeof(start_str)); } _make_time_str(&now, end_str, sizeof(end_str)); } else { /* Job state will typically have JOB_COMPLETING or JOB_RESIZING * flag set when called. We remove the flags to get the eventual * completion state: JOB_FAILED, JOB_TIMEOUT, etc. */ job_state = job_ptr->job_state & JOB_STATE_BASE; state_string = job_state_string(job_state); if (job_ptr->resize_time) { _make_time_str(&job_ptr->resize_time, start_str, sizeof(start_str)); } else if (job_ptr->start_time > job_ptr->end_time) { /* Job cancelled while pending and * expected start time is in the future. 
*/ snprintf(start_str, sizeof(start_str), "Unknown"); } else { _make_time_str(&job_ptr->start_time, start_str, sizeof(start_str)); } _make_time_str(&job_ptr->end_time, end_str, sizeof(end_str)); } elapsed_time = job_ptr->end_time - job_ptr->start_time; buffer = xmalloc(B_SIZE); nwritten = snprintf(buffer, B_SIZE, JOBCOMP_DATA_FORMAT, (unsigned long) job_ptr->job_id, usr_str, (unsigned long) job_ptr->user_id, grp_str, (unsigned long) job_ptr->group_id, start_str, end_str, (long) elapsed_time, job_ptr->partition, job_ptr->alloc_node, job_ptr->nodes, (unsigned long) job_ptr->total_cpus, (unsigned long) job_ptr->total_nodes, (unsigned long) job_ptr->derived_ec, (unsigned long) job_ptr->exit_code, state_string); if (nwritten >= B_SIZE) { B_SIZE += nwritten + 1; buffer = xrealloc(buffer, B_SIZE); nwritten = snprintf(buffer, B_SIZE, JOBCOMP_DATA_FORMAT, (unsigned long) job_ptr->job_id, usr_str, (unsigned long) job_ptr->user_id, grp_str, (unsigned long) job_ptr->group_id, start_str, end_str, (long) elapsed_time, job_ptr->partition, job_ptr->alloc_node, job_ptr->nodes, (unsigned long) job_ptr->total_cpus, (unsigned long) job_ptr->total_nodes, (unsigned long) job_ptr->derived_ec, (unsigned long) job_ptr->exit_code, state_string); if (nwritten >= B_SIZE) { error("%s: Job completion data truncated and lost", plugin_type); return SLURM_ERROR; } } snprintf(tmp_str, sizeof(tmp_str), ",\"cpu_hours\":%.6f", ((float) elapsed_time * (float) job_ptr->total_cpus) / (float) 3600); xstrcat(buffer, tmp_str); if (job_ptr->array_task_id != NO_VAL) { xstrfmtcat(buffer, ",\"array_job_id\":%lu", (unsigned long) job_ptr->array_job_id); xstrfmtcat(buffer, ",\"array_task_id\":%lu", (unsigned long) job_ptr->array_task_id); } if (job_ptr->details && (job_ptr->details->submit_time != NO_VAL)) { submit_time = job_ptr->details->submit_time; _make_time_str(&submit_time, submit_str, sizeof(submit_str)); xstrfmtcat(buffer, ",\"@submit\":\"%s\"", submit_str); } if (job_ptr->details && 
(job_ptr->details->begin_time != NO_VAL)) { eligible_time = job_ptr->start_time - job_ptr->details->begin_time; xstrfmtcat(buffer, ",\"eligible_time\":%lu", eligible_time); } if (job_ptr->details && (job_ptr->details->work_dir && job_ptr->details->work_dir[0])) { xstrfmtcat(buffer, ",\"work_dir\":\"%s\"", job_ptr->details->work_dir); } if (job_ptr->details && (job_ptr->details->std_err && job_ptr->details->std_err[0])) { xstrfmtcat(buffer, ",\"std_err\":\"%s\"", job_ptr->details->std_err); } if (job_ptr->details && (job_ptr->details->std_in && job_ptr->details->std_in[0])) { xstrfmtcat(buffer, ",\"std_in\":\"%s\"", job_ptr->details->std_in); } if (job_ptr->details && (job_ptr->details->std_out && job_ptr->details->std_out[0])) { xstrfmtcat(buffer, ",\"std_out\":\"%s\"", job_ptr->details->std_out); } if (job_ptr->assoc_ptr != NULL) { cluster = ((slurmdb_assoc_rec_t *) job_ptr->assoc_ptr)->cluster; xstrfmtcat(buffer, ",\"cluster\":\"%s\"", cluster); } if (job_ptr->qos_ptr != NULL) { slurmdb_qos_rec_t *assoc = (slurmdb_qos_rec_t *) job_ptr->qos_ptr; qos = assoc->name; xstrfmtcat(buffer, ",\"qos\":\"%s\"", qos); } if (job_ptr->details && (job_ptr->details->num_tasks != NO_VAL)) { xstrfmtcat(buffer, ",\"ntasks\":%hu", job_ptr->details->num_tasks); } if (job_ptr->details && (job_ptr->details->ntasks_per_node != NO_VAL)) { ntasks_per_node = job_ptr->details->ntasks_per_node; xstrfmtcat(buffer, ",\"ntasks_per_node\":%hu", ntasks_per_node); } if (job_ptr->details && (job_ptr->details->cpus_per_task != NO_VAL)) { xstrfmtcat(buffer, ",\"cpus_per_task\":%hu", job_ptr->details->cpus_per_task); } if (job_ptr->details && (job_ptr->details->orig_dependency && job_ptr->details->orig_dependency[0])) { xstrfmtcat(buffer, ",\"orig_dependency\":\"%s\"", job_ptr->details->orig_dependency); } if (job_ptr->details && (job_ptr->details->exc_nodes && job_ptr->details->exc_nodes[0])) { xstrfmtcat(buffer, ",\"excluded_nodes\":\"%s\"", job_ptr->details->exc_nodes); } if (time_limit != 
INFINITE) { xstrfmtcat(buffer, ",\"time_limit\":%lu", (unsigned long) time_limit * 60); } if (job_ptr->resv_name && job_ptr->resv_name[0]) { xstrfmtcat(buffer, ",\"reservation_name\":\"%s\"", job_ptr->resv_name); } if (job_ptr->gres_req && job_ptr->gres_req[0]) { xstrfmtcat(buffer, ",\"gres_req\":\"%s\"", job_ptr->gres_req); } if (job_ptr->gres_alloc && job_ptr->gres_alloc[0]) { xstrfmtcat(buffer, ",\"gres_alloc\":\"%s\"", job_ptr->gres_alloc); } if (job_ptr->account && job_ptr->account[0]) { xstrfmtcat(buffer, ",\"account\":\"%s\"", job_ptr->account); } script = get_job_script(job_ptr); if (script && script[0]) { script_str = _json_escape(script); xstrfmtcat(buffer, ",\"script\":\"%s\"", script_str); xfree(script_str); } xfree(script); if (job_ptr->assoc_ptr) { assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; slurmdb_assoc_rec_t *assoc_ptr = job_ptr->assoc_ptr; char *parent_accounts = NULL; char **acc_aux = NULL; int nparents = 0; assoc_mgr_lock(&locks); /* Start at the first parent and go up. When studying * this code it was slightly faster to do 2 loops on * the association linked list and only 1 xmalloc but * we opted for cleaner looking code and going with a * realloc. */ while (assoc_ptr) { if (assoc_ptr->acct) { acc_aux = xrealloc(acc_aux, sizeof(char *) * (nparents + 1)); acc_aux[nparents++] = assoc_ptr->acct; } assoc_ptr = assoc_ptr->usage->parent_assoc_ptr; } for (i = nparents - 1; i >= 0; i--) xstrfmtcat(parent_accounts, "/%s", acc_aux[i]); xfree(acc_aux); xstrfmtcat(buffer, ",\"parent_accounts\":\"%s\"", parent_accounts); xfree(parent_accounts); assoc_mgr_unlock(&locks); } xstrcat(buffer, "}"); jnode = xmalloc(sizeof(struct job_node)); jnode->serialized_job = xstrdup(buffer); list_enqueue(jobslist, jnode); return SLURM_SUCCESS; }
/*
 * pgsql_jobcomp_process_get_jobs - read job completion records from the
 *	PostgreSQL jobcomp table
 * IN job_cond - optional filters; only step_list (job ids) and
 *	partition_list are consulted here
 * RET list of jobcomp_job_rec_t (created with jobcomp_destroy_job as its
 *	destructor), or NULL if the query failed
 *
 * Conditions inside one filter are joined with OR and the two filters with
 * AND. PostgreSQL does not accept "||" / "&&" as boolean operators
 * ("||" is string concatenation there), so the SQL keywords are required.
 */
extern List pgsql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond)
{
	char *query = NULL;
	char *extra = NULL;
	char *tmp = NULL;
	char *selected_part = NULL;
	slurmdb_selected_step_t *selected_step = NULL;
	ListIterator itr = NULL;
	int set = 0;
	PGresult *result = NULL;
	int i, ntuples;
	jobcomp_job_rec_t *job = NULL;
	char time_str[32];
	time_t temp_time;
	List job_list = NULL;

	/* Filter on the requested job ids */
	if (job_cond->step_list && list_count(job_cond->step_list)) {
		set = 0;
		xstrcat(extra, " where (");
		itr = list_iterator_create(job_cond->step_list);
		while ((selected_step = list_next(itr))) {
			if (set)
				xstrcat(extra, " OR ");
			tmp = xstrdup_printf("jobid=%d",
					     selected_step->jobid);
			xstrcat(extra, tmp);
			set = 1;
			xfree(tmp);
		}
		list_iterator_destroy(itr);
		xstrcat(extra, ")");
	}

	/* Filter on the requested partitions.
	 * NOTE(review): partition names are interpolated into the SQL text
	 * unescaped - confirm callers only pass trusted values. */
	if (job_cond->partition_list && list_count(job_cond->partition_list)) {
		set = 0;
		if (extra)
			xstrcat(extra, " AND (");
		else
			xstrcat(extra, " where (");

		itr = list_iterator_create(job_cond->partition_list);
		while ((selected_part = list_next(itr))) {
			if (set)
				xstrcat(extra, " OR ");
			tmp = xstrdup_printf("partition='%s'",
					     selected_part);
			xstrcat(extra, tmp);
			set = 1;
			xfree(tmp);
		}
		list_iterator_destroy(itr);
		xstrcat(extra, ")");
	}

	/* Select every column of jobcomp_table_fields, in table order, so
	 * the JOBCOMP_REQ_* indexes below address the result columns */
	i = 0;
	while (jobcomp_table_fields[i].name) {
		if (i)
			xstrcat(tmp, ", ");
		xstrcat(tmp, jobcomp_table_fields[i].name);
		i++;
	}

	query = xstrdup_printf("select %s from %s", tmp, jobcomp_table);
	xfree(tmp);

	if (extra) {
		xstrcat(query, extra);
		xfree(extra);
	}

	if (!(result = pgsql_db_query_ret(jobcomp_pgsql_db, query))) {
		xfree(query);
		return NULL;
	}
	xfree(query);

	job_list = list_create(jobcomp_destroy_job);
	ntuples = PQntuples(result);
	for (i = 0; i < ntuples; i++) {
		job = xmalloc(sizeof(jobcomp_job_rec_t));
		if (PQgetvalue(result, i, JOBCOMP_REQ_JOBID))
			job->jobid =
				atoi(PQgetvalue(result, i, JOBCOMP_REQ_JOBID));
		job->partition =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_PARTITION));

		/* start/end are read as integers and stored as time strings */
		temp_time = atoi(PQgetvalue(result, i, JOBCOMP_REQ_STARTTIME));
		slurm_make_time_str(&temp_time, time_str, sizeof(time_str));
		job->start_time = xstrdup(time_str);

		temp_time = atoi(PQgetvalue(result, i, JOBCOMP_REQ_ENDTIME));
		slurm_make_time_str(&temp_time, time_str, sizeof(time_str));
		job->end_time = xstrdup(time_str);

		if (PQgetvalue(result, i, JOBCOMP_REQ_UID))
			job->uid =
				atoi(PQgetvalue(result, i, JOBCOMP_REQ_UID));
		job->uid_name =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_USER_NAME));
		if (PQgetvalue(result, i, JOBCOMP_REQ_GID))
			job->gid =
				atoi(PQgetvalue(result, i, JOBCOMP_REQ_GID));
		job->gid_name =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_GROUP_NAME));
		job->jobname =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_NAME));
		job->nodelist =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_NODELIST));
		if (PQgetvalue(result, i, JOBCOMP_REQ_NODECNT))
			job->node_cnt =
				atoi(PQgetvalue(result, i,
						JOBCOMP_REQ_NODECNT));
		if (PQgetvalue(result, i, JOBCOMP_REQ_STATE)) {
			/* the stored numeric state is reported by name */
			int j = atoi(PQgetvalue(result, i, JOBCOMP_REQ_STATE));
			job->state = xstrdup(job_state_string(j));
		}
		job->timelimit =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_TIMELIMIT));
		if (PQgetvalue(result, i, JOBCOMP_REQ_MAXPROCS))
			job->max_procs =
				atoi(PQgetvalue(result, i,
						JOBCOMP_REQ_MAXPROCS));
		job->blockid =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_BLOCKID));
		job->connection =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_CONNECTION));
		job->reboot =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_REBOOT));
		job->rotate =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_ROTATE));
		job->geo =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_GEOMETRY));
		job->bg_start_point =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_START));
		list_append(job_list, job);
	}

	PQclear(result);
	return job_list;
}
void parse_command_line(int argc, char **argv) { extern int optind; int c, i, optionIndex = 0; char *end = NULL, *start = NULL, *acct_type = NULL; slurmdb_selected_step_t *selected_step = NULL; ListIterator itr = NULL; struct stat stat_buf; char *dot = NULL; bool brief_output = FALSE, long_output = FALSE; bool all_users = 0; bool all_clusters = 0; slurmdb_job_cond_t *job_cond = params.job_cond; log_options_t opts = LOG_OPTS_STDERR_ONLY ; int verbosity; /* count of -v options */ bool set; static struct option long_options[] = { {"allusers", no_argument, 0, 'a'}, {"accounts", required_argument, 0, 'A'}, {"allocations", no_argument, ¶ms.opt_allocs, OPT_LONG_ALLOCS}, {"brief", no_argument, 0, 'b'}, {"completion", no_argument, ¶ms.opt_completion, 'c'}, {"duplicates", no_argument, ¶ms.opt_dup, OPT_LONG_DUP}, {"helpformat", no_argument, 0, 'e'}, {"help-fields", no_argument, 0, 'e'}, {"endtime", required_argument, 0, 'E'}, {"file", required_argument, 0, 'f'}, {"gid", required_argument, 0, 'g'}, {"group", required_argument, 0, 'g'}, {"help", no_argument, 0, 'h'}, {"helpformat", no_argument, ¶ms.opt_help, OPT_LONG_HELP}, {"name", required_argument, 0, OPT_LONG_NAME}, {"nnodes", required_argument, 0, 'i'}, {"ncpus", required_argument, 0, 'I'}, {"jobs", required_argument, 0, 'j'}, {"timelimit-min", required_argument, 0, 'k'}, {"timelimit-max", required_argument, 0, 'K'}, {"long", no_argument, 0, 'l'}, {"allclusters", no_argument, 0, 'L'}, {"cluster", required_argument, 0, 'M'}, {"clusters", required_argument, 0, 'M'}, {"nodelist", required_argument, 0, 'N'}, {"noheader", no_argument, 0, 'n'}, {"fields", required_argument, 0, 'o'}, {"format", required_argument, 0, 'o'}, {"parsable", no_argument, 0, 'p'}, {"parsable2", no_argument, 0, 'P'}, {"qos", required_argument, 0, 'q'}, {"partition", required_argument, 0, 'r'}, {"state", required_argument, 0, 's'}, {"starttime", required_argument, 0, 'S'}, {"truncate", no_argument, 0, 'T'}, {"uid", required_argument, 0, 'u'}, {"usage", 
no_argument, ¶ms.opt_help, OPT_LONG_USAGE}, {"user", required_argument, 0, 'u'}, {"verbose", no_argument, 0, 'v'}, {"version", no_argument, 0, 'V'}, {"wckeys", required_argument, 0, 'W'}, {"associations", required_argument, 0, 'x'}, {0, 0, 0, 0}}; params.opt_uid = getuid(); params.opt_gid = getgid(); verbosity = 0; log_init("sacct", opts, SYSLOG_FACILITY_DAEMON, NULL); opterr = 1; /* Let getopt report problems to the user */ while (1) { /* now cycle through the command line */ c = getopt_long(argc, argv, "aA:bcC:dDeE:f:g:hi:I:j:k:K:lLM:nN:o:OpPq:r:s:S:Ttu:vVW:x:X", long_options, &optionIndex); if (c == -1) break; switch (c) { case 'a': all_users = 1; break; case 'A': if(!job_cond->acct_list) job_cond->acct_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->acct_list, optarg); break; case 'b': brief_output = true; break; case 'c': params.opt_completion = 1; break; case 'C': /* 'C' is deprecated since 'M' is cluster on everything else. */ case 'M': if(!strcasecmp(optarg, "-1")) { all_clusters = 1; break; } all_clusters=0; if(!job_cond->cluster_list) job_cond->cluster_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->cluster_list, optarg); break; case 'D': params.opt_dup = 1; break; case 'e': params.opt_help = 2; break; case 'E': job_cond->usage_end = parse_time(optarg, 1); if (errno == ESLURM_INVALID_TIME_VALUE) exit(1); break; case 'f': xfree(params.opt_filein); params.opt_filein = xstrdup(optarg); break; case 'g': if(!job_cond->groupid_list) job_cond->groupid_list = list_create(slurm_destroy_char); _addto_id_char_list(job_cond->groupid_list, optarg, 1); break; case 'h': params.opt_help = 1; break; case 'i': set = get_resource_arg_range( optarg, "requested node range", (int *)&job_cond->nodes_min, (int *)&job_cond->nodes_max, true); if (set == false) { error("invalid node range -i '%s'", optarg); exit(1); } break; case 'I': set = get_resource_arg_range( optarg, "requested cpu range", (int *)&job_cond->cpus_min, (int 
*)&job_cond->cpus_max, true); if (set == false) { error("invalid cpu range -i '%s'", optarg); exit(1); } break; case 'j': if ((strspn(optarg, "0123456789, ") < strlen(optarg)) && (strspn(optarg, ".batch0123456789, ") < strlen(optarg))) { fprintf(stderr, "Invalid jobs list: %s\n", optarg); exit(1); } if(!job_cond->step_list) job_cond->step_list = list_create( slurmdb_destroy_selected_step); _addto_step_list(job_cond->step_list, optarg); break; case 'k': job_cond->timelimit_min = time_str2mins(optarg); if (((int32_t)job_cond->timelimit_min <= 0) && (job_cond->timelimit_min != INFINITE)) fatal("Invalid time limit specification"); break; case 'K': job_cond->timelimit_max = time_str2mins(optarg); if (((int32_t)job_cond->timelimit_max <= 0) && (job_cond->timelimit_max != INFINITE)) fatal("Invalid time limit specification"); break; case 'L': all_clusters = 1; break; case 'l': long_output = true; break; case 'n': print_fields_have_header = 0; break; case 'N': if(job_cond->used_nodes) { error("Aleady asked for nodes '%s'", job_cond->used_nodes); break; } job_cond->used_nodes = xstrdup(optarg); break; case OPT_LONG_NAME: if(!job_cond->jobname_list) job_cond->jobname_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->jobname_list, optarg); break; case 'o': xstrfmtcat(params.opt_field_list, "%s,", optarg); break; case 'p': print_fields_parsable_print = PRINT_FIELDS_PARSABLE_ENDING; break; case 'P': print_fields_parsable_print = PRINT_FIELDS_PARSABLE_NO_ENDING; break; case 'q': if (!g_qos_list) { slurmdb_qos_cond_t qos_cond; memset(&qos_cond, 0, sizeof(slurmdb_qos_cond_t)); qos_cond.with_deleted = 1; g_qos_list = slurmdb_qos_get( acct_db_conn, &qos_cond); } if(!job_cond->qos_list) job_cond->qos_list = list_create(slurm_destroy_char); if(!slurmdb_addto_qos_char_list(job_cond->qos_list, g_qos_list, optarg, 0)) fatal("problem processing qos list"); break; case 'r': if(!job_cond->partition_list) job_cond->partition_list = list_create(slurm_destroy_char); 
slurm_addto_char_list(job_cond->partition_list, optarg); break; case 's': if(!job_cond->state_list) job_cond->state_list = list_create(slurm_destroy_char); _addto_state_char_list(job_cond->state_list, optarg); break; case 'S': job_cond->usage_start = parse_time(optarg, 1); if (errno == ESLURM_INVALID_TIME_VALUE) exit(1); break; case 'T': job_cond->without_usage_truncation = 0; break; case 'U': params.opt_help = 3; break; case 'u': if(!strcmp(optarg, "-1")) { all_users = 1; break; } all_users = 0; if(!job_cond->userid_list) job_cond->userid_list = list_create(slurm_destroy_char); _addto_id_char_list(job_cond->userid_list, optarg, 0); break; case 'v': /* Handle -vvv thusly... */ verbosity++; break; case 'W': if(!job_cond->wckey_list) job_cond->wckey_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->wckey_list, optarg); break; case 'V': print_slurm_version(); exit(0); case 'x': if(!job_cond->associd_list) job_cond->associd_list = list_create(slurm_destroy_char); slurm_addto_char_list(job_cond->associd_list, optarg); break; case 't': case 'X': params.opt_allocs = 1; break; case ':': case '?': /* getopt() has explained it */ exit(1); } } if (verbosity) { opts.stderr_level += verbosity; opts.prefix_level = 1; log_alter(opts, 0, NULL); } /* Now set params.opt_dup, unless they've already done so */ if (params.opt_dup < 0) /* not already set explicitly */ params.opt_dup = 0; job_cond->duplicates = params.opt_dup; job_cond->without_steps = params.opt_allocs; if(!job_cond->usage_start && !job_cond->step_list) { struct tm start_tm; job_cond->usage_start = time(NULL); if (!localtime_r(&job_cond->usage_start, &start_tm)) { error("Couldn't get localtime from %ld", (long)job_cond->usage_start); return; } start_tm.tm_sec = 0; start_tm.tm_min = 0; start_tm.tm_hour = 0; start_tm.tm_isdst = -1; job_cond->usage_start = mktime(&start_tm); } if(verbosity > 0) { char *start_char =NULL, *end_char = NULL; start_char = xstrdup(ctime(&job_cond->usage_start)); /* remove 
the new line */ start_char[strlen(start_char)-1] = '\0'; if(job_cond->usage_end) { end_char = xstrdup(ctime(&job_cond->usage_end)); /* remove the new line */ end_char[strlen(end_char)-1] = '\0'; } else end_char = xstrdup("Now"); info("Jobs eligible from %s - %s", start_char, end_char); xfree(start_char); xfree(end_char); } debug("Options selected:\n" "\topt_completion=%d\n" "\topt_dup=%d\n" "\topt_field_list=%s\n" "\topt_help=%d\n" "\topt_allocs=%d", params.opt_completion, params.opt_dup, params.opt_field_list, params.opt_help, params.opt_allocs); if(params.opt_completion) { g_slurm_jobcomp_init(params.opt_filein); acct_type = slurm_get_jobcomp_type(); if ((strcmp(acct_type, "jobcomp/none") == 0) && (stat(params.opt_filein, &stat_buf) != 0)) { fprintf(stderr, "SLURM job completion is disabled\n"); exit(1); } xfree(acct_type); } else { slurm_acct_storage_init(params.opt_filein); acct_type = slurm_get_accounting_storage_type(); if ((strcmp(acct_type, "accounting_storage/none") == 0) && (stat(params.opt_filein, &stat_buf) != 0)) { fprintf(stderr, "SLURM accounting storage is disabled\n"); exit(1); } xfree(acct_type); acct_db_conn = slurmdb_connection_get(); if(errno != SLURM_SUCCESS) { error("Problem talking to the database: %m"); exit(1); } } /* specific clusters requested? 
*/ if(all_clusters) { if(job_cond->cluster_list && list_count(job_cond->cluster_list)) { list_destroy(job_cond->cluster_list); job_cond->cluster_list = NULL; } debug2("Clusters requested:\tall"); } else if (job_cond->cluster_list && list_count(job_cond->cluster_list)) { debug2( "Clusters requested:"); itr = list_iterator_create(job_cond->cluster_list); while((start = list_next(itr))) debug2("\t: %s", start); list_iterator_destroy(itr); } else if(!job_cond->cluster_list || !list_count(job_cond->cluster_list)) { if(!job_cond->cluster_list) job_cond->cluster_list = list_create(slurm_destroy_char); if((start = slurm_get_cluster_name())) { list_append(job_cond->cluster_list, start); debug2("Clusters requested:\t%s", start); } } /* if any jobs or nodes are specified set to look for all users if none are set */ if(!job_cond->userid_list || !list_count(job_cond->userid_list)) if((job_cond->step_list && list_count(job_cond->step_list)) || job_cond->used_nodes) all_users=1; /* set all_users for user root if not requesting any */ if(!job_cond->userid_list && !params.opt_uid) all_users = 1; if(all_users) { if(job_cond->userid_list && list_count(job_cond->userid_list)) { list_destroy(job_cond->userid_list); job_cond->userid_list = NULL; } debug2("Userids requested:\tall"); } else if (job_cond->userid_list && list_count(job_cond->userid_list)) { debug2("Userids requested:"); itr = list_iterator_create(job_cond->userid_list); while((start = list_next(itr))) debug2("\t: %s", start); list_iterator_destroy(itr); } else if(!job_cond->userid_list || !list_count(job_cond->userid_list)) { if(!job_cond->userid_list) job_cond->userid_list = list_create(slurm_destroy_char); start = xstrdup_printf("%u", params.opt_uid); list_append(job_cond->userid_list, start); debug2("Userid requested\t: %s", start); } if (job_cond->groupid_list && list_count(job_cond->groupid_list)) { debug2("Groupids requested:"); itr = list_iterator_create(job_cond->groupid_list); while((start = list_next(itr))) 
debug2("\t: %s", start); list_iterator_destroy(itr); } /* specific partitions requested? */ if (job_cond->partition_list && list_count(job_cond->partition_list)) { debug2("Partitions requested:"); itr = list_iterator_create(job_cond->partition_list); while((start = list_next(itr))) debug2("\t: %s", start); list_iterator_destroy(itr); } /* specific qos' requested? */ if (job_cond->qos_list && list_count(job_cond->qos_list)) { start = get_qos_complete_str(g_qos_list, job_cond->qos_list); debug2("QOS requested\t: %s\n", start); xfree(start); } /* specific jobs requested? */ if (job_cond->step_list && list_count(job_cond->step_list)) { debug2("Jobs requested:"); itr = list_iterator_create(job_cond->step_list); while((selected_step = list_next(itr))) { if(selected_step->stepid != NO_VAL) debug2("\t: %d.%d", selected_step->jobid, selected_step->stepid); else debug2("\t: %d", selected_step->jobid); } list_iterator_destroy(itr); } /* specific states (completion state) requested? */ if (job_cond->state_list && list_count(job_cond->state_list)) { debug2("States requested:"); itr = list_iterator_create(job_cond->state_list); while((start = list_next(itr))) { debug2("\t: %s", job_state_string(atoi(start))); } list_iterator_destroy(itr); } if (job_cond->wckey_list && list_count(job_cond->wckey_list)) { debug2("Wckeys requested:"); itr = list_iterator_create(job_cond->wckey_list); while((start = list_next(itr))) debug2("\t: %s\n", start); list_iterator_destroy(itr); } if (job_cond->timelimit_min) { char time_str[128], tmp1[32], tmp2[32]; mins2time_str(job_cond->timelimit_min, tmp1, sizeof(tmp1)); sprintf(time_str, "%s", tmp1); if(job_cond->timelimit_max) { int len = strlen(tmp1); mins2time_str(job_cond->timelimit_max, tmp2, sizeof(tmp2)); sprintf(time_str+len, " - %s", tmp2); } debug2("Timelimit requested\t: %s", time_str); } /* specific jobnames requested? 
*/ if (job_cond->jobname_list && list_count(job_cond->jobname_list)) { debug2("Jobnames requested:"); itr = list_iterator_create(job_cond->jobname_list); while((start = list_next(itr))) { debug2("\t: %s", start); } list_iterator_destroy(itr); } /* select the output fields */ if(brief_output) { if(params.opt_completion) dot = BRIEF_COMP_FIELDS; else dot = BRIEF_FIELDS; xstrfmtcat(params.opt_field_list, "%s,", dot); } if(long_output) { if(params.opt_completion) dot = LONG_COMP_FIELDS; else dot = LONG_FIELDS; xstrfmtcat(params.opt_field_list, "%s,", dot); } if (params.opt_field_list==NULL) { if(params.opt_completion) dot = DEFAULT_COMP_FIELDS; else dot = DEFAULT_FIELDS; xstrfmtcat(params.opt_field_list, "%s,", dot); } start = params.opt_field_list; while ((end = strstr(start, ","))) { char *tmp_char = NULL; int command_len = 0; int newlen = 0; *end = 0; while (isspace(*start)) start++; /* discard whitespace */ if(!(int)*start) continue; if((tmp_char = strstr(start, "\%"))) { newlen = atoi(tmp_char+1); tmp_char[0] = '\0'; } command_len = strlen(start); for (i = 0; fields[i].name; i++) { if (!strncasecmp(fields[i].name, start, command_len)) goto foundfield; } error("Invalid field requested: \"%s\"", start); exit(1); foundfield: if(newlen) fields[i].len = newlen; list_append(print_fields_list, &fields[i]); start = end + 1; }
/*
 * slurm_sprint_job_info - output information about a specific Slurm
 *	job based upon message as loaded using slurm_load_jobs
 * IN job_ptr - an individual job information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *	   NULL is returned on failure.
 *
 * NOTE: white space in job_ptr->state_desc is replaced in place by
 *	underscores. No other field of *job_ptr is written; in particular the
 *	MEM_PER_CPU flag of pn_min_memory is left intact so that repeated
 *	calls on the same record produce the same text.
 * NOTE: fields are appended with xstrfmtcat() rather than through a fixed
 *	size line buffer, so long values (command, work_dir, node lists, ...)
 *	are not truncated.
 */
extern char *
slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner )
{
	int i, j;
	char time_str[32], *group_name, *user_name;
	char tmp1[128], tmp2[128], tmp3[128], tmp4[128], tmp5[128], *tmp6_ptr;
	char tmp_line[512];	/* scratch for short, bounded values only */
	char *ionodes = NULL;
	uint16_t exit_status = 0, term_sig = 0;
	job_resources_t *job_resrcs = job_ptr->job_resrcs;
	char *out = NULL;
	time_t run_time;
	/* min_nodes starts at 0 so that it is defined even if the select
	 * plugin getter below does not store a value */
	uint32_t min_nodes = 0, max_nodes = 0;
	char *nodelist = "NodeList";
	bitstr_t *core_bitmap;
	char *host;
	int sock_inx, sock_reps, last;
	int rel_node_inx;
	int bit_inx, bit_reps;
	uint32_t *last_mem_alloc_ptr = NULL;
	uint32_t last_mem_alloc = NO_VAL;
	char *last_hosts;
	hostlist_t hl, hl_last;
	char select_buf[122];
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	/* separator written between logical output lines */
	char *line_end = one_liner ? " " : "\n ";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		nodelist = "MidplaneList";
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &ionodes);
	}

	/****** Line 1 ******/
	xstrfmtcat(out, "JobId=%u ", job_ptr->job_id);
	if (job_ptr->array_job_id) {
		xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%u ",
			   job_ptr->array_job_id, job_ptr->array_task_id);
	}
	xstrfmtcat(out, "Name=%s", job_ptr->name);
	xstrcat(out, line_end);

	/****** Line 2 ******/
	user_name = uid_to_string((uid_t) job_ptr->user_id);
	group_name = gid_to_string((gid_t) job_ptr->group_id);
	xstrfmtcat(out, "UserId=%s(%u) GroupId=%s(%u)",
		   user_name, job_ptr->user_id,
		   group_name, job_ptr->group_id);
	xfree(user_name);
	xfree(group_name);
	xstrcat(out, line_end);

	/****** Line 3 ******/
	xstrfmtcat(out, "Priority=%u Account=%s QOS=%s",
		   job_ptr->priority, job_ptr->account, job_ptr->qos);
	if (slurm_get_track_wckey())
		xstrfmtcat(out, " WCKey=%s", job_ptr->wckey);
	xstrcat(out, line_end);

	/****** Line 4 ******/
	if (job_ptr->state_desc) {
		/* Replace white space with underscore for easier parsing.
		 * The cast to unsigned char keeps isspace() defined for
		 * bytes with the high bit set. */
		for (j = 0; job_ptr->state_desc[j]; j++) {
			if (isspace((unsigned char) job_ptr->state_desc[j]))
				job_ptr->state_desc[j] = '_';
		}
		tmp6_ptr = job_ptr->state_desc;
	} else
		tmp6_ptr = job_reason_string(job_ptr->state_reason);
	xstrfmtcat(out, "JobState=%s Reason=%s Dependency=%s",
		   job_state_string(job_ptr->job_state), tmp6_ptr,
		   job_ptr->dependency);
	xstrcat(out, line_end);

	/****** Line 5 ******/
	xstrfmtcat(out, "Requeue=%u Restarts=%u BatchFlag=%u ",
		   job_ptr->requeue, job_ptr->restart_cnt,
		   job_ptr->batch_flag);
	if (WIFSIGNALED(job_ptr->exit_code))
		term_sig = WTERMSIG(job_ptr->exit_code);
	exit_status = WEXITSTATUS(job_ptr->exit_code);
	xstrfmtcat(out, "ExitCode=%u:%u", exit_status, term_sig);
	xstrcat(out, line_end);

	/****** Line 5a (optional) ******/
	if (job_ptr->show_flags & SHOW_DETAIL) {
		if (WIFSIGNALED(job_ptr->derived_ec))
			term_sig = WTERMSIG(job_ptr->derived_ec);
		else
			term_sig = 0;
		exit_status = WEXITSTATUS(job_ptr->derived_ec);
		xstrfmtcat(out, "DerivedExitCode=%u:%u",
			   exit_status, term_sig);
		xstrcat(out, line_end);
	}

	/****** Line 6 ******/
	if (IS_JOB_PENDING(job_ptr))
		run_time = 0;
	else if (IS_JOB_SUSPENDED(job_ptr))
		run_time = job_ptr->pre_sus_time;
	else {
		time_t end_time;
		if (IS_JOB_RUNNING(job_ptr) || (job_ptr->end_time == 0))
			end_time = time(NULL);
		else
			end_time = job_ptr->end_time;
		if (job_ptr->suspend_time) {
			run_time = (time_t)
				(difftime(end_time, job_ptr->suspend_time)
				 + job_ptr->pre_sus_time);
		} else
			run_time = (time_t)
				difftime(end_time, job_ptr->start_time);
	}
	secs2time_str(run_time, tmp1, sizeof(tmp1));
	xstrfmtcat(out, "RunTime=%s ", tmp1);

	if (job_ptr->time_limit == NO_VAL)
		xstrcat(out, "TimeLimit=Partition_Limit");
	else {
		mins2time_str(job_ptr->time_limit, tmp_line,
			      sizeof(tmp_line));
		xstrfmtcat(out, "TimeLimit=%s", tmp_line);
	}

	if (job_ptr->time_min == 0)
		xstrcat(out, " TimeMin=N/A");
	else {
		mins2time_str(job_ptr->time_min, tmp_line,
			      sizeof(tmp_line));
		xstrfmtcat(out, " TimeMin=%s", tmp_line);
	}
	xstrcat(out, line_end);

	/****** Line 7 ******/
	slurm_make_time_str((time_t *)&job_ptr->submit_time, time_str,
			    sizeof(time_str));
	xstrfmtcat(out, "SubmitTime=%s ", time_str);

	slurm_make_time_str((time_t *)&job_ptr->eligible_time, time_str,
			    sizeof(time_str));
	xstrfmtcat(out, "EligibleTime=%s", time_str);
	xstrcat(out, line_end);

	/****** Line 8 (optional) ******/
	if (job_ptr->resize_time) {
		slurm_make_time_str((time_t *)&job_ptr->resize_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "ResizeTime=%s", time_str);
		xstrcat(out, line_end);
	}

	/****** Line 9 ******/
	slurm_make_time_str((time_t *)&job_ptr->start_time, time_str,
			    sizeof(time_str));
	xstrfmtcat(out, "StartTime=%s ", time_str);

	if ((job_ptr->time_limit == INFINITE) &&
	    (job_ptr->end_time > time(NULL)))
		xstrcat(out, "EndTime=Unknown");
	else {
		slurm_make_time_str ((time_t *)&job_ptr->end_time, time_str,
				     sizeof(time_str));
		xstrfmtcat(out, "EndTime=%s", time_str);
	}
	xstrcat(out, line_end);

	/****** Line 10 ******/
	if (job_ptr->preempt_time == 0)
		xstrcat(out, "PreemptTime=None ");
	else {
		slurm_make_time_str((time_t *)&job_ptr->preempt_time,
				    time_str, sizeof(time_str));
		xstrfmtcat(out, "PreemptTime=%s ", time_str);
	}
	if (job_ptr->suspend_time) {
		slurm_make_time_str ((time_t *)&job_ptr->suspend_time,
				     time_str, sizeof(time_str));
	} else {
		snprintf(time_str, sizeof(time_str), "None");
	}
	xstrfmtcat(out, "SuspendTime=%s SecsPreSuspend=%ld",
		   time_str, (long int)job_ptr->pre_sus_time);
	xstrcat(out, line_end);

	/****** Line 11 ******/
	xstrfmtcat(out, "Partition=%s AllocNode:Sid=%s:%u",
		   job_ptr->partition, job_ptr->alloc_node,
		   job_ptr->alloc_sid);
	xstrcat(out, line_end);

	/****** Line 12 ******/
	xstrfmtcat(out, "Req%s=%s Exc%s=%s",
		   nodelist, job_ptr->req_nodes,
		   nodelist, job_ptr->exc_nodes);
	xstrcat(out, line_end);

	/****** Line 13 ******/
	xstrfmtcat(out, "%s=", nodelist);
	xstrcat(out, job_ptr->nodes);
	if (job_ptr->nodes && ionodes)
		xstrfmtcat(out, "[%s]", ionodes);
	/* ionodes is not needed past this point; release it whether or not
	 * it was printed so it cannot leak when job_ptr->nodes is NULL */
	xfree(ionodes);
	xstrcat(out, line_end);

	/****** Line 14 (optional) ******/
	if (job_ptr->batch_host) {
		xstrfmtcat(out, "BatchHost=%s", job_ptr->batch_host);
		xstrcat(out, line_end);
	}

	/****** Line 15 ******/
	if (cluster_flags & CLUSTER_FLAG_BG) {
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &min_nodes);
		if ((min_nodes == 0) || (min_nodes == NO_VAL)) {
			min_nodes = job_ptr->num_nodes;
			max_nodes = job_ptr->max_nodes;
		} else if (job_ptr->max_nodes)
			max_nodes = min_nodes;
	} else {
		min_nodes = job_ptr->num_nodes;
		max_nodes = job_ptr->max_nodes;
	}

	_sprint_range(tmp1, sizeof(tmp1), job_ptr->num_cpus,
		      job_ptr->max_cpus);
	_sprint_range(tmp2, sizeof(tmp2), min_nodes, max_nodes);
	if (job_ptr->sockets_per_node == (uint16_t) NO_VAL)
		strcpy(tmp3, "*");
	else
		snprintf(tmp3, sizeof(tmp3), "%u", job_ptr->sockets_per_node);
	if (job_ptr->cores_per_socket == (uint16_t) NO_VAL)
		strcpy(tmp4, "*");
	else
		snprintf(tmp4, sizeof(tmp4), "%u", job_ptr->cores_per_socket);
	if (job_ptr->threads_per_core == (uint16_t) NO_VAL)
		strcpy(tmp5, "*");
	else
		snprintf(tmp5, sizeof(tmp5), "%u", job_ptr->threads_per_core);
	xstrfmtcat(out,
		   "NumNodes=%s NumCPUs=%s CPUs/Task=%u ReqS:C:T=%s:%s:%s",
		   tmp2, tmp1, job_ptr->cpus_per_task, tmp3, tmp4, tmp5);
	xstrcat(out, line_end);

	/****** Allocated resource details (optional) ******/
	if (!job_resrcs)
		goto line15;

	if (cluster_flags & CLUSTER_FLAG_BG) {
		if ((job_resrcs->cpu_array_cnt > 0) &&
		    (job_resrcs->cpu_array_value) &&
		    (job_resrcs->cpu_array_reps)) {
			/* running estimate of the printed width, used only
			 * to decide when to elide middle entries */
			int length = 10;

			xstrcat(out, "CPUs=");
			for (i = 0; i < job_resrcs->cpu_array_cnt; i++) {
				if (length > 70) {
					/* skip to last CPU group entry */
					if (i < job_resrcs->cpu_array_cnt - 1) {
						continue;
					}
					/* add ellipsis before last entry */
					xstrcat(out, "...,");
					length += 4;
				}

				/* NOTE(review): the guard above checks
				 * cpu_array_value but cpus[i] is what gets
				 * printed -- confirm this is intended */
				snprintf(tmp_line, sizeof(tmp_line), "%d",
					 job_resrcs->cpus[i]);
				xstrcat(out, tmp_line);
				length += strlen(tmp_line);
				if (job_resrcs->cpu_array_reps[i] > 1) {
					snprintf(tmp_line, sizeof(tmp_line),
						 "*%d",
						 job_resrcs->cpu_array_reps[i]);
					xstrcat(out, tmp_line);
					length += strlen(tmp_line);
				}
				if (i < job_resrcs->cpu_array_cnt - 1) {
					xstrcat(out, ",");
					length++;
				}
			}
			xstrcat(out, line_end);
		}
	} else {
		if (!job_resrcs->core_bitmap)
			goto line15;

		last = bit_fls(job_resrcs->core_bitmap);
		if (last == -1)
			goto line15;

		hl = hostlist_create(job_ptr->nodes);
		if (!hl) {
			error("slurm_sprint_job_info: hostlist_create: %s",
			      job_ptr->nodes);
			xfree(out);	/* do not leak the partial output */
			return NULL;
		}
		hl_last = hostlist_create(NULL);
		if (!hl_last) {
			error("slurm_sprint_job_info: hostlist_create: NULL");
			hostlist_destroy(hl);
			xfree(out);	/* do not leak the partial output */
			return NULL;
		}

		bit_inx = 0;
		sock_inx = sock_reps = 0;

		/* tmp1[] stores the current cpu(s) allocated */
		tmp2[0] = '\0';	/* stores last cpu(s) allocated */
		for (rel_node_inx = 0; rel_node_inx < job_resrcs->nhosts;
		     rel_node_inx++) {

			if (sock_reps >=
			    job_resrcs->sock_core_rep_count[sock_inx]) {
				sock_inx++;
				sock_reps = 0;
			}
			sock_reps++;

			/* extract this node's slice of the job core bitmap */
			bit_reps = job_resrcs->sockets_per_node[sock_inx] *
				   job_resrcs->cores_per_socket[sock_inx];

			core_bitmap = bit_alloc(bit_reps);
			for (j = 0; j < bit_reps; j++) {
				if (bit_test(job_resrcs->core_bitmap, bit_inx))
					bit_set(core_bitmap, j);
				bit_inx++;
			}

			bit_fmt(tmp1, sizeof(tmp1), core_bitmap);
			FREE_NULL_BITMAP(core_bitmap);
			host = hostlist_shift(hl);
			/*
			 * If the allocation values for this host are not the
			 * same as the last host, print the report of the last
			 * group of hosts that had identical allocation values.
			 */
			if (strcmp(tmp1, tmp2) ||
			    (last_mem_alloc_ptr !=
			     job_resrcs->memory_allocated) ||
			    (job_resrcs->memory_allocated &&
			     (last_mem_alloc !=
			      job_resrcs->memory_allocated[rel_node_inx]))) {
				if (hostlist_count(hl_last)) {
					last_hosts =
						hostlist_ranged_string_xmalloc(
							hl_last);
					xstrfmtcat(out,
						   " Nodes=%s CPU_IDs=%s Mem=%u",
						   last_hosts, tmp2,
						   last_mem_alloc_ptr ?
						   last_mem_alloc : 0);
					xfree(last_hosts);
					xstrcat(out, line_end);

					hostlist_destroy(hl_last);
					hl_last = hostlist_create(NULL);
				}
				strcpy(tmp2, tmp1);
				last_mem_alloc_ptr =
					job_resrcs->memory_allocated;
				if (last_mem_alloc_ptr)
					last_mem_alloc = job_resrcs->
						memory_allocated[rel_node_inx];
				else
					last_mem_alloc = NO_VAL;
			}
			hostlist_push_host(hl_last, host);
			free(host);

			if (bit_inx > last)
				break;
		}

		/* flush the final group of hosts */
		if (hostlist_count(hl_last)) {
			last_hosts = hostlist_ranged_string_xmalloc(hl_last);
			xstrfmtcat(out, " Nodes=%s CPU_IDs=%s Mem=%u",
				   last_hosts, tmp2,
				   last_mem_alloc_ptr ? last_mem_alloc : 0);
			xfree(last_hosts);
			xstrcat(out, line_end);
		}
		hostlist_destroy(hl);
		hostlist_destroy(hl_last);
	}

	/****** Line 15 ******/
line15:
	/* The MEM_PER_CPU flag selects the label; it is masked off only in
	 * the value passed to convert_num_unit(), never in *job_ptr */
	if (job_ptr->pn_min_memory & MEM_PER_CPU)
		tmp6_ptr = "CPU";
	else
		tmp6_ptr = "Node";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		convert_num_unit((float)job_ptr->pn_min_cpus,
				 tmp1, sizeof(tmp1), UNIT_NONE);
		xstrfmtcat(out, "MinCPUsNode=%s", tmp1);
	} else {
		xstrfmtcat(out, "MinCPUsNode=%u", job_ptr->pn_min_cpus);
	}

	convert_num_unit((float)(job_ptr->pn_min_memory & (~MEM_PER_CPU)),
			 tmp1, sizeof(tmp1), UNIT_MEGA);
	convert_num_unit((float)job_ptr->pn_min_tmp_disk,
			 tmp2, sizeof(tmp2), UNIT_MEGA);
	xstrfmtcat(out, " MinMemory%s=%s MinTmpDiskNode=%s",
		   tmp6_ptr, tmp1, tmp2);
	xstrcat(out, line_end);

	/****** Line 16 ******/
	xstrfmtcat(out, "Features=%s Gres=%s Reservation=%s",
		   job_ptr->features, job_ptr->gres, job_ptr->resv_name);
	xstrcat(out, line_end);

	/****** Line 17 ******/
	xstrfmtcat(out, "Shared=%s Contiguous=%d Licenses=%s Network=%s",
		   (job_ptr->shared == 0 ? "0" :
		    job_ptr->shared == 1 ? "1" : "OK"),
		   job_ptr->contiguous, job_ptr->licenses, job_ptr->network);
	xstrcat(out, line_end);

	/****** Line 18 ******/
	xstrfmtcat(out, "Command=%s", job_ptr->command);
	xstrcat(out, line_end);

	/****** Line 19 ******/
	xstrfmtcat(out, "WorkDir=%s", job_ptr->work_dir);

	if (cluster_flags & CLUSTER_FLAG_BG) {
		/****** Line 20 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_BG_ID);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "Block_ID=%s", select_buf);
		}

		/****** Line 21 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MIXED_SHORT);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrcat(out, select_buf);
		}

		if (cluster_flags & CLUSTER_FLAG_BGL) {
			/****** Line 22 (optional) ******/
			select_g_select_jobinfo_sprint(
				job_ptr->select_jobinfo,
				select_buf, sizeof(select_buf),
				SELECT_PRINT_BLRTS_IMAGE);
			if (select_buf[0] != '\0') {
				xstrcat(out, line_end);
				xstrfmtcat(out, "BlrtsImage=%s", select_buf);
			}
		}

		/****** Line 23 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_LINUX_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			if (cluster_flags & CLUSTER_FLAG_BGL)
				xstrfmtcat(out, "LinuxImage=%s", select_buf);
			else
				xstrfmtcat(out, "CnloadImage=%s", select_buf);
		}

		/****** Line 24 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MLOADER_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "MloaderImage=%s", select_buf);
		}

		/****** Line 25 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_RAMDISK_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			if (cluster_flags & CLUSTER_FLAG_BGL)
				xstrfmtcat(out, "RamDiskImage=%s", select_buf);
			else
				xstrfmtcat(out, "IoloadImage=%s", select_buf);
		}
	}

	/****** Line 26 (optional) ******/
	if (job_ptr->comment) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "Comment=%s ", job_ptr->comment);
	}

	/****** Line 27 (optional) ******/
	if (job_ptr->batch_script) {
		xstrcat(out, line_end);
		xstrcat(out, "BatchScript=\n");
		xstrcat(out, job_ptr->batch_script);
	}

	/****** Line 28 (optional) ******/
	if (job_ptr->req_switch) {
		char time_buf[32];
		xstrcat(out, line_end);
		secs2time_str((time_t) job_ptr->wait4switch, time_buf,
			      sizeof(time_buf));
		xstrfmtcat(out, "Switches=%u@%s\n", job_ptr->req_switch,
			   time_buf);
	}

	/****** Line 29 (optional) ******/
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}
static int _attempt_backfill(void) { bool filter_root = false; List job_queue; job_queue_rec_t *job_queue_rec; slurmdb_qos_rec_t *qos_ptr = NULL; int i, j, node_space_recs; struct job_record *job_ptr; struct part_record *part_ptr; uint32_t end_time, end_reserve; uint32_t time_limit, comp_time_limit, orig_time_limit; uint32_t min_nodes, max_nodes, req_nodes; bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL; time_t now = time(NULL), sched_start, later_start, start_res; node_space_map_t *node_space; static int sched_timeout = 0; int this_sched_timeout = 0, rc = 0; sched_start = now; if (sched_timeout == 0) { sched_timeout = slurm_get_msg_timeout() / 2; sched_timeout = MAX(sched_timeout, 1); sched_timeout = MIN(sched_timeout, 10); } this_sched_timeout = sched_timeout; #ifdef HAVE_CRAY /* * Run a Basil Inventory immediately before setting up the schedule * plan, to avoid race conditions caused by ALPS node state change. * Needs to be done with the node-state lock taken. */ if (select_g_reconfigure()) { debug4("backfill: not scheduling due to ALPS"); return SLURM_SUCCESS; } #endif if (slurm_get_root_filter()) filter_root = true; job_queue = build_job_queue(true); if (list_count(job_queue) <= 1) { debug("backfill: no jobs to backfill"); list_destroy(job_queue); return 0; } node_space = xmalloc(sizeof(node_space_map_t) * (max_backfill_job_cnt + 3)); node_space[0].begin_time = sched_start; node_space[0].end_time = sched_start + backfill_window; node_space[0].avail_bitmap = bit_copy(avail_node_bitmap); node_space[0].next = 0; node_space_recs = 1; if (debug_flags & DEBUG_FLAG_BACKFILL) _dump_node_space_table(node_space); while ((job_queue_rec = (job_queue_rec_t *) list_pop_bottom(job_queue, sort_job_queue2))) { job_ptr = job_queue_rec->job_ptr; part_ptr = job_queue_rec->part_ptr; xfree(job_queue_rec); if (!IS_JOB_PENDING(job_ptr)) continue; /* started in other partition */ job_ptr->part_ptr = part_ptr; if (debug_flags & DEBUG_FLAG_BACKFILL) info("backfill test for job %u", 
job_ptr->job_id); if ((job_ptr->state_reason == WAIT_ASSOC_JOB_LIMIT) || (job_ptr->state_reason == WAIT_ASSOC_RESOURCE_LIMIT) || (job_ptr->state_reason == WAIT_ASSOC_TIME_LIMIT) || (job_ptr->state_reason == WAIT_QOS_JOB_LIMIT) || (job_ptr->state_reason == WAIT_QOS_RESOURCE_LIMIT) || (job_ptr->state_reason == WAIT_QOS_TIME_LIMIT) || !acct_policy_job_runnable(job_ptr)) { debug2("backfill: job %u is not allowed to run now. " "Skipping it. State=%s. Reason=%s. Priority=%u", job_ptr->job_id, job_state_string(job_ptr->job_state), job_reason_string(job_ptr->state_reason), job_ptr->priority); continue; } if (((part_ptr->state_up & PARTITION_SCHED) == 0) || (part_ptr->node_bitmap == NULL)) continue; if ((part_ptr->flags & PART_FLAG_ROOT_ONLY) && filter_root) continue; if ((!job_independent(job_ptr, 0)) || (license_job_test(job_ptr, time(NULL)) != SLURM_SUCCESS)) continue; /* Determine minimum and maximum node counts */ min_nodes = MAX(job_ptr->details->min_nodes, part_ptr->min_nodes); if (job_ptr->details->max_nodes == 0) max_nodes = part_ptr->max_nodes; else max_nodes = MIN(job_ptr->details->max_nodes, part_ptr->max_nodes); max_nodes = MIN(max_nodes, 500000); /* prevent overflows */ if (job_ptr->details->max_nodes) req_nodes = max_nodes; else req_nodes = min_nodes; if (min_nodes > max_nodes) { /* job's min_nodes exceeds partition's max_nodes */ continue; } /* Determine job's expected completion time */ if (job_ptr->time_limit == NO_VAL) { if (part_ptr->max_time == INFINITE) time_limit = 365 * 24 * 60; /* one year */ else time_limit = part_ptr->max_time; } else { if (part_ptr->max_time == INFINITE) time_limit = job_ptr->time_limit; else time_limit = MIN(job_ptr->time_limit, part_ptr->max_time); } comp_time_limit = time_limit; orig_time_limit = job_ptr->time_limit; if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE)) time_limit = job_ptr->time_limit = 1; else if (job_ptr->time_min && (job_ptr->time_min < time_limit)) time_limit = job_ptr->time_limit = job_ptr->time_min; /* 
Determine impact of any resource reservations */ later_start = now; TRY_LATER: FREE_NULL_BITMAP(avail_bitmap); start_res = later_start; later_start = 0; j = job_test_resv(job_ptr, &start_res, true, &avail_bitmap); if (j != SLURM_SUCCESS) { job_ptr->time_limit = orig_time_limit; continue; } if (start_res > now) end_time = (time_limit * 60) + start_res; else end_time = (time_limit * 60) + now; /* Identify usable nodes for this job */ bit_and(avail_bitmap, part_ptr->node_bitmap); bit_and(avail_bitmap, up_node_bitmap); for (j=0; ; ) { if ((node_space[j].end_time > start_res) && node_space[j].next && (later_start == 0)) later_start = node_space[j].end_time; if (node_space[j].end_time <= start_res) ; else if (node_space[j].begin_time <= end_time) { bit_and(avail_bitmap, node_space[j].avail_bitmap); } else break; if ((j = node_space[j].next) == 0) break; } if (job_ptr->details->exc_node_bitmap) { bit_not(job_ptr->details->exc_node_bitmap); bit_and(avail_bitmap, job_ptr->details->exc_node_bitmap); bit_not(job_ptr->details->exc_node_bitmap); } /* Test if insufficient nodes remain OR * required nodes missing OR * nodes lack features */ if ((bit_set_count(avail_bitmap) < min_nodes) || ((job_ptr->details->req_node_bitmap) && (!bit_super_set(job_ptr->details->req_node_bitmap, avail_bitmap))) || (job_req_node_filter(job_ptr, avail_bitmap))) { if (later_start) { job_ptr->start_time = 0; goto TRY_LATER; } job_ptr->time_limit = orig_time_limit; continue; } /* Identify nodes which are definitely off limits */ FREE_NULL_BITMAP(resv_bitmap); resv_bitmap = bit_copy(avail_bitmap); bit_not(resv_bitmap); if ((time(NULL) - sched_start) >= this_sched_timeout) { debug("backfill: loop taking too long, yielding locks"); if (_yield_locks()) { debug("backfill: system state changed, " "breaking out"); rc = 1; break; } else { this_sched_timeout += sched_timeout; } } /* this is the time consuming operation */ debug2("backfill: entering _try_sched for job %u.", job_ptr->job_id); j = 
_try_sched(job_ptr, &avail_bitmap, min_nodes, max_nodes, req_nodes); debug2("backfill: finished _try_sched for job %u.", job_ptr->job_id); now = time(NULL); if (j != SLURM_SUCCESS) { job_ptr->time_limit = orig_time_limit; job_ptr->start_time = 0; continue; /* not runable */ } if (start_res > job_ptr->start_time) { job_ptr->start_time = start_res; last_job_update = now; } if (job_ptr->start_time <= now) { int rc = _start_job(job_ptr, resv_bitmap); if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE)) job_ptr->time_limit = orig_time_limit; else if ((rc == SLURM_SUCCESS) && job_ptr->time_min) { /* Set time limit as high as possible */ job_ptr->time_limit = comp_time_limit; job_ptr->end_time = job_ptr->start_time + (comp_time_limit * 60); _reset_job_time_limit(job_ptr, now, node_space); time_limit = job_ptr->time_limit; } else { job_ptr->time_limit = orig_time_limit; } if (rc == ESLURM_ACCOUNTING_POLICY) { /* Unknown future start time, just skip job */ job_ptr->start_time = 0; continue; } else if (rc != SLURM_SUCCESS) { /* Planned to start job, but something bad * happended. */ job_ptr->start_time = 0; break; } else { /* Started this job, move to next one */ continue; } } else job_ptr->time_limit = orig_time_limit; if (later_start && (job_ptr->start_time > later_start)) { /* Try later when some nodes currently reserved for * pending jobs are free */ job_ptr->start_time = 0; goto TRY_LATER; } if (job_ptr->start_time > (sched_start + backfill_window)) { /* Starts too far in the future to worry about */ continue; } if (node_space_recs >= max_backfill_job_cnt) { /* Already have too many jobs to deal with */ break; } end_reserve = job_ptr->start_time + (time_limit * 60); if (_test_resv_overlap(node_space, avail_bitmap, job_ptr->start_time, end_reserve)) { /* This job overlaps with an existing reservation for * job to be backfill scheduled, which the sched * plugin does not know about. Try again later. 
*/ later_start = job_ptr->start_time; job_ptr->start_time = 0; goto TRY_LATER; } /* * Add reservation to scheduling table if appropriate */ qos_ptr = job_ptr->qos_ptr; if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE)) continue; bit_not(avail_bitmap); _add_reservation(job_ptr->start_time, end_reserve, avail_bitmap, node_space, &node_space_recs); if (debug_flags & DEBUG_FLAG_BACKFILL) _dump_node_space_table(node_space); } FREE_NULL_BITMAP(avail_bitmap); FREE_NULL_BITMAP(resv_bitmap); for (i=0; ; ) { FREE_NULL_BITMAP(node_space[i].avail_bitmap); if ((i = node_space[i].next) == 0) break; } xfree(node_space); list_destroy(job_queue); return rc; }
static struct jobcomp_info * _jobcomp_info_create (struct job_record *job) { enum job_states state; struct jobcomp_info * j = xmalloc (sizeof (*j)); j->jobid = job->job_id; j->uid = job->user_id; j->gid = job->group_id; j->name = xstrdup (job->name); j->array_job_id = job->array_job_id; j->array_task_id = job->array_task_id; if (IS_JOB_RESIZING(job)) { state = JOB_RESIZING; j->jobstate = xstrdup (job_state_string (state)); if (job->resize_time) j->start = job->resize_time; else j->start = job->start_time; j->end = time(NULL); } else { /* Job state will typically have JOB_COMPLETING or JOB_RESIZING * flag set when called. We remove the flags to get the eventual * completion state: JOB_FAILED, JOB_TIMEOUT, etc. */ state = job->job_state & JOB_STATE_BASE; j->jobstate = xstrdup (job_state_string (state)); if (job->resize_time) j->start = job->resize_time; else if (job->start_time > job->end_time) { /* Job cancelled while pending and * expected start time is in the future. */ j->start = 0; } else j->start = job->start_time; j->end = job->end_time; } j->partition = xstrdup (job->partition); if ((job->time_limit == NO_VAL) && job->part_ptr) j->limit = job->part_ptr->max_time; else j->limit = job->time_limit; j->submit = job->details ? job->details->submit_time:job->start_time; j->batch_flag = job->batch_flag; j->nodes = xstrdup (job->nodes); j->nprocs = job->total_cpus; j->nnodes = job->node_cnt; j->account = job->account ? 
xstrdup (job->account) : NULL; if (job->details && job->details->work_dir) j->work_dir = xstrdup(job->details->work_dir); else j->work_dir = xstrdup("unknown"); if (job->details) { if (job->details->std_in) j->std_in = xstrdup(job->details->std_in); if (job->details->std_out) j->std_out = xstrdup(job->details->std_out); if (job->details->std_err) j->std_err = xstrdup(job->details->std_err); } #ifdef HAVE_BG j->connect_type = select_g_select_jobinfo_xstrdup(job->select_jobinfo, SELECT_PRINT_CONNECTION); j->geometry = select_g_select_jobinfo_xstrdup(job->select_jobinfo, SELECT_PRINT_GEOMETRY); j->blockid = select_g_select_jobinfo_xstrdup(job->select_jobinfo, SELECT_PRINT_BG_ID); #endif return (j); }
/*
 * slurm_sprint_job_step_info - output information about a specific Slurm
 *	job step based upon message as loaded using slurm_get_job_steps
 * IN job_step_ptr - an individual job step information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *	   NULL is returned on failure.
 *
 * NOTE: fields are appended with xstrfmtcat() rather than through a fixed
 *	128 byte line buffer, so long node lists, names and paths are not
 *	truncated.
 */
char *
slurm_sprint_job_step_info ( job_step_info_t * job_step_ptr,
			    int one_liner )
{
	char tmp_node_cnt[40];
	char time_str[32];
	char limit_str[32];
	char *out = NULL;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	/* separator written between logical output lines */
	char *line_end = one_liner ? " " : "\n ";

	/****** Line 1 ******/
	slurm_make_time_str ((time_t *)&job_step_ptr->start_time, time_str,
			     sizeof(time_str));
	if (job_step_ptr->time_limit == INFINITE)
		snprintf(limit_str, sizeof(limit_str), "UNLIMITED");
	else
		secs2time_str ((time_t)job_step_ptr->time_limit * 60,
			       limit_str, sizeof(limit_str));
	xstrfmtcat(out,
		   "StepId=%u.%u UserId=%u StartTime=%s TimeLimit=%s",
		   job_step_ptr->job_id, job_step_ptr->step_id,
		   job_step_ptr->user_id, time_str, limit_str);
	xstrcat(out, line_end);

	/****** Line 2 ******/
	xstrfmtcat(out, "State=%s ", job_state_string(job_step_ptr->state));
	if (cluster_flags & CLUSTER_FLAG_BG) {
		/* start from NULL so the test below is well defined even if
		 * the getter does not store a value */
		char *io_nodes = NULL;

		select_g_select_jobinfo_get(job_step_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &io_nodes);
		if (io_nodes) {
			xstrfmtcat(out,
				   "Partition=%s MidplaneList=%s[%s] Gres=%s",
				   job_step_ptr->partition,
				   job_step_ptr->nodes, io_nodes,
				   job_step_ptr->gres);
			xfree(io_nodes);
		} else {
			xstrfmtcat(out,
				   "Partition=%s MidplaneList=%s Gres=%s",
				   job_step_ptr->partition,
				   job_step_ptr->nodes,
				   job_step_ptr->gres);
		}
	} else {
		xstrfmtcat(out, "Partition=%s NodeList=%s Gres=%s",
			   job_step_ptr->partition, job_step_ptr->nodes,
			   job_step_ptr->gres);
	}
	xstrcat(out, line_end);

	/****** Line 3 ******/
	if (cluster_flags & CLUSTER_FLAG_BGQ) {
		uint32_t nodes = 0;
		select_g_select_jobinfo_get(job_step_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &nodes);
		convert_num_unit((float)nodes, tmp_node_cnt,
				 sizeof(tmp_node_cnt), UNIT_NONE);
	} else {
		convert_num_unit((float)_nodes_in_list(job_step_ptr->nodes),
				 tmp_node_cnt, sizeof(tmp_node_cnt),
				 UNIT_NONE);
	}

	xstrfmtcat(out, "Nodes=%s Tasks=%u Name=%s Network=%s",
		   tmp_node_cnt, job_step_ptr->num_tasks,
		   job_step_ptr->name, job_step_ptr->network);
	xstrcat(out, line_end);

	/****** Line 4 ******/
	xstrfmtcat(out, "ResvPorts=%s Checkpoint=%u CheckpointDir=%s",
		   job_step_ptr->resv_ports, job_step_ptr->ckpt_interval,
		   job_step_ptr->ckpt_dir);
	xstrcat(out, line_end);

	/****** Line 5 ******/
	if (job_step_ptr->cpu_freq == NO_VAL) {
		xstrcat(out, "CPUFreqReq=Default\n\n");
	} else if (job_step_ptr->cpu_freq & CPU_FREQ_RANGE_FLAG) {
		/* symbolic request rather than a numeric frequency */
		switch (job_step_ptr->cpu_freq) {
		case CPU_FREQ_LOW :
			xstrcat(out, "CPUFreqReq=Low\n\n");
			break;
		case CPU_FREQ_MEDIUM :
			xstrcat(out, "CPUFreqReq=Medium\n\n");
			break;
		case CPU_FREQ_HIGH :
			xstrcat(out, "CPUFreqReq=High\n\n");
			break;
		default :
			xstrcat(out, "CPUFreqReq=Unknown\n\n");
			break;
		}
	} else {
		xstrfmtcat(out, "CPUFreqReq=%u\n\n", job_step_ptr->cpu_freq);
	}

	return out;
}
/* print the parameters specified */ static void _print_options(void) { ListIterator iterator; int i; char *part; uint32_t *user; enum job_states *state_id; squeue_job_step_t *job_step_id; uint32_t *job_id; char hostlist[8192]; if (params.nodes) { hostset_ranged_string(params.nodes, sizeof(hostlist)-1, hostlist); } else hostlist[0] = '\0'; printf( "-----------------------------\n" ); printf( "all = %s\n", params.all_flag ? "true" : "false"); printf( "format = %s\n", params.format ); printf( "iterate = %d\n", params.iterate ); printf( "job_flag = %d\n", params.job_flag ); printf( "jobs = %s\n", params.jobs ); printf( "max_cpus = %d\n", params.max_cpus ) ; printf( "nodes = %s\n", hostlist ) ; printf( "partitions = %s\n", params.partitions ) ; printf( "reservation = %s\n", params.reservation ) ; printf( "sort = %s\n", params.sort ) ; printf( "start_flag = %d\n", params.start_flag ); printf( "states = %s\n", params.states ) ; printf( "step_flag = %d\n", params.step_flag ); printf( "steps = %s\n", params.steps ); printf( "users = %s\n", params.users ); printf( "verbose = %d\n", params.verbose ); if ((params.verbose > 1) && params.job_list) { i = 0; iterator = list_iterator_create( params.job_list ); while ( (job_id = list_next( iterator )) ) { printf( "job_list[%d] = %u\n", i++, *job_id); } list_iterator_destroy( iterator ); } if ((params.verbose > 1) && params.part_list) { i = 0; iterator = list_iterator_create( params.part_list ); while ( (part = list_next( iterator )) ) { printf( "part_list[%d] = %s\n", i++, part); } list_iterator_destroy( iterator ); } if ((params.verbose > 1) && params.state_list) { i = 0; iterator = list_iterator_create( params.state_list ); while ( (state_id = list_next( iterator )) ) { printf( "state_list[%d] = %s\n", i++, job_state_string( *state_id )); } list_iterator_destroy( iterator ); } if ((params.verbose > 1) && params.step_list) { i = 0; iterator = list_iterator_create( params.step_list ); while ( (job_step_id = list_next( iterator )) ) 
{ printf( "step_list[%d] = %u.%u\n", i++, job_step_id->job_id, job_step_id->step_id ); } list_iterator_destroy( iterator ); } if ((params.verbose > 1) && params.user_list) { i = 0; iterator = list_iterator_create( params.user_list ); while ( (user = list_next( iterator )) ) { printf( "user_list[%d] = %u\n", i++, *user); } list_iterator_destroy( iterator ); } printf( "-----------------------------\n\n\n" ); } ;
/*
 * slurm_sprint_job_info - output information about a specific Slurm
 *	job based upon message as loaded using slurm_load_jobs
 * IN job_ptr - an individual job information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *	   NULL is returned on failure.
 *
 * The report is a sequence of "Key=Value" fields.  Logical lines are joined
 * with a single space when one_liner is set, otherwise with a newline plus
 * indentation.
 *
 * NOTE: white space inside job_ptr->state_desc is replaced by '_' in place.
 * No other field of *job_ptr is modified.
 */
extern char *
slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner )
{
	int i, j, k;
	char time_str[32], *group_name, *user_name;
	char *gres_last = "", tmp1[128], tmp2[128];
	char *tmp6_ptr;
	char tmp_line[1024 * 128];
	char tmp_path[MAXPATHLEN];
	char *ionodes = NULL;
	uint16_t exit_status = 0, term_sig = 0;
	job_resources_t *job_resrcs = job_ptr->job_resrcs;
	char *out = NULL;
	time_t run_time;
	/* min_nodes is zeroed so that a select plugin which leaves it
	 * untouched falls back to job_ptr->num_nodes below */
	uint32_t min_nodes = 0, max_nodes = 0;
	uint64_t min_memory;
	char *nodelist = "NodeList";
	bitstr_t *cpu_bitmap;
	char *host;
	int sock_inx, sock_reps, last;
	int abs_node_inx, rel_node_inx;
	int64_t nice;
	int bit_inx, bit_reps;
	uint64_t *last_mem_alloc_ptr = NULL;
	uint64_t last_mem_alloc = NO_VAL64;
	char *last_hosts;
	hostlist_t hl, hl_last;
	char select_buf[122];
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	uint32_t threads;
	char *line_end = (one_liner) ? " " : "\n ";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		nodelist = "MidplaneList";
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &ionodes);
	}

	/****** Line 1 ******/
	xstrfmtcat(out, "JobId=%u ", job_ptr->job_id);

	if (job_ptr->array_job_id) {
		if (job_ptr->array_task_str) {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%s ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_str);
		} else {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%u ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_id);
		}
	}
	xstrfmtcat(out, "JobName=%s", job_ptr->name);
	xstrcat(out, line_end);

	/****** Line 2 ******/
	user_name = uid_to_string((uid_t) job_ptr->user_id);
	group_name = gid_to_string((gid_t) job_ptr->group_id);
	xstrfmtcat(out, "UserId=%s(%u) GroupId=%s(%u) MCS_label=%s",
		   user_name, job_ptr->user_id, group_name, job_ptr->group_id,
		   (job_ptr->mcs_label==NULL) ? "N/A" : job_ptr->mcs_label);
	xfree(user_name);
	xfree(group_name);
	xstrcat(out, line_end);

	/****** Line 3 ******/
	nice = ((int64_t)job_ptr->nice) - NICE_OFFSET;
	xstrfmtcat(out, "Priority=%u Nice=%"PRIi64" Account=%s QOS=%s",
		   job_ptr->priority, nice, job_ptr->account, job_ptr->qos);
	if (slurm_get_track_wckey())
		xstrfmtcat(out, " WCKey=%s", job_ptr->wckey);
	xstrcat(out, line_end);

	/****** Line 4 ******/
	xstrfmtcat(out, "JobState=%s ", job_state_string(job_ptr->job_state));

	if (job_ptr->state_desc) {
		/* Replace white space with underscore for easier parsing */
		for (j=0; job_ptr->state_desc[j]; j++) {
			/* <ctype.h> functions require a value representable
			 * as unsigned char; a plain (possibly negative) char
			 * is undefined behavior */
			if (isspace((unsigned char)job_ptr->state_desc[j]))
				job_ptr->state_desc[j] = '_';
		}
		xstrfmtcat(out, "Reason=%s ", job_ptr->state_desc);
	} else
		xstrfmtcat(out, "Reason=%s ",
			   job_reason_string(job_ptr->state_reason));

	xstrfmtcat(out, "Dependency=%s", job_ptr->dependency);
	xstrcat(out, line_end);

	/****** Line 5 ******/
	xstrfmtcat(out, "Requeue=%u Restarts=%u BatchFlag=%u Reboot=%u ",
		   job_ptr->requeue, job_ptr->restart_cnt,
		   job_ptr->batch_flag, job_ptr->reboot);
	if (WIFSIGNALED(job_ptr->exit_code))
		term_sig = WTERMSIG(job_ptr->exit_code);
	exit_status = WEXITSTATUS(job_ptr->exit_code);
	xstrfmtcat(out, "ExitCode=%u:%u", exit_status, term_sig);
	xstrcat(out, line_end);

	/****** Line 5a (optional) ******/
	if (job_ptr->show_flags & SHOW_DETAIL) {
		if (WIFSIGNALED(job_ptr->derived_ec))
			term_sig = WTERMSIG(job_ptr->derived_ec);
		else
			term_sig = 0;
		exit_status = WEXITSTATUS(job_ptr->derived_ec);
		xstrfmtcat(out, "DerivedExitCode=%u:%u",
			   exit_status, term_sig);
		xstrcat(out, line_end);
	}

	/****** Line 6 ******/
	if (IS_JOB_PENDING(job_ptr))
		run_time = 0;
	else if (IS_JOB_SUSPENDED(job_ptr))
		run_time = job_ptr->pre_sus_time;
	else {
		time_t end_time;
		if (IS_JOB_RUNNING(job_ptr) || (job_ptr->end_time == 0))
			end_time = time(NULL);
		else
			end_time = job_ptr->end_time;
		if (job_ptr->suspend_time) {
			run_time = (time_t)
				(difftime(end_time, job_ptr->suspend_time)
				 + job_ptr->pre_sus_time);
		} else
			run_time = (time_t)
				difftime(end_time, job_ptr->start_time);
	}
	secs2time_str(run_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "RunTime=%s ", time_str);

	if (job_ptr->time_limit == NO_VAL)
		xstrcat(out, "TimeLimit=Partition_Limit ");
	else {
		mins2time_str(job_ptr->time_limit, time_str, sizeof(time_str));
		xstrfmtcat(out, "TimeLimit=%s ", time_str);
	}

	if (job_ptr->time_min == 0)
		xstrcat(out, "TimeMin=N/A");
	else {
		mins2time_str(job_ptr->time_min, time_str, sizeof(time_str));
		xstrfmtcat(out, "TimeMin=%s", time_str);
	}
	xstrcat(out, line_end);

	/****** Line 7 ******/
	slurm_make_time_str(&job_ptr->submit_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "SubmitTime=%s ", time_str);

	slurm_make_time_str(&job_ptr->eligible_time, time_str,
			    sizeof(time_str));
	xstrfmtcat(out, "EligibleTime=%s", time_str);
	xstrcat(out, line_end);

	/****** Line 8 (optional) ******/
	if (job_ptr->resize_time) {
		slurm_make_time_str(&job_ptr->resize_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "ResizeTime=%s", time_str);
		xstrcat(out, line_end);
	}

	/****** Line 9 ******/
	slurm_make_time_str(&job_ptr->start_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "StartTime=%s ", time_str);

	if ((job_ptr->time_limit == INFINITE) &&
	    (job_ptr->end_time > time(NULL)))
		xstrcat(out, "EndTime=Unknown ");
	else {
		slurm_make_time_str(&job_ptr->end_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "EndTime=%s ", time_str);
	}

	if (job_ptr->deadline) {
		slurm_make_time_str(&job_ptr->deadline, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "Deadline=%s", time_str);
	} else {
		xstrcat(out, "Deadline=N/A");
	}
	xstrcat(out, line_end);

	/****** Line 10 ******/
	if (job_ptr->preempt_time == 0)
		xstrcat(out, "PreemptTime=None ");
	else {
		slurm_make_time_str(&job_ptr->preempt_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "PreemptTime=%s ", time_str);
	}

	if (job_ptr->suspend_time) {
		slurm_make_time_str(&job_ptr->suspend_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "SuspendTime=%s ", time_str);
	} else
		xstrcat(out, "SuspendTime=None ");

	xstrfmtcat(out, "SecsPreSuspend=%ld", (long int)job_ptr->pre_sus_time);
	xstrcat(out, line_end);

	/****** Line 11 ******/
	xstrfmtcat(out, "Partition=%s AllocNode:Sid=%s:%u",
		   job_ptr->partition, job_ptr->alloc_node,
		   job_ptr->alloc_sid);
	xstrcat(out, line_end);

	/****** Line 12 ******/
	xstrfmtcat(out, "Req%s=%s Exc%s=%s", nodelist, job_ptr->req_nodes,
		   nodelist, job_ptr->exc_nodes);
	xstrcat(out, line_end);

	/****** Line 13 ******/
	xstrfmtcat(out, "%s=%s", nodelist, job_ptr->nodes);
	if (job_ptr->nodes && ionodes)
		xstrfmtcat(out, "[%s]", ionodes);
	/* Release unconditionally: the string was previously leaked whenever
	 * job_ptr->nodes was NULL.  This is its last use. */
	xfree(ionodes);
	if (job_ptr->sched_nodes)
		xstrfmtcat(out, " Sched%s=%s", nodelist, job_ptr->sched_nodes);
	xstrcat(out, line_end);

	/****** Line 14 (optional) ******/
	if (job_ptr->batch_host) {
		xstrfmtcat(out, "BatchHost=%s", job_ptr->batch_host);
		xstrcat(out, line_end);
	}

	/****** Line 14a (optional) ******/
	if (job_ptr->fed_siblings) {
		xstrfmtcat(out, "FedOrigin=%s FedSiblings=%s",
			   job_ptr->fed_origin_str,
			   job_ptr->fed_siblings_str);
		xstrcat(out, line_end);
	}

	/****** Line 15 ******/
	if (cluster_flags & CLUSTER_FLAG_BG) {
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &min_nodes);
		if ((min_nodes == 0) || (min_nodes == NO_VAL)) {
			min_nodes = job_ptr->num_nodes;
			max_nodes = job_ptr->max_nodes;
		} else if (job_ptr->max_nodes)
			max_nodes = min_nodes;
	} else if (IS_JOB_PENDING(job_ptr)) {
		min_nodes = job_ptr->num_nodes;
		max_nodes = job_ptr->max_nodes;
		if (max_nodes && (max_nodes < min_nodes))
			min_nodes = max_nodes;
	} else {
		min_nodes = job_ptr->num_nodes;
		max_nodes = 0;
	}

	_sprint_range(tmp_line, sizeof(tmp_line), min_nodes, max_nodes);
	xstrfmtcat(out, "NumNodes=%s ", tmp_line);
	_sprint_range(tmp_line, sizeof(tmp_line),
		      job_ptr->num_cpus, job_ptr->max_cpus);
	xstrfmtcat(out, "NumCPUs=%s ", tmp_line);

	xstrfmtcat(out, "NumTasks=%u ", job_ptr->num_tasks);
	xstrfmtcat(out, "CPUs/Task=%u ", job_ptr->cpus_per_task);

	if (job_ptr->boards_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "ReqB:S:C:T=*:");
	else
		xstrfmtcat(out, "ReqB:S:C:T=%u:", job_ptr->boards_per_node);

	if (job_ptr->sockets_per_board == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->sockets_per_board);

	if (job_ptr->cores_per_socket == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->cores_per_socket);

	if (job_ptr->threads_per_core == (uint16_t) NO_VAL)
		xstrcat(out, "*");
	else
		xstrfmtcat(out, "%u", job_ptr->threads_per_core);

	xstrcat(out, line_end);

	/****** Line 16 ******/
	/* Tres should already of been converted at this point from simple */
	xstrfmtcat(out, "TRES=%s",
		   job_ptr->tres_alloc_str ? job_ptr->tres_alloc_str :
					     job_ptr->tres_req_str);
	xstrcat(out, line_end);

	/****** Line 17 ******/
	if (job_ptr->sockets_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "Socks/Node=* ");
	else
		xstrfmtcat(out, "Socks/Node=%u ", job_ptr->sockets_per_node);

	if (job_ptr->ntasks_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "NtasksPerN:B:S:C=*:");
	else
		xstrfmtcat(out, "NtasksPerN:B:S:C=%u:",
			   job_ptr->ntasks_per_node);

	if (job_ptr->ntasks_per_board == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_board);

	if ((job_ptr->ntasks_per_socket == (uint16_t) NO_VAL) ||
	    (job_ptr->ntasks_per_socket == (uint16_t) INFINITE))
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_socket);

	if ((job_ptr->ntasks_per_core == (uint16_t) NO_VAL) ||
	    (job_ptr->ntasks_per_core == (uint16_t) INFINITE))
		xstrcat(out, "* ");
	else
		xstrfmtcat(out, "%u ", job_ptr->ntasks_per_core);

	if (job_ptr->core_spec == (uint16_t) NO_VAL)
		xstrcat(out, "CoreSpec=*");
	else if (job_ptr->core_spec & CORE_SPEC_THREAD)
		xstrfmtcat(out, "ThreadSpec=%d",
			   (job_ptr->core_spec & (~CORE_SPEC_THREAD)));
	else
		xstrfmtcat(out, "CoreSpec=%u", job_ptr->core_spec);

	xstrcat(out, line_end);

	if (job_resrcs && cluster_flags & CLUSTER_FLAG_BG) {
		if ((job_resrcs->cpu_array_cnt > 0) &&
		    (job_resrcs->cpu_array_value) &&
		    (job_resrcs->cpu_array_reps)) {
			int length = 0;
			xstrcat(out, "CPUs=");
			for (i = 0; i < job_resrcs->cpu_array_cnt; i++) {
				/* only print 60 characters worth of this record */
				if (length > 60) {
					/* skip to last CPU group entry */
					if (i < job_resrcs->cpu_array_cnt - 1) {
						continue;
					}
					/* add ellipsis before last entry */
					xstrcat(out, "...,");
				}

				length += xstrfmtcat(out, "%d",
						     job_resrcs->cpus[i]);
				if (job_resrcs->cpu_array_reps[i] > 1) {
					length += xstrfmtcat(out, "*%d",
						job_resrcs->cpu_array_reps[i]);
				}
				if (i < job_resrcs->cpu_array_cnt - 1) {
					xstrcat(out, ",");
					length++;
				}
			}
			xstrcat(out, line_end);
		}
	} else if (job_resrcs && job_resrcs->core_bitmap &&
		   ((last = bit_fls(job_resrcs->core_bitmap)) != -1)) {
		hl = hostlist_create(job_resrcs->nodes);
		if (!hl) {
			error("slurm_sprint_job_info: hostlist_create: %s",
			      job_resrcs->nodes);
			/* discard the partial report instead of leaking it */
			xfree(out);
			return NULL;
		}
		hl_last = hostlist_create(NULL);
		if (!hl_last) {
			error("slurm_sprint_job_info: hostlist_create: NULL");
			hostlist_destroy(hl);
			/* discard the partial report instead of leaking it */
			xfree(out);
			return NULL;
		}

		bit_inx = 0;
		i = sock_inx = sock_reps = 0;
		abs_node_inx = job_ptr->node_inx[i];

		gres_last = "";
		/* tmp1[] stores the current cpu(s) allocated */
		tmp2[0] = '\0';	/* stores last cpu(s) allocated */
		for (rel_node_inx=0; rel_node_inx < job_resrcs->nhosts;
		     rel_node_inx++) {

			if (sock_reps >=
			    job_resrcs->sock_core_rep_count[sock_inx]) {
				sock_inx++;
				sock_reps = 0;
			}
			sock_reps++;

			bit_reps = job_resrcs->sockets_per_node[sock_inx] *
				   job_resrcs->cores_per_socket[sock_inx];
			host = hostlist_shift(hl);
			threads = _threads_per_core(host);
			/* Expand this node's slice of the core bitmap into a
			 * per-CPU bitmap (threads bits per allocated core) */
			cpu_bitmap = bit_alloc(bit_reps * threads);
			for (j = 0; j < bit_reps; j++) {
				if (bit_test(job_resrcs->core_bitmap, bit_inx)){
					for (k = 0; k < threads; k++)
						bit_set(cpu_bitmap,
							(j * threads) + k);
				}
				bit_inx++;
			}
			bit_fmt(tmp1, sizeof(tmp1), cpu_bitmap);
			FREE_NULL_BITMAP(cpu_bitmap);
			/*
			 * If the allocation values for this host are not the
			 * same as the last host, print the report of the last
			 * group of hosts that had identical allocation values.
			 */
			if (xstrcmp(tmp1, tmp2) ||
			    ((rel_node_inx < job_ptr->gres_detail_cnt) &&
			     xstrcmp(job_ptr->gres_detail_str[rel_node_inx],
				     gres_last)) ||
			    (last_mem_alloc_ptr !=
			     job_resrcs->memory_allocated) ||
			    (job_resrcs->memory_allocated &&
			     (last_mem_alloc !=
			      job_resrcs->memory_allocated[rel_node_inx]))) {
				if (hostlist_count(hl_last)) {
					last_hosts =
						hostlist_ranged_string_xmalloc(
						hl_last);
					xstrfmtcat(out,
						   " Nodes=%s CPU_IDs=%s "
						   "Mem=%"PRIu64" GRES_IDX=%s",
						   last_hosts, tmp2,
						   last_mem_alloc_ptr ?
						   last_mem_alloc : 0,
						   gres_last);
					xfree(last_hosts);
					xstrcat(out, line_end);

					hostlist_destroy(hl_last);
					hl_last = hostlist_create(NULL);
				}
				strcpy(tmp2, tmp1);
				if (rel_node_inx < job_ptr->gres_detail_cnt) {
					gres_last = job_ptr->
						gres_detail_str[rel_node_inx];
				} else {
					gres_last = "";
				}
				last_mem_alloc_ptr =
					job_resrcs->memory_allocated;
				if (last_mem_alloc_ptr)
					last_mem_alloc = job_resrcs->
						memory_allocated[rel_node_inx];
				else
					last_mem_alloc = NO_VAL64;
			}
			hostlist_push_host(hl_last, host);
			free(host);

			if (bit_inx > last)
				break;

			if (abs_node_inx > job_ptr->node_inx[i+1]) {
				i += 2;
				abs_node_inx = job_ptr->node_inx[i];
			} else {
				abs_node_inx++;
			}
		}

		/* Flush the final group of identically-allocated hosts */
		if (hostlist_count(hl_last)) {
			last_hosts = hostlist_ranged_string_xmalloc(hl_last);
			xstrfmtcat(out,
				   " Nodes=%s CPU_IDs=%s Mem=%"PRIu64" GRES_IDX=%s",
				   last_hosts, tmp2,
				   last_mem_alloc_ptr ? last_mem_alloc : 0,
				   gres_last);
			xfree(last_hosts);
			xstrcat(out, line_end);
		}
		hostlist_destroy(hl);
		hostlist_destroy(hl_last);
	}

	/****** Line 18 ******/
	/* Work on a local copy: clearing MEM_PER_CPU in the caller's record
	 * made a second call report "MinMemoryNode" for a per-CPU limit */
	min_memory = job_ptr->pn_min_memory;
	if (min_memory & MEM_PER_CPU) {
		min_memory &= (~MEM_PER_CPU);
		tmp6_ptr = "CPU";
	} else
		tmp6_ptr = "Node";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		convert_num_unit((float)job_ptr->pn_min_cpus, tmp1,
				 sizeof(tmp1), UNIT_NONE, NO_VAL,
				 CONVERT_NUM_UNIT_EXACT);
		xstrfmtcat(out, "MinCPUsNode=%s ", tmp1);
	} else {
		xstrfmtcat(out, "MinCPUsNode=%u ", job_ptr->pn_min_cpus);
	}

	convert_num_unit((float)min_memory, tmp1, sizeof(tmp1),
			 UNIT_MEGA, NO_VAL, CONVERT_NUM_UNIT_EXACT);
	convert_num_unit((float)job_ptr->pn_min_tmp_disk, tmp2, sizeof(tmp2),
			 UNIT_MEGA, NO_VAL, CONVERT_NUM_UNIT_EXACT);
	xstrfmtcat(out, "MinMemory%s=%s MinTmpDiskNode=%s",
		   tmp6_ptr, tmp1, tmp2);
	xstrcat(out, line_end);

	/****** Line ******/
	secs2time_str((time_t)job_ptr->delay_boot, tmp1, sizeof(tmp1));
	xstrfmtcat(out, "Features=%s DelayBoot=%s", job_ptr->features, tmp1);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "Gres=%s Reservation=%s",
		   job_ptr->gres, job_ptr->resv_name);
	xstrcat(out, line_end);

	/****** Line 20 ******/
	xstrfmtcat(out,
		   "OverSubscribe=%s Contiguous=%d Licenses=%s Network=%s",
		   job_share_string(job_ptr->shared), job_ptr->contiguous,
		   job_ptr->licenses, job_ptr->network);
	xstrcat(out, line_end);

	/****** Line 21 ******/
	xstrfmtcat(out, "Command=%s", job_ptr->command);
	xstrcat(out, line_end);

	/****** Line 22 ******/
	xstrfmtcat(out, "WorkDir=%s", job_ptr->work_dir);

	/* From here on each optional line emits its own leading separator */
	if (cluster_flags & CLUSTER_FLAG_BG) {
		/****** Line 23 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_BG_ID);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "Block_ID=%s", select_buf);
		}

		/****** Line 24 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MIXED_SHORT);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrcat(out, select_buf);
		}

		/****** Line 26 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_LINUX_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "CnloadImage=%s", select_buf);
		}

		/****** Line 27 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MLOADER_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "MloaderImage=%s", select_buf);
		}

		/****** Line 28 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_RAMDISK_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "IoloadImage=%s", select_buf);
		}
	}

	/****** Line (optional) ******/
	if (job_ptr->admin_comment) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "AdminComment=%s ", job_ptr->admin_comment);
	}

	/****** Line (optional) ******/
	if (job_ptr->comment) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "Comment=%s ", job_ptr->comment);
	}

	/****** Line 30 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stderr(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdErr=%s", tmp_path);
	}

	/****** Line 31 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stdin(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdIn=%s", tmp_path);
	}

	/****** Line 32 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stdout(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdOut=%s", tmp_path);
	}

	/****** Line 33 (optional) ******/
	if (job_ptr->batch_script) {
		xstrcat(out, line_end);
		xstrcat(out, "BatchScript=\n");
		xstrcat(out, job_ptr->batch_script);
	}

	/****** Line 34 (optional) ******/
	if (job_ptr->req_switch) {
		char time_buf[32];
		xstrcat(out, line_end);
		secs2time_str((time_t) job_ptr->wait4switch, time_buf,
			      sizeof(time_buf));
		/* No trailing newline here: it split one_liner output in two
		 * and produced a blank line in the multi-line format */
		xstrfmtcat(out, "Switches=%u@%s", job_ptr->req_switch,
			   time_buf);
	}

	/****** Line 35 (optional) ******/
	if (job_ptr->burst_buffer) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "BurstBuffer=%s", job_ptr->burst_buffer);
	}

	/****** Line (optional) ******/
	if (job_ptr->burst_buffer_state) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "BurstBufferState=%s",
			   job_ptr->burst_buffer_state);
	}

	/****** Line 36 (optional) ******/
	if (cpu_freq_debug(NULL, NULL, tmp1, sizeof(tmp1),
			   job_ptr->cpu_freq_gov, job_ptr->cpu_freq_min,
			   job_ptr->cpu_freq_max, NO_VAL) != 0) {
		xstrcat(out, line_end);
		xstrcat(out, tmp1);
	}

	/****** Line 37 ******/
	xstrcat(out, line_end);
	xstrfmtcat(out, "Power=%s", power_flags_str(job_ptr->power_flags));

	/****** Line 38 (optional) ******/
	if (job_ptr->bitflags) {
		xstrcat(out, line_end);
		if (job_ptr->bitflags & GRES_ENFORCE_BIND)
			xstrcat(out, "GresEnforceBind=Yes");
		if (job_ptr->bitflags & KILL_INV_DEP)
			xstrcat(out, "KillOInInvalidDependent=Yes");
		if (job_ptr->bitflags & NO_KILL_INV_DEP)
			xstrcat(out, "KillOInInvalidDependent=No");
		if (job_ptr->bitflags & SPREAD_JOB)
			xstrcat(out, "SpreadJob=Yes");
	}

	/****** END OF JOB RECORD ******/
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}
/*
 * slurm_sprint_job_step_info - output information about a specific Slurm
 *	job step based upon message as loaded using slurm_get_job_steps
 * IN job_step_ptr - an individual job step information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *	   NULL is returned on failure.
 *
 * Fields are appended straight onto the growing result string, so long
 * values (node lists, TRES strings, step names, checkpoint directories)
 * are reported in full rather than being cut off by a fixed-size
 * intermediate buffer.
 */
char *
slurm_sprint_job_step_info ( job_step_info_t * job_step_ptr,
			    int one_liner )
{
	char tmp_node_cnt[40];
	char time_str[32];
	char limit_str[32];
	char tmp_line[128];	/* only used for the cpu_freq_debug() text */
	char *out = NULL;
	/* separator placed between logical lines of the report */
	char *line_end = (one_liner) ? " " : "\n ";
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	/****** Line 1 ******/
	slurm_make_time_str ((time_t *)&job_step_ptr->start_time, time_str,
			     sizeof(time_str));
	if (job_step_ptr->time_limit == INFINITE)
		snprintf(limit_str, sizeof(limit_str), "UNLIMITED");
	else
		/* time_limit is scaled by 60 before being formatted as
		 * a seconds value */
		secs2time_str ((time_t)job_step_ptr->time_limit * 60,
			       limit_str, sizeof(limit_str));

	if (job_step_ptr->array_job_id) {
		if (job_step_ptr->step_id == INFINITE) {	/* Pending */
			xstrfmtcat(out, "StepId=%u_%u.TBD ",
				   job_step_ptr->array_job_id,
				   job_step_ptr->array_task_id);
		} else {
			xstrfmtcat(out, "StepId=%u_%u.%u ",
				   job_step_ptr->array_job_id,
				   job_step_ptr->array_task_id,
				   job_step_ptr->step_id);
		}
	} else {
		if (job_step_ptr->step_id == INFINITE) {	/* Pending */
			xstrfmtcat(out, "StepId=%u.TBD ",
				   job_step_ptr->job_id);
		} else {
			xstrfmtcat(out, "StepId=%u.%u ",
				   job_step_ptr->job_id,
				   job_step_ptr->step_id);
		}
	}
	xstrfmtcat(out, "UserId=%u StartTime=%s TimeLimit=%s",
		   job_step_ptr->user_id, time_str, limit_str);
	xstrcat(out, line_end);

	/****** Line 2 ******/
	xstrfmtcat(out, "State=%s ", job_state_string(job_step_ptr->state));
	if (cluster_flags & CLUSTER_FLAG_BG) {
		char *io_nodes = NULL;
		select_g_select_jobinfo_get(job_step_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &io_nodes);
		if (io_nodes) {
			xstrfmtcat(out,
				   "Partition=%s MidplaneList=%s[%s] Gres=%s",
				   job_step_ptr->partition,
				   job_step_ptr->nodes, io_nodes,
				   job_step_ptr->gres);
			xfree(io_nodes);
		} else
			xstrfmtcat(out,
				   "Partition=%s MidplaneList=%s Gres=%s",
				   job_step_ptr->partition,
				   job_step_ptr->nodes,
				   job_step_ptr->gres);
	} else {
		xstrfmtcat(out, "Partition=%s NodeList=%s Gres=%s",
			   job_step_ptr->partition, job_step_ptr->nodes,
			   job_step_ptr->gres);
	}
	xstrcat(out, line_end);

	/****** Line 3 ******/
	if (cluster_flags & CLUSTER_FLAG_BGQ) {
		uint32_t nodes = 0;
		select_g_select_jobinfo_get(job_step_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &nodes);
		convert_num_unit((float)nodes, tmp_node_cnt,
				 sizeof(tmp_node_cnt), UNIT_NONE,
				 CONVERT_NUM_UNIT_EXACT);
	} else {
		convert_num_unit((float)_nodes_in_list(job_step_ptr->nodes),
				 tmp_node_cnt, sizeof(tmp_node_cnt),
				 UNIT_NONE, CONVERT_NUM_UNIT_EXACT);
	}

	xstrfmtcat(out, "Nodes=%s CPUs=%u Tasks=%u Name=%s Network=%s",
		   tmp_node_cnt, job_step_ptr->num_cpus,
		   job_step_ptr->num_tasks, job_step_ptr->name,
		   job_step_ptr->network);
	xstrcat(out, line_end);

	/****** Line 4 ******/
	xstrfmtcat(out, "TRES=%s", job_step_ptr->tres_alloc_str);
	xstrcat(out, line_end);

	/****** Line 5 ******/
	xstrfmtcat(out, "ResvPorts=%s Checkpoint=%u CheckpointDir=%s",
		   job_step_ptr->resv_ports, job_step_ptr->ckpt_interval,
		   job_step_ptr->ckpt_dir);
	xstrcat(out, line_end);

	/****** Line 6 ******/
	if (cpu_freq_debug(NULL, NULL, tmp_line, sizeof(tmp_line),
			   job_step_ptr->cpu_freq_gov,
			   job_step_ptr->cpu_freq_min,
			   job_step_ptr->cpu_freq_max, NO_VAL) != 0) {
		xstrcat(out, tmp_line);
	} else {
		xstrcat(out, "CPUFreqReq=Default");
	}
	xstrfmtcat(out, " Dist=%s",
		   slurm_step_layout_type_name(job_step_ptr->task_dist));

	/* The record always ends with a blank line, even for one_liner */
	xstrcat(out, "\n\n");

	return out;
}
extern int slurm_jobcomp_log_record ( struct job_record *job_ptr ) { int rc = SLURM_SUCCESS; char job_rec[1024]; char usr_str[32], grp_str[32], start_str[32], end_str[32], lim_str[32]; char select_buf[128], *state_string, *work_dir; size_t offset = 0, tot_size, wrote; enum job_states job_state; uint32_t time_limit; if ((log_name == NULL) || (job_comp_fd < 0)) { error("JobCompLoc log file %s not open", log_name); return SLURM_ERROR; } slurm_mutex_lock( &file_lock ); _get_user_name(job_ptr->user_id, usr_str, sizeof(usr_str)); _get_group_name(job_ptr->group_id, grp_str, sizeof(grp_str)); if ((job_ptr->time_limit == NO_VAL) && job_ptr->part_ptr) time_limit = job_ptr->part_ptr->max_time; else time_limit = job_ptr->time_limit; if (time_limit == INFINITE) strcpy(lim_str, "UNLIMITED"); else { snprintf(lim_str, sizeof(lim_str), "%lu", (unsigned long) time_limit); } if (job_ptr->job_state & JOB_RESIZING) { time_t now = time(NULL); state_string = job_state_string(job_ptr->job_state); if (job_ptr->resize_time) { _make_time_str(&job_ptr->resize_time, start_str, sizeof(start_str)); } else { _make_time_str(&job_ptr->start_time, start_str, sizeof(start_str)); } _make_time_str(&now, end_str, sizeof(end_str)); } else { /* Job state will typically have JOB_COMPLETING or JOB_RESIZING * flag set when called. We remove the flags to get the eventual * completion state: JOB_FAILED, JOB_TIMEOUT, etc. */ job_state = job_ptr->job_state & JOB_STATE_BASE; state_string = job_state_string(job_state); if (job_ptr->resize_time) { _make_time_str(&job_ptr->resize_time, start_str, sizeof(start_str)); } else if (job_ptr->start_time > job_ptr->end_time) { /* Job cancelled while pending and * expected start time is in the future. 
*/ snprintf(start_str, sizeof(start_str), "Unknown"); } else { _make_time_str(&job_ptr->start_time, start_str, sizeof(start_str)); } _make_time_str(&job_ptr->end_time, end_str, sizeof(end_str)); } if (job_ptr->details && job_ptr->details->work_dir) work_dir = job_ptr->details->work_dir; else work_dir = "unknown"; select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, select_buf, sizeof(select_buf), SELECT_PRINT_MIXED); snprintf(job_rec, sizeof(job_rec), JOB_FORMAT, (unsigned long) job_ptr->job_id, usr_str, (unsigned long) job_ptr->user_id, grp_str, (unsigned long) job_ptr->group_id, job_ptr->name, state_string, job_ptr->partition, lim_str, start_str, end_str, job_ptr->nodes, job_ptr->node_cnt, job_ptr->total_cpus, work_dir, select_buf); tot_size = strlen(job_rec); while ( offset < tot_size ) { wrote = write(job_comp_fd, job_rec + offset, tot_size - offset); if (wrote == -1) { if (errno == EAGAIN) continue; else { plugin_errno = errno; rc = SLURM_ERROR; break; } } offset += wrote; } slurm_mutex_unlock( &file_lock ); return rc; }
void print_fields(type_t type, void *object) { if (!object) { fatal ("Job or step record is NULL"); return; } slurmdb_job_rec_t *job = (slurmdb_job_rec_t *)object; slurmdb_step_rec_t *step = (slurmdb_step_rec_t *)object; jobcomp_job_rec_t *job_comp = (jobcomp_job_rec_t *)object; print_field_t *field = NULL; int curr_inx = 1; struct passwd *pw = NULL; struct group *gr = NULL; char outbuf[FORMAT_STRING_SIZE]; bool got_stats = false; int cpu_tres_rec_count = 0; int step_cpu_tres_rec_count = 0; switch(type) { case JOB: step = NULL; if (!job->track_steps) step = (slurmdb_step_rec_t *)job->first_step_ptr; /* set this to avoid printing out info for things that don't mean anything. Like an allocation that never ran anything. */ if (!step) job->track_steps = 1; else step_cpu_tres_rec_count = slurmdb_find_tres_count_in_string( step->tres_alloc_str, TRES_CPU); if (job->stats.cpu_min != NO_VAL) got_stats = true; job_comp = NULL; cpu_tres_rec_count = slurmdb_find_tres_count_in_string( (job->tres_alloc_str && job->tres_alloc_str[0]) ? job->tres_alloc_str : job->tres_req_str, TRES_CPU); break; case JOBSTEP: job = step->job_ptr; if (step->stats.cpu_min != NO_VAL) got_stats = true; if ((step_cpu_tres_rec_count = slurmdb_find_tres_count_in_string( step->tres_alloc_str, TRES_CPU)) == INFINITE64) step_cpu_tres_rec_count = slurmdb_find_tres_count_in_string( (job->tres_alloc_str && job->tres_alloc_str[0]) ? 
job->tres_alloc_str : job->tres_req_str, TRES_CPU); job_comp = NULL; break; case JOBCOMP: job = NULL; step = NULL; break; default: break; } if ((uint64_t)cpu_tres_rec_count == INFINITE64) cpu_tres_rec_count = 0; if ((uint64_t)step_cpu_tres_rec_count == INFINITE64) step_cpu_tres_rec_count = 0; list_iterator_reset(print_fields_itr); while((field = list_next(print_fields_itr))) { char *tmp_char = NULL, id[FORMAT_STRING_SIZE]; int tmp_int = NO_VAL, tmp_int2 = NO_VAL; double tmp_dub = (double)NO_VAL; /* don't use NO_VAL64 unless we can confirm the values coming in are NO_VAL64 */ uint32_t tmp_uint32 = NO_VAL; uint64_t tmp_uint64 = NO_VAL64; memset(&outbuf, 0, sizeof(outbuf)); switch(field->type) { case PRINT_ALLOC_CPUS: switch(type) { case JOB: tmp_int = cpu_tres_rec_count; // we want to use the step info if (!step) break; case JOBSTEP: tmp_int = step_cpu_tres_rec_count; break; case JOBCOMP: default: tmp_int = job_comp->proc_cnt; break; } field->print_routine(field, tmp_int, (curr_inx == field_count)); break; case PRINT_ALLOC_GRES: switch(type) { case JOB: tmp_char = job->alloc_gres; break; case JOBSTEP: tmp_char = step->job_ptr->alloc_gres; break; case JOBCOMP: default: tmp_char = NULL; break; } field->print_routine(field, tmp_char, (curr_inx == field_count)); break; case PRINT_ALLOC_NODES: switch(type) { case JOB: tmp_int = job->alloc_nodes; tmp_char = job->tres_alloc_str; break; case JOBSTEP: tmp_int = step->nnodes; tmp_char = step->tres_alloc_str; break; case JOBCOMP: tmp_int = job_comp->node_cnt; break; default: break; } if (!tmp_int && tmp_char) { if ((tmp_uint64 = slurmdb_find_tres_count_in_string( tmp_char, TRES_NODE)) != INFINITE64) tmp_int = tmp_uint64; } convert_num_unit((double)tmp_int, outbuf, sizeof(outbuf), UNIT_NONE, NO_VAL, params.convert_flags); field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_ACCOUNT: switch(type) { case JOB: tmp_char = job->account; break; case JOBSTEP: tmp_char = step->job_ptr->account; break; case 
JOBCOMP: default: tmp_char = "n/a"; break; } field->print_routine(field, tmp_char, (curr_inx == field_count)); break; case PRINT_ACT_CPUFREQ: if (got_stats) { switch (type) { case JOB: if (!job->track_steps) tmp_dub = step->stats.act_cpufreq; break; case JOBSTEP: tmp_dub = step->stats.act_cpufreq; break; default: break; } } if (!fuzzy_equal(tmp_dub, NO_VAL)) convert_num_unit2((double)tmp_dub, outbuf, sizeof(outbuf), UNIT_KILO, params.units, 1000, params.convert_flags & (~CONVERT_NUM_UNIT_EXACT)); field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_ASSOCID: switch(type) { case JOB: tmp_int = job->associd; break; case JOBSTEP: tmp_int = step->job_ptr->associd; break; case JOBCOMP: default: tmp_int = NO_VAL; break; } field->print_routine(field, tmp_int, (curr_inx == field_count)); break; case PRINT_AVECPU: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_dub = job->stats.cpu_ave; break; case JOBSTEP: tmp_dub = step->stats.cpu_ave; break; case JOBCOMP: default: break; } } if (!fuzzy_equal(tmp_dub, NO_VAL)) tmp_char = _elapsed_time((long)tmp_dub, 0); field->print_routine(field, tmp_char, (curr_inx == field_count)); xfree(tmp_char); break; case PRINT_AVEDISKREAD: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_dub = job-> stats.disk_read_ave; break; case JOBSTEP: tmp_dub = step->stats.disk_read_ave; break; case JOBCOMP: default: break; } } _print_small_double(outbuf, sizeof(outbuf), tmp_dub, UNIT_MEGA); field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_AVEDISKWRITE: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_dub = job-> stats.disk_write_ave; break; case JOBSTEP: tmp_dub = step->stats.disk_write_ave; break; case JOBCOMP: default: break; } } _print_small_double(outbuf, sizeof(outbuf), tmp_dub, UNIT_MEGA); field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_AVEPAGES: if (got_stats) { switch(type) { case JOB: if 
(!job->track_steps) tmp_dub = job->stats.pages_ave; break; case JOBSTEP: tmp_dub = step->stats.pages_ave; break; case JOBCOMP: default: break; } } if (!fuzzy_equal(tmp_dub, NO_VAL)) convert_num_unit((double)tmp_dub, outbuf, sizeof(outbuf), UNIT_KILO, params.units, params.convert_flags); field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_AVERSS: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_dub = job->stats.rss_ave; break; case JOBSTEP: tmp_dub = step->stats.rss_ave; break; case JOBCOMP: default: break; } } if (!fuzzy_equal(tmp_dub, NO_VAL)) convert_num_unit((double)tmp_dub, outbuf, sizeof(outbuf), UNIT_KILO, params.units, params.convert_flags); field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_AVEVSIZE: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_dub = job->stats.vsize_ave; break; case JOBSTEP: tmp_dub = step->stats.vsize_ave; break; case JOBCOMP: default: break; } } if (!fuzzy_equal(tmp_dub, NO_VAL)) convert_num_unit((double)tmp_dub, outbuf, sizeof(outbuf), UNIT_KILO, params.units, params.convert_flags); field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_BLOCKID: switch(type) { case JOB: tmp_char = job->blockid; break; case JOBSTEP: break; case JOBCOMP: tmp_char = job_comp->blockid; break; default: break; } field->print_routine(field, tmp_char, (curr_inx == field_count)); break; case PRINT_CLUSTER: switch(type) { case JOB: tmp_char = job->cluster; break; case JOBSTEP: tmp_char = step->job_ptr->cluster; break; case JOBCOMP: default: break; } field->print_routine(field, tmp_char, (curr_inx == field_count)); break; case PRINT_COMMENT: switch(type) { case JOB: tmp_char = job->derived_es; break; case JOBSTEP: case JOBCOMP: default: tmp_char = NULL; break; } field->print_routine(field, tmp_char, (curr_inx == field_count)); break; case PRINT_CONSUMED_ENERGY: if (got_stats) { switch (type) { case JOB: if (!job->track_steps) tmp_dub 
= step-> stats.consumed_energy; break; case JOBSTEP: tmp_dub = step->stats.consumed_energy; break; default: break; } } if (!fuzzy_equal(tmp_dub, NO_VAL)) convert_num_unit2((double)tmp_dub, outbuf, sizeof(outbuf), UNIT_NONE, params.units, 1000, params.convert_flags & (~CONVERT_NUM_UNIT_EXACT)); field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_CONSUMED_ENERGY_RAW: if (got_stats) { switch (type) { case JOB: if (!job->track_steps) tmp_dub = step-> stats.consumed_energy; break; case JOBSTEP: tmp_dub = step->stats.consumed_energy; break; default: break; } } field->print_routine(field, tmp_dub, (curr_inx == field_count)); break; case PRINT_CPU_TIME: switch(type) { case JOB: tmp_uint64 = (uint64_t)job->elapsed * (uint64_t)cpu_tres_rec_count; break; case JOBSTEP: tmp_uint64 = (uint64_t)step->elapsed * (uint64_t)step_cpu_tres_rec_count; break; case JOBCOMP: break; default: break; } field->print_routine(field, tmp_uint64, (curr_inx == field_count)); break; case PRINT_CPU_TIME_RAW: switch(type) { case JOB: tmp_uint64 = (uint64_t)job->elapsed * (uint64_t)cpu_tres_rec_count; break; case JOBSTEP: tmp_uint64 = (uint64_t)step->elapsed * (uint64_t)step_cpu_tres_rec_count; break; case JOBCOMP: break; default: break; } field->print_routine(field, tmp_uint64, (curr_inx == field_count)); break; case PRINT_DERIVED_EC: tmp_int2 = 0; switch(type) { case JOB: tmp_int = job->derived_ec; if (tmp_int == NO_VAL) tmp_int = 0; if (WIFSIGNALED(tmp_int)) tmp_int2 = WTERMSIG(tmp_int); snprintf(outbuf, sizeof(outbuf), "%d:%d", WEXITSTATUS(tmp_int), tmp_int2); break; case JOBSTEP: case JOBCOMP: default: outbuf[0] = '\0'; break; } field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_ELAPSED: switch(type) { case JOB: tmp_int = job->elapsed; break; case JOBSTEP: tmp_int = step->elapsed; break; case JOBCOMP: tmp_int = job_comp->elapsed_time; break; default: tmp_int = NO_VAL; break; } field->print_routine(field, (uint64_t)tmp_int, (curr_inx == 
field_count)); break; case PRINT_ELIGIBLE: switch(type) { case JOB: tmp_int = job->eligible; break; case JOBSTEP: tmp_int = step->start; break; case JOBCOMP: break; default: break; } field->print_routine(field, tmp_int, (curr_inx == field_count)); break; case PRINT_END: switch(type) { case JOB: tmp_int = job->end; break; case JOBSTEP: tmp_int = step->end; break; case JOBCOMP: tmp_int = parse_time(job_comp->end_time, 1); break; default: tmp_int = NO_VAL; break; } field->print_routine(field, tmp_int, (curr_inx == field_count)); break; case PRINT_EXITCODE: tmp_int = 0; tmp_int2 = 0; switch(type) { case JOB: tmp_int = job->exitcode; break; case JOBSTEP: tmp_int = step->exitcode; break; case JOBCOMP: default: break; } if (tmp_int != NO_VAL) { if (WIFSIGNALED(tmp_int)) tmp_int2 = WTERMSIG(tmp_int); tmp_int = WEXITSTATUS(tmp_int); if (tmp_int >= 128) tmp_int -= 128; } snprintf(outbuf, sizeof(outbuf), "%d:%d", tmp_int, tmp_int2); field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_GID: switch(type) { case JOB: tmp_int = job->gid; break; case JOBSTEP: tmp_int = NO_VAL; break; case JOBCOMP: tmp_int = job_comp->gid; break; default: tmp_int = NO_VAL; break; } field->print_routine(field, tmp_int, (curr_inx == field_count)); break; case PRINT_GROUP: switch(type) { case JOB: tmp_int = job->gid; break; case JOBSTEP: tmp_int = NO_VAL; break; case JOBCOMP: tmp_int = job_comp->gid; break; default: tmp_int = NO_VAL; break; } tmp_char = NULL; if ((gr=getgrgid(tmp_int))) tmp_char=gr->gr_name; field->print_routine(field, tmp_char, (curr_inx == field_count)); break; case PRINT_JOBID: if (type == JOBSTEP) job = step->job_ptr; if (job) { if (job->array_task_str) { _xlate_task_str(job); snprintf(id, FORMAT_STRING_SIZE, "%u_[%s]", job->array_job_id, job->array_task_str); } else if (job->array_task_id != NO_VAL) snprintf(id, FORMAT_STRING_SIZE, "%u_%u", job->array_job_id, job->array_task_id); else snprintf(id, FORMAT_STRING_SIZE, "%u", job->jobid); } switch (type) 
{ case JOB: tmp_char = xstrdup(id); break; case JOBSTEP: if (step->stepid == SLURM_BATCH_SCRIPT) { tmp_char = xstrdup_printf( "%s.batch", id); } else if (step->stepid == SLURM_EXTERN_CONT) { tmp_char = xstrdup_printf( "%s.extern", id); } else { tmp_char = xstrdup_printf( "%s.%u", id, step->stepid); } break; case JOBCOMP: tmp_char = xstrdup_printf("%u", job_comp->jobid); break; default: break; } field->print_routine(field, tmp_char, (curr_inx == field_count)); xfree(tmp_char); break; case PRINT_JOBIDRAW: switch (type) { case JOB: tmp_char = xstrdup_printf("%u", job->jobid); break; case JOBSTEP: if (step->stepid == SLURM_BATCH_SCRIPT) { tmp_char = xstrdup_printf( "%u.batch", step->job_ptr->jobid); } else if (step->stepid == SLURM_EXTERN_CONT) { tmp_char = xstrdup_printf( "%u.extern", step->job_ptr->jobid); } else { tmp_char = xstrdup_printf( "%u.%u", step->job_ptr->jobid, step->stepid); } break; case JOBCOMP: tmp_char = xstrdup_printf("%u", job_comp->jobid); break; default: break; } field->print_routine(field, tmp_char, (curr_inx == field_count)); xfree(tmp_char); break; case PRINT_JOBNAME: switch(type) { case JOB: tmp_char = job->jobname; break; case JOBSTEP: tmp_char = step->stepname; break; case JOBCOMP: tmp_char = job_comp->jobname; break; default: tmp_char = NULL; break; } field->print_routine(field, tmp_char, (curr_inx == field_count)); break; case PRINT_LAYOUT: switch(type) { case JOB: /* below really should be step. 
It is not a typo */ if (!job->track_steps) tmp_char = slurm_step_layout_type_name( step->task_dist); break; case JOBSTEP: tmp_char = slurm_step_layout_type_name( step->task_dist); break; case JOBCOMP: break; default: tmp_char = NULL; break; } field->print_routine(field, tmp_char, (curr_inx == field_count)); break; case PRINT_MAXDISKREAD: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_dub = job-> stats.disk_read_max; break; case JOBSTEP: tmp_dub = step->stats.disk_read_max; break; case JOBCOMP: default: break; } } _print_small_double(outbuf, sizeof(outbuf), tmp_dub, UNIT_MEGA); field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_MAXDISKREADNODE: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_char = find_hostname( job->stats. disk_read_max_nodeid, job->nodes); break; case JOBSTEP: tmp_char = find_hostname( step->stats. disk_read_max_nodeid, step->nodes); break; case JOBCOMP: default: tmp_char = NULL; break; } } field->print_routine(field, tmp_char, (curr_inx == field_count)); xfree(tmp_char); break; case PRINT_MAXDISKREADTASK: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_uint32 = job->stats. disk_read_max_taskid; break; case JOBSTEP: tmp_uint32 = step->stats. disk_read_max_taskid; break; case JOBCOMP: default: tmp_uint32 = NO_VAL; break; } } if (tmp_uint32 == (uint32_t)NO_VAL) tmp_uint32 = NO_VAL; field->print_routine(field, tmp_uint32, (curr_inx == field_count)); break; case PRINT_MAXDISKWRITE: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_dub = job->stats. disk_write_max; break; case JOBSTEP: tmp_dub = step->stats.disk_write_max; break; case JOBCOMP: default: break; } } _print_small_double(outbuf, sizeof(outbuf), tmp_dub, UNIT_MEGA); field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_MAXDISKWRITENODE: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_char = find_hostname( job->stats. 
disk_write_max_nodeid, job->nodes); break; case JOBSTEP: tmp_char = find_hostname( step->stats. disk_write_max_nodeid, step->nodes); break; case JOBCOMP: default: tmp_char = NULL; break; } } field->print_routine(field, tmp_char, (curr_inx == field_count)); xfree(tmp_char); break; case PRINT_MAXDISKWRITETASK: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_uint32 = job->stats. disk_write_max_taskid; break; case JOBSTEP: tmp_uint32 = step->stats. disk_write_max_taskid; break; case JOBCOMP: default: tmp_uint32 = NO_VAL; break; } if (tmp_uint32 == (uint32_t)NO_VAL) tmp_uint32 = NO_VAL; } field->print_routine(field, tmp_uint32, (curr_inx == field_count)); break; case PRINT_MAXPAGES: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_uint64 = job->stats.pages_max; break; case JOBSTEP: tmp_uint64 = step->stats.pages_max; break; case JOBCOMP: default: break; } if (tmp_uint64 != (uint64_t)NO_VAL64) convert_num_unit( (double)tmp_uint64, outbuf, sizeof(outbuf), UNIT_KILO, params.units, params.convert_flags); } field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_MAXPAGESNODE: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_char = find_hostname( job->stats. pages_max_nodeid, job->nodes); break; case JOBSTEP: tmp_char = find_hostname( step->stats.pages_max_nodeid, step->nodes); break; case JOBCOMP: default: tmp_char = NULL; break; } } field->print_routine(field, tmp_char, (curr_inx == field_count)); xfree(tmp_char); break; case PRINT_MAXPAGESTASK: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_uint32 = job->stats. pages_max_taskid; break; case JOBSTEP: tmp_uint32 = step->stats. 
pages_max_taskid; break; case JOBCOMP: default: tmp_uint32 = NO_VAL; break; } if (tmp_uint32 == (uint32_t)NO_VAL) tmp_uint32 = NO_VAL; } field->print_routine(field, tmp_uint32, (curr_inx == field_count)); break; case PRINT_MAXRSS: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_uint64 = job->stats.rss_max; break; case JOBSTEP: tmp_uint64 = step->stats.rss_max; break; case JOBCOMP: default: break; } if (tmp_uint64 != (uint64_t)NO_VAL64) convert_num_unit( (double)tmp_uint64, outbuf, sizeof(outbuf), UNIT_KILO, params.units, params.convert_flags); } field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_MAXRSSNODE: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_char = find_hostname( job->stats. rss_max_nodeid, job->nodes); break; case JOBSTEP: tmp_char = find_hostname( step->stats.rss_max_nodeid, step->nodes); break; case JOBCOMP: default: tmp_char = NULL; break; } } field->print_routine(field, tmp_char, (curr_inx == field_count)); xfree(tmp_char); break; case PRINT_MAXRSSTASK: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_uint32 = job->stats. rss_max_taskid; break; case JOBSTEP: tmp_uint32 = step->stats.rss_max_taskid; break; case JOBCOMP: default: tmp_uint32 = NO_VAL; break; } if (tmp_uint32 == (uint32_t)NO_VAL) tmp_uint32 = NO_VAL; } field->print_routine(field, tmp_uint32, (curr_inx == field_count)); break; case PRINT_MAXVSIZE: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_uint64 = job->stats. 
vsize_max; break; case JOBSTEP: tmp_uint64 = step->stats.vsize_max; break; case JOBCOMP: default: tmp_uint64 = (uint64_t)NO_VAL64; break; } if (tmp_uint64 != (uint64_t)NO_VAL64) convert_num_unit( (double)tmp_uint64, outbuf, sizeof(outbuf), UNIT_KILO, params.units, params.convert_flags); } field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_MAXVSIZENODE: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_char = find_hostname( job->stats. vsize_max_nodeid, job->nodes); break; case JOBSTEP: tmp_char = find_hostname( step->stats.vsize_max_nodeid, step->nodes); break; case JOBCOMP: default: tmp_char = NULL; break; } } field->print_routine(field, tmp_char, (curr_inx == field_count)); xfree(tmp_char); break; case PRINT_MAXVSIZETASK: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_uint32 = job->stats. vsize_max_taskid; break; case JOBSTEP: tmp_uint32 = step->stats. vsize_max_taskid; break; case JOBCOMP: default: tmp_uint32 = NO_VAL; break; } if (tmp_uint32 == (uint32_t)NO_VAL) tmp_uint32 = NO_VAL; } field->print_routine(field, tmp_uint32, (curr_inx == field_count)); break; case PRINT_MINCPU: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_dub = job->stats.cpu_min; break; case JOBSTEP: tmp_dub = step->stats.cpu_min; break; case JOBCOMP: default: break; } if (!fuzzy_equal(tmp_dub, NO_VAL)) tmp_char = _elapsed_time( (long)tmp_dub, 0); } field->print_routine(field, tmp_char, (curr_inx == field_count)); xfree(tmp_char); break; case PRINT_MINCPUNODE: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_char = find_hostname( job->stats. 
cpu_min_nodeid, job->nodes); break; case JOBSTEP: tmp_char = find_hostname( step->stats.cpu_min_nodeid, step->nodes); break; case JOBCOMP: default: tmp_char = NULL; break; } } field->print_routine(field, tmp_char, (curr_inx == field_count)); xfree(tmp_char); break; case PRINT_MINCPUTASK: if (got_stats) { switch(type) { case JOB: if (!job->track_steps) tmp_uint32 = job->stats. cpu_min_taskid; break; case JOBSTEP: tmp_uint32 = step->stats.cpu_min_taskid; break; case JOBCOMP: default: tmp_uint32 = NO_VAL; break; } if (tmp_uint32 == (uint32_t)NO_VAL) tmp_uint32 = NO_VAL; } field->print_routine(field, tmp_uint32, (curr_inx == field_count)); break; case PRINT_NODELIST: switch(type) { case JOB: tmp_char = job->nodes; break; case JOBSTEP: tmp_char = step->nodes; break; case JOBCOMP: tmp_char = job_comp->nodelist; break; default: break; } field->print_routine(field, tmp_char, (curr_inx == field_count)); break; case PRINT_NNODES: switch(type) { case JOB: tmp_int = job->alloc_nodes; tmp_char = (job->tres_alloc_str && job->tres_alloc_str[0]) ? 
job->tres_alloc_str : job->tres_req_str; break; case JOBSTEP: tmp_int = step->nnodes; tmp_char = step->tres_alloc_str; break; case JOBCOMP: tmp_int = job_comp->node_cnt; break; default: break; } if (!tmp_int && tmp_char) { if ((tmp_uint64 = slurmdb_find_tres_count_in_string( tmp_char, TRES_NODE)) != INFINITE64) tmp_int = tmp_uint64; } convert_num_unit((double)tmp_int, outbuf, sizeof(outbuf), UNIT_NONE, params.units, params.convert_flags); field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_NTASKS: switch(type) { case JOB: if (!job->track_steps && !step) tmp_int = cpu_tres_rec_count; // we want to use the step info if (!step) break; case JOBSTEP: tmp_int = step->ntasks; break; case JOBCOMP: default: break; } field->print_routine(field, tmp_int, (curr_inx == field_count)); break; case PRINT_PRIO: switch(type) { case JOB: tmp_int = job->priority; break; case JOBSTEP: break; case JOBCOMP: break; default: break; } field->print_routine(field, tmp_int, (curr_inx == field_count)); break; case PRINT_PARTITION: switch(type) { case JOB: tmp_char = job->partition; break; case JOBSTEP: break; case JOBCOMP: tmp_char = job_comp->partition; break; default: break; } field->print_routine(field, tmp_char, (curr_inx == field_count)); break; case PRINT_QOS: switch(type) { case JOB: tmp_int = job->qosid; break; case JOBSTEP: break; case JOBCOMP: break; default: break; } if (!g_qos_list) { slurmdb_qos_cond_t qos_cond; memset(&qos_cond, 0, sizeof(slurmdb_qos_cond_t)); qos_cond.with_deleted = 1; g_qos_list = slurmdb_qos_get( acct_db_conn, &qos_cond); } tmp_char = _find_qos_name_from_list(g_qos_list, tmp_int); field->print_routine(field, tmp_char, (curr_inx == field_count)); break; case PRINT_QOSRAW: switch(type) { case JOB: tmp_int = job->qosid; break; case JOBSTEP: break; case JOBCOMP: break; default: break; } field->print_routine(field, tmp_int, (curr_inx == field_count)); break; case PRINT_REQ_CPUFREQ_MIN: switch (type) { case JOB: if (!job->track_steps && 
!step) tmp_dub = NO_VAL; // we want to use the step info if (!step) break; case JOBSTEP: tmp_dub = step->req_cpufreq_min; break; default: break; } cpu_freq_to_string(outbuf, sizeof(outbuf), tmp_dub); field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_REQ_CPUFREQ_MAX: switch (type) { case JOB: if (!job->track_steps && !step) tmp_dub = NO_VAL; // we want to use the step info if (!step) break; case JOBSTEP: tmp_dub = step->req_cpufreq_max; break; default: break; } cpu_freq_to_string(outbuf, sizeof(outbuf), tmp_dub); field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_REQ_CPUFREQ_GOV: switch (type) { case JOB: if (!job->track_steps && !step) tmp_dub = NO_VAL; // we want to use the step info if (!step) break; case JOBSTEP: tmp_dub = step->req_cpufreq_gov; break; default: break; } cpu_freq_to_string(outbuf, sizeof(outbuf), tmp_dub); field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_REQ_CPUS: switch(type) { case JOB: tmp_int = job->req_cpus; break; case JOBSTEP: tmp_int = step_cpu_tres_rec_count; break; case JOBCOMP: break; default: break; } field->print_routine(field, tmp_int, (curr_inx == field_count)); break; case PRINT_REQ_GRES: switch(type) { case JOB: tmp_char = job->req_gres; break; case JOBSTEP: tmp_char = step->job_ptr->req_gres; break; case JOBCOMP: default: tmp_char = NULL; break; } field->print_routine(field, tmp_char, (curr_inx == field_count)); break; case PRINT_REQ_MEM: switch(type) { case JOB: tmp_uint32 = job->req_mem; break; case JOBSTEP: tmp_uint32 = step->job_ptr->req_mem; break; case JOBCOMP: default: tmp_uint32 = NO_VAL; break; } if (tmp_uint32 != (uint32_t)NO_VAL) { bool per_cpu = false; if (tmp_uint32 & MEM_PER_CPU) { tmp_uint32 &= (~MEM_PER_CPU); per_cpu = true; } convert_num_unit((double)tmp_uint32, outbuf, sizeof(outbuf), UNIT_MEGA, params.units, params.convert_flags); if (per_cpu) sprintf(outbuf+strlen(outbuf), "c"); else sprintf(outbuf+strlen(outbuf), 
"n"); } field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_REQ_NODES: switch(type) { case JOB: tmp_int = 0; tmp_char = job->tres_req_str; break; case JOBSTEP: tmp_int = step->nnodes; tmp_char = step->tres_alloc_str; break; case JOBCOMP: tmp_int = job_comp->node_cnt; break; default: break; } if (!tmp_int && tmp_char) { if ((tmp_uint64 = slurmdb_find_tres_count_in_string( tmp_char, TRES_NODE)) != INFINITE64) tmp_int = tmp_uint64; } convert_num_unit((double)tmp_int, outbuf, sizeof(outbuf), UNIT_NONE, params.units, params.convert_flags); field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_RESERVATION: switch(type) { case JOB: if (job->resv_name) { tmp_char = job->resv_name; } else { tmp_char = NULL; } break; case JOBSTEP: tmp_char = NULL; break; case JOBCOMP: tmp_char = NULL; break; default: tmp_char = NULL; break; } field->print_routine(field, tmp_char, (curr_inx == field_count)); break; case PRINT_RESERVATION_ID: switch(type) { case JOB: if (job->resvid) tmp_uint32 = job->resvid; else tmp_uint32 = NO_VAL; break; case JOBSTEP: tmp_uint32 = NO_VAL; break; case JOBCOMP: tmp_uint32 = NO_VAL; break; default: tmp_uint32 = NO_VAL; break; } if (tmp_uint32 == (uint32_t)NO_VAL) tmp_uint32 = NO_VAL; field->print_routine(field, tmp_uint32, (curr_inx == field_count)); break; case PRINT_RESV: switch(type) { case JOB: if (job->start) tmp_int = job->start - job->eligible; else tmp_int = time(NULL) - job->eligible; break; case JOBSTEP: break; case JOBCOMP: break; default: break; } field->print_routine(field, (uint64_t)tmp_int, (curr_inx == field_count)); break; case PRINT_RESV_CPU: switch(type) { case JOB: if (job->start) tmp_int = (job->start - job->eligible) * job->req_cpus; else tmp_int = (time(NULL) - job->eligible) * job->req_cpus; break; case JOBSTEP: break; case JOBCOMP: break; default: break; } field->print_routine(field, (uint64_t)tmp_int, (curr_inx == field_count)); break; case PRINT_RESV_CPU_RAW: switch(type) { 
case JOB: if (job->start) tmp_int = (job->start - job->eligible) * job->req_cpus; else tmp_int = (time(NULL) - job->eligible) * job->req_cpus; break; case JOBSTEP: break; case JOBCOMP: break; default: break; } field->print_routine(field, tmp_int, (curr_inx == field_count)); break; case PRINT_START: switch(type) { case JOB: tmp_int = job->start; break; case JOBSTEP: tmp_int = step->start; break; case JOBCOMP: tmp_int = parse_time(job_comp->start_time, 1); break; default: break; } field->print_routine(field, tmp_int, (curr_inx == field_count)); break; case PRINT_STATE: switch(type) { case JOB: tmp_int = job->state; tmp_int2 = job->requid; break; case JOBSTEP: tmp_int = step->state; tmp_int2 = step->requid; break; case JOBCOMP: tmp_char = job_comp->state; break; default: break; } if (((tmp_int & JOB_STATE_BASE) == JOB_CANCELLED) && (tmp_int2 != -1)) snprintf(outbuf, FORMAT_STRING_SIZE, "%s by %d", job_state_string(tmp_int), tmp_int2); else if (tmp_int != NO_VAL) snprintf(outbuf, FORMAT_STRING_SIZE, "%s", job_state_string(tmp_int)); else if (tmp_char) snprintf(outbuf, FORMAT_STRING_SIZE, "%s", tmp_char); field->print_routine(field, outbuf, (curr_inx == field_count)); break; case PRINT_SUBMIT: switch(type) { case JOB: tmp_int = job->submit; break; case JOBSTEP: tmp_int = step->start; break; case JOBCOMP: tmp_int = parse_time(job_comp->start_time, 1); break; default: break; } field->print_routine(field, tmp_int, (curr_inx == field_count)); break; case PRINT_SUSPENDED: switch(type) { case JOB: tmp_int = job->suspended; break; case JOBSTEP: tmp_int = step->suspended; break; case JOBCOMP: break; default: break; } field->print_routine(field, (uint64_t)tmp_int, (curr_inx == field_count)); break; case PRINT_SYSTEMCPU: if (got_stats) { switch(type) { case JOB: tmp_int = job->sys_cpu_sec; tmp_int2 = job->sys_cpu_usec; break; case JOBSTEP: tmp_int = step->sys_cpu_sec; tmp_int2 = step->sys_cpu_usec; break; case JOBCOMP: default: break; } tmp_char = _elapsed_time(tmp_int, 
tmp_int2); } field->print_routine(field, tmp_char, (curr_inx == field_count)); xfree(tmp_char); break; case PRINT_TIMELIMIT: switch(type) { case JOB: if (job->timelimit == INFINITE) tmp_char = "UNLIMITED"; else if (job->timelimit == NO_VAL) tmp_char = "Partition_Limit"; else if (job->timelimit) { char tmp1[128]; mins2time_str(job->timelimit, tmp1, sizeof(tmp1)); tmp_char = tmp1; } break; case JOBSTEP: break; case JOBCOMP: tmp_char = job_comp->timelimit; break; default: break; } field->print_routine(field, tmp_char, (curr_inx == field_count)); break; case PRINT_TOTALCPU: switch(type) { case JOB: tmp_int = job->tot_cpu_sec; tmp_int2 = job->tot_cpu_usec; break; case JOBSTEP: tmp_int = step->tot_cpu_sec; tmp_int2 = step->tot_cpu_usec; break; case JOBCOMP: break; default: break; } tmp_char = _elapsed_time(tmp_int, tmp_int2); field->print_routine(field, tmp_char, (curr_inx == field_count)); xfree(tmp_char); break; case PRINT_TRESA: switch(type) { case JOB: tmp_char = job->tres_alloc_str; break; case JOBSTEP: tmp_char = step->tres_alloc_str; break; case JOBCOMP: default: tmp_char = NULL; break; } if (!g_tres_list) { slurmdb_tres_cond_t tres_cond; memset(&tres_cond, 0, sizeof(slurmdb_tres_cond_t)); tres_cond.with_deleted = 1; g_tres_list = slurmdb_tres_get( acct_db_conn, &tres_cond); } tmp_char = slurmdb_make_tres_string_from_simple( tmp_char, g_tres_list, params.units, params.convert_flags); field->print_routine(field, tmp_char, (curr_inx == field_count)); xfree(tmp_char); break; case PRINT_TRESR: switch(type) { case JOB: tmp_char = job->tres_req_str; break; case JOBSTEP: case JOBCOMP: default: tmp_char = NULL; break; } if (!g_tres_list) { slurmdb_tres_cond_t tres_cond; memset(&tres_cond, 0, sizeof(slurmdb_tres_cond_t)); tres_cond.with_deleted = 1; g_tres_list = slurmdb_tres_get( acct_db_conn, &tres_cond); } tmp_char = slurmdb_make_tres_string_from_simple( tmp_char, g_tres_list, params.units, params.convert_flags); field->print_routine(field, tmp_char, (curr_inx == 
field_count)); xfree(tmp_char); break; case PRINT_UID: switch(type) { case JOB: if (job->user) { if ((pw=getpwnam(job->user))) tmp_int = pw->pw_uid; } else tmp_int = job->uid; break; case JOBSTEP: break; case JOBCOMP: tmp_int = job_comp->uid; break; default: break; } field->print_routine(field, tmp_int, (curr_inx == field_count)); break; case PRINT_USER: switch(type) { case JOB: if (job->user) tmp_char = job->user; else if (job->uid != -1) { if ((pw=getpwuid(job->uid))) tmp_char = pw->pw_name; } break; case JOBSTEP: break; case JOBCOMP: tmp_char = job_comp->uid_name; break; default: break; } field->print_routine(field, tmp_char, (curr_inx == field_count)); break; case PRINT_USERCPU: if (got_stats) { switch(type) { case JOB: tmp_int = job->user_cpu_sec; tmp_int2 = job->user_cpu_usec; break; case JOBSTEP: tmp_int = step->user_cpu_sec; tmp_int2 = step->user_cpu_usec; break; case JOBCOMP: default: break; } tmp_char = _elapsed_time(tmp_int, tmp_int2); } field->print_routine(field, tmp_char, (curr_inx == field_count)); xfree(tmp_char); break; case PRINT_WCKEY: switch(type) { case JOB: tmp_char = job->wckey; break; case JOBSTEP: break; case JOBCOMP: break; default: break; } field->print_routine(field, tmp_char, (curr_inx == field_count)); break; case PRINT_WCKEYID: switch(type) { case JOB: tmp_int = job->wckeyid; break; case JOBSTEP: break; case JOBCOMP: break; default: break; } field->print_routine(field, tmp_int, (curr_inx == field_count)); break; default: break; } curr_inx++; } printf("\n"); }
/* * Attempt to start a job * jobid (IN) - job id * task_cnt (IN) - total count of tasks to start * hostlist (IN) - SLURM hostlist expression with no repeated hostnames * tasklist (IN/OUT) - comma separated list of hosts with tasks to be started, * list hostname once per task to start * comment_ptr (IN) - new comment field for the job or NULL for no change * err_code (OUT) - Moab error code * err_msg (OUT) - Moab error message */ static int _start_job(uint32_t jobid, int task_cnt, char *hostlist, char *tasklist, char *comment_ptr, int *err_code, char **err_msg) { int rc = 0, old_task_cnt = 1; struct job_record *job_ptr; /* Write lock on job info, read lock on node info */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK }; char *new_node_list = NULL; static char tmp_msg[128]; bitstr_t *new_bitmap = (bitstr_t *) NULL; bitstr_t *save_req_bitmap = (bitstr_t *) NULL; bitoff_t i, bsize; int ll; /* layout info index */ char *node_name, *node_idx, *node_cur, *save_req_nodes = NULL; size_t node_name_len; static uint32_t cr_test = 0, cr_enabled = 0; if (cr_test == 0) { select_g_get_info_from_plugin(SELECT_CR_PLUGIN, NULL, &cr_enabled); cr_test = 1; } lock_slurmctld(job_write_lock); job_ptr = find_job_record(jobid); if (job_ptr == NULL) { *err_code = -700; *err_msg = "No such job"; error("wiki: Failed to find job %u", jobid); rc = -1; goto fini; } if ((job_ptr->details == NULL) || (!IS_JOB_PENDING(job_ptr))) { *err_code = -700; *err_msg = "Job not pending, can't start"; error("wiki: Attempt to start job %u in state %s", jobid, job_state_string(job_ptr->job_state)); rc = -1; goto fini; } if (comment_ptr) { char *reserved = strstr(comment_ptr, "RESERVED:"); if (reserved) { reserved += 9; job_ptr->details->reserved_resources = strtol(reserved, NULL, 10); } xfree(job_ptr->comment); job_ptr->comment = xstrdup(comment_ptr); } if (task_cnt) { new_node_list = xstrdup(hostlist); if (node_name2bitmap(new_node_list, false, &new_bitmap) != 0) { *err_code = 
-700; *err_msg = "Invalid TASKLIST"; error("wiki: Attempt to set invalid node list for " "job %u, %s", jobid, hostlist); xfree(new_node_list); rc = -1; goto fini; } if (!bit_super_set(new_bitmap, avail_node_bitmap)) { /* Selected node is UP and not responding * or it just went DOWN */ *err_code = -700; *err_msg = "TASKLIST includes non-responsive node"; error("wiki: Attempt to use non-responsive nodes for " "job %u, %s", jobid, hostlist); xfree(new_node_list); FREE_NULL_BITMAP(new_bitmap); rc = -1; goto fini; } /* User excluded node list incompatible with Wiki * Exclude all nodes not explicitly requested */ FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap); job_ptr->details->exc_node_bitmap = bit_copy(new_bitmap); bit_not(job_ptr->details->exc_node_bitmap); } /* Build layout information from tasklist (assuming that Moab * sends a non-bracketed list of nodes, repeated as many times * as cpus should be used per node); at this point, node names * are comma-separated. This is _not_ a fast algorithm as it * performs many string compares. 
*/ xfree(job_ptr->details->req_node_layout); if (task_cnt && cr_enabled) { uint16_t cpus_per_task = MAX(1, job_ptr->details->cpus_per_task); job_ptr->details->req_node_layout = (uint16_t *) xmalloc(bit_set_count(new_bitmap) * sizeof(uint16_t)); bsize = bit_size(new_bitmap); for (i = 0, ll = -1; i < bsize; i++) { if (!bit_test(new_bitmap, i)) continue; ll++; node_name = node_record_table_ptr[i].name; node_name_len = strlen(node_name); if (node_name_len == 0) continue; node_cur = tasklist; while (*node_cur) { if ((node_idx = strstr(node_cur, node_name))) { if ((node_idx[node_name_len] == ',') || (node_idx[node_name_len] == '\0')) { job_ptr->details-> req_node_layout[ll] += cpus_per_task; } node_cur = strchr(node_idx, ','); if (node_cur) continue; } break; } } } /* save and update job state to start now */ save_req_nodes = job_ptr->details->req_nodes; job_ptr->details->req_nodes = new_node_list; save_req_bitmap = job_ptr->details->req_node_bitmap; job_ptr->details->req_node_bitmap = new_bitmap; old_task_cnt = job_ptr->details->min_cpus; job_ptr->details->min_cpus = MAX(task_cnt, old_task_cnt); job_ptr->priority = 100000000; fini: unlock_slurmctld(job_write_lock); if (rc) return rc; /* No errors so far */ (void) schedule(INFINITE); /* provides own locking */ /* Check to insure the job was actually started */ lock_slurmctld(job_write_lock); if (job_ptr->job_id != jobid) job_ptr = find_job_record(jobid); if (job_ptr && (job_ptr->job_id == jobid) && (!IS_JOB_RUNNING(job_ptr))) { uint16_t wait_reason = 0; char *wait_string; if (IS_JOB_FAILED(job_ptr)) wait_string = "Invalid request, job aborted"; else { wait_reason = job_ptr->state_reason; if (wait_reason == WAIT_HELD) { /* some job is completing, slurmctld did * not even try to schedule this job */ wait_reason = WAIT_RESOURCES; } wait_string = job_reason_string(wait_reason); job_ptr->state_reason = WAIT_HELD; xfree(job_ptr->state_desc); } *err_code = -910 - wait_reason; snprintf(tmp_msg, sizeof(tmp_msg), "Could not start 
job %u(%s): %s", jobid, new_node_list, wait_string); *err_msg = tmp_msg; error("wiki: %s", tmp_msg); /* restore some of job state */ job_ptr->priority = 0; job_ptr->details->min_cpus = old_task_cnt; rc = -1; } if (job_ptr && (job_ptr->job_id == jobid) && job_ptr->details) { /* Restore required node list in case job requeued */ xfree(job_ptr->details->req_nodes); job_ptr->details->req_nodes = save_req_nodes; FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap); job_ptr->details->req_node_bitmap = save_req_bitmap; FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap); xfree(job_ptr->details->req_node_layout); } else { error("wiki: start_job(%u) job missing", jobid); xfree(save_req_nodes); FREE_NULL_BITMAP(save_req_bitmap); } unlock_slurmctld(job_write_lock); schedule_node_save(); /* provides own locking */ schedule_job_save(); /* provides own locking */ return rc; }
/*
 * mysql_jobcomp_process_get_jobs - read job completion records from the
 *	jobcomp table
 * IN job_cond - optional filter; only step_list (matched on job id) and
 *	partition_list are consulted. NULL or empty lists select every record.
 * RET list of jobcomp_job_rec_t (list created with jobcomp_destroy_job),
 *	or NULL if the query could not be executed
 */
extern List mysql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond)
{
	char *query = NULL;
	char *extra = NULL;
	char *tmp = NULL;
	char *selected_part = NULL;
	slurmdb_selected_step_t *selected_step = NULL;
	ListIterator itr = NULL;
	int set = 0;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	int i;
	jobcomp_job_rec_t *job = NULL;
	char time_str[32];
	time_t temp_time;
	List job_list = list_create(jobcomp_destroy_job);

	/* Restrict to the requested job ids, if any */
	if (job_cond && job_cond->step_list &&
	    list_count(job_cond->step_list)) {
		set = 0;
		xstrcat(extra, " where (");
		itr = list_iterator_create(job_cond->step_list);
		while ((selected_step = list_next(itr))) {
			if (set)
				xstrcat(extra, " || ");
			/* %u rather than %d: a job id above INT_MAX would
			 * otherwise be rendered as a negative number
			 * (assumes jobid is an unsigned 32-bit field) */
			tmp = xstrdup_printf("jobid=%u",
					     selected_step->jobid);
			xstrcat(extra, tmp);
			set = 1;
			xfree(tmp);
		}
		list_iterator_destroy(itr);
		xstrcat(extra, ")");
	}

	/* Restrict to the requested partitions, if any */
	if (job_cond && job_cond->partition_list &&
	    list_count(job_cond->partition_list)) {
		set = 0;
		if (extra)
			xstrcat(extra, " && (");
		else
			xstrcat(extra, " where (");

		itr = list_iterator_create(job_cond->partition_list);
		while ((selected_part = list_next(itr))) {
			if (set)
				xstrcat(extra, " || ");
			/* NOTE(review): selected_part is placed into the SQL
			 * text unescaped; confirm callers only pass trusted
			 * partition names or escape quotes before this */
			tmp = xstrdup_printf("`partition`='%s'",
					      selected_part);
			xstrcat(extra, tmp);
			set = 1;
			xfree(tmp);
		}
		list_iterator_destroy(itr);
		xstrcat(extra, ")");
	}

	/* Column list follows the order of jobcomp_table_fields, which is
	 * what the JOBCOMP_REQ_* row indexes below are used against */
	i = 0;
	while (jobcomp_table_fields[i].name) {
		if (i)
			xstrcat(tmp, ", ");
		xstrcat(tmp, jobcomp_table_fields[i].name);
		i++;
	}

	query = xstrdup_printf("select %s from %s", tmp, jobcomp_table);
	xfree(tmp);

	if (extra) {
		xstrcat(query, extra);
		xfree(extra);
	}

	//info("query = %s", query);
	if (!(result =
	     mysql_db_query_ret(jobcomp_mysql_conn, query, 0))) {
		xfree(query);
		list_destroy(job_list);
		return NULL;
	}
	xfree(query);

	while ((row = mysql_fetch_row(result))) {
		/* A NULL column is treated as time 0 instead of being
		 * handed to atoi(), which must not receive NULL */
		int start_secs = 0, end_secs = 0;

		if (row[JOBCOMP_REQ_STARTTIME])
			start_secs = atoi(row[JOBCOMP_REQ_STARTTIME]);
		if (row[JOBCOMP_REQ_ENDTIME])
			end_secs = atoi(row[JOBCOMP_REQ_ENDTIME]);

		job = xmalloc(sizeof(jobcomp_job_rec_t));
		if (row[JOBCOMP_REQ_JOBID])
			job->jobid = slurm_atoul(row[JOBCOMP_REQ_JOBID]);
		job->partition = xstrdup(row[JOBCOMP_REQ_PARTITION]);

		temp_time = start_secs;
		slurm_make_time_str(&temp_time,
				    time_str,
				    sizeof(time_str));
		job->start_time = xstrdup(time_str);

		temp_time = end_secs;
		slurm_make_time_str(&temp_time,
				    time_str,
				    sizeof(time_str));
		job->end_time = xstrdup(time_str);

		job->elapsed_time = end_secs - start_secs;

		if (row[JOBCOMP_REQ_UID])
			job->uid = slurm_atoul(row[JOBCOMP_REQ_UID]);
		job->uid_name = xstrdup(row[JOBCOMP_REQ_USER_NAME]);
		if (row[JOBCOMP_REQ_GID])
			job->gid = slurm_atoul(row[JOBCOMP_REQ_GID]);
		job->gid_name = xstrdup(row[JOBCOMP_REQ_GROUP_NAME]);
		job->jobname = xstrdup(row[JOBCOMP_REQ_NAME]);
		job->nodelist = xstrdup(row[JOBCOMP_REQ_NODELIST]);
		if (row[JOBCOMP_REQ_NODECNT])
			job->node_cnt = slurm_atoul(row[JOBCOMP_REQ_NODECNT]);
		if (row[JOBCOMP_REQ_STATE]) {
			/* stored as a number, reported as the state name */
			i = atoi(row[JOBCOMP_REQ_STATE]);
			job->state = xstrdup(job_state_string(i));
		}
		job->timelimit = xstrdup(row[JOBCOMP_REQ_TIMELIMIT]);
		if (row[JOBCOMP_REQ_MAXPROCS])
			job->max_procs = slurm_atoul(row[JOBCOMP_REQ_MAXPROCS]);
		job->connection = xstrdup(row[JOBCOMP_REQ_CONNECTION]);
		job->reboot = xstrdup(row[JOBCOMP_REQ_REBOOT]);
		job->rotate = xstrdup(row[JOBCOMP_REQ_ROTATE]);
		job->geo = xstrdup(row[JOBCOMP_REQ_GEOMETRY]);
		job->bg_start_point = xstrdup(row[JOBCOMP_REQ_START]);
		job->blockid = xstrdup(row[JOBCOMP_REQ_BLOCKID]);
		list_append(job_list, job);
	}

	mysql_free_result(result);

	return job_list;
}
/*
 * _search_entry - show the popup window for a search request
 * IN/OUT sview_search_info - search_type selects the page and the window
 *	title; int_data and gchar_data carry the value searched for. For
 *	SEARCH_BLOCK_SIZE, int_data is overwritten with the result of
 *	revert_num_unit(gchar_data).
 *
 * If a popup in popup_list already has the computed title it is presented
 * again; otherwise a new popup is created, the request is copied into it
 * and popup_thr is started for it.
 */
void _search_entry(sview_search_info_t *sview_search_info)
{
	int id = 0;
	char title[100];
	ListIterator itr = NULL;
	popup_info_t *popup_win = NULL;
	GError *error = NULL;
	char *upper = NULL, *lower = NULL;
	char *type = (cluster_flags & CLUSTER_FLAG_BG) ? "Midplane" : "Node";

	/* Neither a numeric nor a non-empty text value was supplied */
	if ((sview_search_info->int_data == NO_VAL) &&
	    !(sview_search_info->gchar_data &&
	      sview_search_info->gchar_data[0])) {
		g_print("nothing given to search for.\n");
		return;
	}

	/* Pick the page to open and build the window title */
	switch (sview_search_info->search_type) {
	case SEARCH_JOB_STATE:
		id = JOB_PAGE;
		upper = job_state_string(sview_search_info->int_data);
		lower = str_tolower(upper);
		snprintf(title, sizeof(title),
			 "Job(s) in the %s state", lower);
		xfree(lower);
		break;
	case SEARCH_JOB_ID:
		id = JOB_PAGE;
		snprintf(title, sizeof(title), "Job %s info",
			 sview_search_info->gchar_data);
		break;
	case SEARCH_JOB_USER:
		id = JOB_PAGE;
		snprintf(title, sizeof(title), "Job(s) info for user %s",
			 sview_search_info->gchar_data);
		break;
	case SEARCH_BLOCK_STATE:
		id = BLOCK_PAGE;
		upper = bg_block_state_string(sview_search_info->int_data);
		lower = str_tolower(upper);
		snprintf(title, sizeof(title),
			 "BG Block(s) in the %s state", lower);
		xfree(lower);
		break;
	case SEARCH_BLOCK_NAME:
		id = BLOCK_PAGE;
		snprintf(title, sizeof(title), "Block %s info",
			 sview_search_info->gchar_data);
		break;
	case SEARCH_BLOCK_SIZE:
		id = BLOCK_PAGE;
		sview_search_info->int_data =
			revert_num_unit(sview_search_info->gchar_data);
		if (sview_search_info->int_data == -1)
			return;
		snprintf(title, sizeof(title), "Block(s) of size %d cnodes",
			 sview_search_info->int_data);
		break;
	case SEARCH_PARTITION_NAME:
		id = PART_PAGE;
		snprintf(title, sizeof(title), "Partition %s info",
			 sview_search_info->gchar_data);
		break;
	case SEARCH_PARTITION_STATE:
		id = PART_PAGE;
		snprintf(title, sizeof(title), "%s",
			 sview_search_info->int_data ?
			 "Partition(s) that are up" :
			 "Partition(s) that are down");
		break;
	case SEARCH_NODE_NAME:
		id = NODE_PAGE;
		snprintf(title, sizeof(title), "%s(s) %s info", type,
			 sview_search_info->gchar_data);
		break;
	case SEARCH_NODE_STATE:
		id = NODE_PAGE;
		upper = node_state_string(sview_search_info->int_data);
		lower = str_tolower(upper);
		snprintf(title, sizeof(title),
			 "%s(s) in the %s state", type, lower);
		xfree(lower);
		break;
	case SEARCH_RESERVATION_NAME:
		id = RESV_PAGE;
		snprintf(title, sizeof(title), "Reservation %s info",
			 sview_search_info->gchar_data);
		break;
	default:
		g_print("unknown search type %d.\n",
			sview_search_info->search_type);
		return;
	}

	/* Look for an existing popup with this title; NULL if none matches */
	itr = list_iterator_create(popup_list);
	while ((popup_win = list_next(itr))) {
		if (popup_win->spec_info &&
		    !strcmp(popup_win->spec_info->title, title))
			break;
	}
	list_iterator_destroy(itr);

	if (popup_win) {
		/* Already open: present it instead of creating a duplicate */
		gtk_window_present(GTK_WINDOW(popup_win->popup));
		return;
	}

	popup_win = create_popup_info(id, id, title);
	memcpy(popup_win->spec_info->search_info, sview_search_info,
	       sizeof(sview_search_info_t));

	if (!g_thread_create((gpointer)popup_thr, popup_win, FALSE, &error))
		g_printerr ("Failed to create main popup thread: %s\n",
			    error->message);
}
extern int slurm_jobcomp_log_record ( struct job_record *job_ptr ) { int rc = SLURM_SUCCESS; char job_rec[1024]; char usr_str[32], grp_str[32], start_str[32], end_str[32], lim_str[32]; char *resv_name, *gres, *account, *qos, *wckey, *cluster; char submit_time[32], eligible_time[32], array_id[64], pack_id[64]; char select_buf[128], *state_string, *work_dir; size_t offset = 0, tot_size, wrote; uint32_t job_state; uint32_t time_limit; if ((log_name == NULL) || (job_comp_fd < 0)) { error("JobCompLoc log file %s not open", log_name); return SLURM_ERROR; } slurm_mutex_lock( &file_lock ); _get_user_name(job_ptr->user_id, usr_str, sizeof(usr_str)); _get_group_name(job_ptr->group_id, grp_str, sizeof(grp_str)); if ((job_ptr->time_limit == NO_VAL) && job_ptr->part_ptr) time_limit = job_ptr->part_ptr->max_time; else time_limit = job_ptr->time_limit; if (time_limit == INFINITE) strcpy(lim_str, "UNLIMITED"); else { snprintf(lim_str, sizeof(lim_str), "%lu", (unsigned long) time_limit); } if (job_ptr->job_state & JOB_RESIZING) { time_t now = time(NULL); state_string = job_state_string(job_ptr->job_state); if (job_ptr->resize_time) { _make_time_str(&job_ptr->resize_time, start_str, sizeof(start_str)); } else { _make_time_str(&job_ptr->start_time, start_str, sizeof(start_str)); } _make_time_str(&now, end_str, sizeof(end_str)); } else { /* Job state will typically have JOB_COMPLETING or JOB_RESIZING * flag set when called. We remove the flags to get the eventual * completion state: JOB_FAILED, JOB_TIMEOUT, etc. */ job_state = job_ptr->job_state & JOB_STATE_BASE; state_string = job_state_string(job_state); if (job_ptr->resize_time) { _make_time_str(&job_ptr->resize_time, start_str, sizeof(start_str)); } else if (job_ptr->start_time > job_ptr->end_time) { /* Job cancelled while pending and * expected start time is in the future. 
*/ snprintf(start_str, sizeof(start_str), "Unknown"); } else { _make_time_str(&job_ptr->start_time, start_str, sizeof(start_str)); } _make_time_str(&job_ptr->end_time, end_str, sizeof(end_str)); } if (job_ptr->details && job_ptr->details->work_dir) work_dir = job_ptr->details->work_dir; else work_dir = "unknown"; if (job_ptr->resv_name && job_ptr->resv_name[0]) resv_name = job_ptr->resv_name; else resv_name = ""; if (job_ptr->gres_req && job_ptr->gres_req[0]) gres = job_ptr->gres_req; else gres = ""; if (job_ptr->account && job_ptr->account[0]) account = job_ptr->account; else account = ""; if (job_ptr->qos_ptr != NULL) { qos = job_ptr->qos_ptr->name; } else qos = ""; if (job_ptr->wckey && job_ptr->wckey[0]) wckey = job_ptr->wckey; else wckey = ""; if (job_ptr->assoc_ptr != NULL) cluster = job_ptr->assoc_ptr->cluster; else cluster = "unknown"; if (job_ptr->details && job_ptr->details->submit_time) { _make_time_str(&job_ptr->details->submit_time, submit_time, sizeof(submit_time)); } else { snprintf(submit_time, sizeof(submit_time), "unknown"); } if (job_ptr->details && job_ptr->details->begin_time) { _make_time_str(&job_ptr->details->begin_time, eligible_time, sizeof(eligible_time)); } else { snprintf(eligible_time, sizeof(eligible_time), "unknown"); } if (job_ptr->array_task_id != NO_VAL) { snprintf(array_id, sizeof(array_id), " ArrayJobId=%u ArrayTaskId=%u", job_ptr->array_job_id, job_ptr->array_task_id); } else { array_id[0] = '\0'; } if (job_ptr->pack_job_id) { snprintf(pack_id, sizeof(pack_id), " PackJobId=%u PackJobOffset=%u", job_ptr->pack_job_id, job_ptr->pack_job_offset); } else { pack_id[0] = '\0'; } select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, select_buf, sizeof(select_buf), SELECT_PRINT_MIXED); snprintf(job_rec, sizeof(job_rec), JOB_FORMAT, (unsigned long) job_ptr->job_id, usr_str, (unsigned long) job_ptr->user_id, grp_str, (unsigned long) job_ptr->group_id, job_ptr->name, state_string, job_ptr->partition, lim_str, start_str, end_str, 
job_ptr->nodes, job_ptr->node_cnt, job_ptr->total_cpus, work_dir, resv_name, gres, account, qos, wckey, cluster, submit_time, eligible_time, array_id, pack_id, job_ptr->derived_ec, job_ptr->exit_code, select_buf); tot_size = strlen(job_rec); while (offset < tot_size) { wrote = write(job_comp_fd, job_rec + offset, tot_size - offset); if (wrote == -1) { if (errno == EAGAIN) continue; else { plugin_errno = errno; rc = SLURM_ERROR; break; } } offset += wrote; } slurm_mutex_unlock( &file_lock ); return rc; }