/* * Verify cpu_freq command line option * * --cpu-freq=arg * where arg is p1{-p2{:p3}} * * - p1 can be [#### | low | medium | high | highm1] * which will set the current frequency, and set the governor to * UserSpace. * - p1 can be [Conservative | OnDemand | Performance | PowerSave | UserSpace] * which will set the governor to the corresponding value. * - When p2 is present, p1 will be the minimum frequency and p2 will be * the maximum. The governor will not be changed. * - p2 can be [#### | medium | high | highm1] p2 must be greater than p1. * - If the current frequency is < min, it will be set to min. * Likewise, if the current frequency is > max, it will be set to max. * - p3 can be [Conservative | OnDemand | Performance | PowerSave | UserSpace] * which will set the governor to the corresponding value. * When p3 is UserSpace, the current frequency is set to p2. * p2 will have been set by PowerCapping. * * returns -1 on error, 0 otherwise */ extern int cpu_freq_verify_cmdline(const char *arg, uint32_t *cpu_freq_min, uint32_t *cpu_freq_max, uint32_t *cpu_freq_gov) { char *poscolon, *posdash; char *p1=NULL, *p2=NULL, *p3=NULL; uint32_t frequency; int rc = 0; if (cpu_freq_govs == 0) cpu_freq_govs = slurm_get_cpu_freq_govs(); if (arg == NULL || cpu_freq_min == NULL || cpu_freq_max == NULL || cpu_freq_gov == NULL) { return -1; } *cpu_freq_min = NO_VAL; *cpu_freq_max = NO_VAL; *cpu_freq_gov = NO_VAL; poscolon = strchr(arg,':'); if (poscolon) { p3 = xstrdup((poscolon+1)); } posdash = strchr(arg,'-'); if (posdash) { p1 = xstrndup(arg, (posdash-arg)); if (poscolon) { p2 = xstrndup((posdash+1), ((poscolon-posdash)-1)); } else { p2 = xstrdup((posdash+1)); } } else { if (poscolon) { p1 = xstrndup(arg, (poscolon-arg)); } else { p1 = xstrdup(arg); } } frequency = _cpu_freq_check_gov(p1, 0); if (frequency != 0) { if (p3) { error("governor cannot be specified twice " "%s{-}:%s in --cpu-freq", p1, p3); rc = -1; goto clean; } *cpu_freq_gov = frequency; } else { frequency = 
_cpu_freq_check_freq(p1); if (frequency == 0) { rc = -1; goto clean; } *cpu_freq_max = frequency; } if (p2) { frequency = _cpu_freq_check_freq(p2); if (frequency == 0) { rc = -1; goto clean; } *cpu_freq_min = *cpu_freq_max; *cpu_freq_max = frequency; if (*cpu_freq_max < *cpu_freq_min) { error("min cpu-frec (%s) must be < max cpu-freq (%s)", p1, p2); rc = -1; goto clean; } } if (p3) { if (!p2) { error("gov on cpu-frec (%s) illegal without max", p3); rc = -1; goto clean; } frequency = _cpu_freq_check_gov(p3, 0); if (frequency == 0) { error("illegal governor: %s on --cpu-freq", p3); rc = -1; goto clean; } *cpu_freq_gov = frequency; } clean: if (*cpu_freq_gov != NO_VAL) { if (((*cpu_freq_gov & cpu_freq_govs) & ~CPU_FREQ_RANGE_FLAG) == 0) { error("governor on %s is not allowed in slurm.conf", arg); *cpu_freq_gov = NO_VAL; rc = -1; } } if (debug_flags & DEBUG_FLAG_CPU_FREQ) { cpu_freq_debug("command", "NO_VAL", NULL, 0, *cpu_freq_gov, *cpu_freq_min, *cpu_freq_max, NO_VAL); } xfree(p1); xfree(p2); xfree(p3); return rc; }
/*
 * reset the cpus used by the process to their
 * default frequency and governor type
 */
extern void
cpu_freq_reset(stepd_step_rec_t *job)
{
	int i, rc, fd;
	char freq_detail[100];

	if ((!cpu_freq_count) || (!cpufreq))
		return;

	for (i = 0; i < cpu_freq_count; i++) {
		/* BUG FIX: the original tested new_min_freq twice and never
		 * new_max_freq, so a CPU whose only change was the max
		 * frequency was skipped and never restored. */
		if (cpufreq[i].new_frequency == NO_VAL
		    && cpufreq[i].new_min_freq == NO_VAL
		    && cpufreq[i].new_max_freq == NO_VAL
		    && cpufreq[i].new_governor[0] == '\0')
			continue; /* Nothing to reset on this CPU */

		/* Only reset CPUs this job actually owns */
		fd = _test_cpu_owner_lock(i, job->jobid);
		if (fd < 0)
			continue;

		if (cpufreq[i].new_frequency != NO_VAL) {
			/* Restoring an explicit frequency requires the
			 * userspace governor */
			rc = _cpu_freq_set_gov(job, i, "userspace");
			if (rc == SLURM_FAILURE)
				continue;
			rc = _cpu_freq_set_scaling_freq(job, i,
					cpufreq[i].org_frequency,
					"scaling_setspeed");
			if (rc == SLURM_FAILURE)
				continue;
			cpufreq[i].new_governor[0] = 'u'; /* force gov reset */
		}
		/* Max must be set before min, per
		 * www.kernel.org/doc/Documentation/cpu-freq/user-guide.txt
		 */
		if (cpufreq[i].new_max_freq != NO_VAL) {
			rc = _cpu_freq_set_scaling_freq(job, i,
					cpufreq[i].org_max_freq,
					"scaling_max_freq");
			if (rc == SLURM_FAILURE)
				continue;
		}
		if (cpufreq[i].new_min_freq != NO_VAL) {
			rc = _cpu_freq_set_scaling_freq(job, i,
					cpufreq[i].org_min_freq,
					"scaling_min_freq");
			if (rc == SLURM_FAILURE)
				continue;
		}
		if (cpufreq[i].new_governor[0] != '\0') {
			rc = _cpu_freq_set_gov(job, i,
					       cpufreq[i].org_governor);
			if (rc == SLURM_FAILURE)
				continue;
		}

		if (debug_flags & DEBUG_FLAG_CPU_FREQ) {
			cpu_freq_debug(NULL, NULL,
				       freq_detail, sizeof(freq_detail),
				       NO_VAL, cpufreq[i].org_min_freq,
				       cpufreq[i].org_max_freq,
				       cpufreq[i].org_frequency);
			if (cpufreq[i].new_governor[0] != '\0') {
				info("cpu_freq: reset cpu=%d %s Governor=%s",
				     i, freq_detail, cpufreq[i].org_governor);
			} else {
				info("cpu_freq: reset cpu=%d %s",
				     i, freq_detail);
			}
		}
	}
	/* BUG FIX: removed xfree(slurmd_spooldir). Freeing this global
	 * spool-directory path from a per-step reset routine would leave
	 * it dangling for any later step (e.g. the CPU owner-lock path is
	 * built under the spool dir); its lifetime belongs to daemon
	 * shutdown, not here. */
}
/*
 * slurm_sprint_job_info - output information about a specific Slurm
 *	job based upon message as loaded using slurm_load_jobs
 * IN job_ptr - an individual job information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *	NULL is returned on failure.
 */
extern char *
slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner )
{
	int i, j, k;
	char time_str[32], *group_name, *user_name;
	char *gres_last = "", tmp1[128], tmp2[128];
	char *tmp6_ptr;
	char tmp_line[1024 * 128];
	char tmp_path[MAXPATHLEN];
	char *ionodes = NULL;
	uint16_t exit_status = 0, term_sig = 0;
	job_resources_t *job_resrcs = job_ptr->job_resrcs;
	char *out = NULL;
	time_t run_time;
	uint32_t min_nodes, max_nodes = 0;
	char *nodelist = "NodeList";
	bitstr_t *cpu_bitmap;
	char *host;
	int sock_inx, sock_reps, last;
	int abs_node_inx, rel_node_inx;
	int64_t nice;
	int bit_inx, bit_reps;
	uint64_t *last_mem_alloc_ptr = NULL;
	uint64_t last_mem_alloc = NO_VAL64;
	char *last_hosts;
	hostlist_t hl, hl_last;
	char select_buf[122];
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	uint32_t threads;
	char *line_end = (one_liner) ? " " : "\n ";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		nodelist = "MidplaneList";
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &ionodes);
	}

	/****** Line 1 ******/
	xstrfmtcat(out, "JobId=%u ", job_ptr->job_id);
	if (job_ptr->array_job_id) {
		if (job_ptr->array_task_str) {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%s ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_str);
		} else {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%u ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_id);
		}
	}
	xstrfmtcat(out, "JobName=%s", job_ptr->name);
	xstrcat(out, line_end);

	/****** Line 2 ******/
	user_name = uid_to_string((uid_t) job_ptr->user_id);
	group_name = gid_to_string((gid_t) job_ptr->group_id);
	xstrfmtcat(out, "UserId=%s(%u) GroupId=%s(%u) MCS_label=%s",
		   user_name, job_ptr->user_id, group_name,
		   job_ptr->group_id,
		   (job_ptr->mcs_label==NULL) ? "N/A" : job_ptr->mcs_label);
	xfree(user_name);
	xfree(group_name);
	xstrcat(out, line_end);

	/****** Line 3 ******/
	nice = ((int64_t)job_ptr->nice) - NICE_OFFSET;
	xstrfmtcat(out, "Priority=%u Nice=%"PRIi64" Account=%s QOS=%s",
		   job_ptr->priority, nice, job_ptr->account, job_ptr->qos);
	if (slurm_get_track_wckey())
		xstrfmtcat(out, " WCKey=%s", job_ptr->wckey);
	xstrcat(out, line_end);

	/****** Line 4 ******/
	xstrfmtcat(out, "JobState=%s ",
		   job_state_string(job_ptr->job_state));
	if (job_ptr->state_desc) {
		/* Replace white space with underscore for easier parsing */
		for (j=0; job_ptr->state_desc[j]; j++) {
			if (isspace((int)job_ptr->state_desc[j]))
				job_ptr->state_desc[j] = '_';
		}
		xstrfmtcat(out, "Reason=%s ", job_ptr->state_desc);
	} else
		xstrfmtcat(out, "Reason=%s ",
			   job_reason_string(job_ptr->state_reason));
	xstrfmtcat(out, "Dependency=%s", job_ptr->dependency);
	xstrcat(out, line_end);

	/****** Line 5 ******/
	xstrfmtcat(out, "Requeue=%u Restarts=%u BatchFlag=%u Reboot=%u ",
		   job_ptr->requeue, job_ptr->restart_cnt,
		   job_ptr->batch_flag, job_ptr->reboot);
	if (WIFSIGNALED(job_ptr->exit_code))
		term_sig = WTERMSIG(job_ptr->exit_code);
	exit_status = WEXITSTATUS(job_ptr->exit_code);
	xstrfmtcat(out, "ExitCode=%u:%u", exit_status, term_sig);
	xstrcat(out, line_end);

	/****** Line 5a (optional) ******/
	if (job_ptr->show_flags & SHOW_DETAIL) {
		if (WIFSIGNALED(job_ptr->derived_ec))
			term_sig = WTERMSIG(job_ptr->derived_ec);
		else
			term_sig = 0;
		exit_status = WEXITSTATUS(job_ptr->derived_ec);
		xstrfmtcat(out, "DerivedExitCode=%u:%u", exit_status,
			   term_sig);
		xstrcat(out, line_end);
	}

	/****** Line 6 ******/
	if (IS_JOB_PENDING(job_ptr))
		run_time = 0;
	else if (IS_JOB_SUSPENDED(job_ptr))
		run_time = job_ptr->pre_sus_time;
	else {
		time_t end_time;
		if (IS_JOB_RUNNING(job_ptr) || (job_ptr->end_time == 0))
			end_time = time(NULL);
		else
			end_time = job_ptr->end_time;
		if (job_ptr->suspend_time) {
			run_time = (time_t)
				(difftime(end_time, job_ptr->suspend_time)
				 + job_ptr->pre_sus_time);
		} else
			run_time = (time_t)
				difftime(end_time, job_ptr->start_time);
	}
	secs2time_str(run_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "RunTime=%s ", time_str);

	if (job_ptr->time_limit == NO_VAL)
		xstrcat(out, "TimeLimit=Partition_Limit ");
	else {
		mins2time_str(job_ptr->time_limit, time_str,
			      sizeof(time_str));
		xstrfmtcat(out, "TimeLimit=%s ", time_str);
	}
	if (job_ptr->time_min == 0)
		xstrcat(out, "TimeMin=N/A");
	else {
		mins2time_str(job_ptr->time_min, time_str,
			      sizeof(time_str));
		xstrfmtcat(out, "TimeMin=%s", time_str);
	}
	xstrcat(out, line_end);

	/****** Line 7 ******/
	slurm_make_time_str(&job_ptr->submit_time, time_str,
			    sizeof(time_str));
	xstrfmtcat(out, "SubmitTime=%s ", time_str);
	slurm_make_time_str(&job_ptr->eligible_time, time_str,
			    sizeof(time_str));
	xstrfmtcat(out, "EligibleTime=%s", time_str);
	xstrcat(out, line_end);

	/****** Line 8 (optional) ******/
	if (job_ptr->resize_time) {
		slurm_make_time_str(&job_ptr->resize_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "ResizeTime=%s", time_str);
		xstrcat(out, line_end);
	}

	/****** Line 9 ******/
	slurm_make_time_str(&job_ptr->start_time, time_str,
			    sizeof(time_str));
	xstrfmtcat(out, "StartTime=%s ", time_str);

	if ((job_ptr->time_limit == INFINITE) &&
	    (job_ptr->end_time > time(NULL)))
		xstrcat(out, "EndTime=Unknown ");
	else {
		slurm_make_time_str(&job_ptr->end_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "EndTime=%s ", time_str);
	}

	if (job_ptr->deadline) {
		slurm_make_time_str(&job_ptr->deadline, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "Deadline=%s", time_str);
	} else {
		xstrcat(out, "Deadline=N/A");
	}
	xstrcat(out, line_end);

	/****** Line 10 ******/
	if (job_ptr->preempt_time == 0)
		xstrcat(out, "PreemptTime=None ");
	else {
		slurm_make_time_str(&job_ptr->preempt_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "PreemptTime=%s ", time_str);
	}
	if (job_ptr->suspend_time) {
		slurm_make_time_str(&job_ptr->suspend_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "SuspendTime=%s ", time_str);
	} else
		xstrcat(out, "SuspendTime=None ");
	xstrfmtcat(out, "SecsPreSuspend=%ld",
		   (long int)job_ptr->pre_sus_time);
	xstrcat(out, line_end);

	/****** Line 11 ******/
	xstrfmtcat(out, "Partition=%s AllocNode:Sid=%s:%u",
		   job_ptr->partition, job_ptr->alloc_node,
		   job_ptr->alloc_sid);
	xstrcat(out, line_end);

	/****** Line 12 ******/
	xstrfmtcat(out, "Req%s=%s Exc%s=%s", nodelist, job_ptr->req_nodes,
		   nodelist, job_ptr->exc_nodes);
	xstrcat(out, line_end);

	/****** Line 13 ******/
	xstrfmtcat(out, "%s=%s", nodelist, job_ptr->nodes);
	if (job_ptr->nodes && ionodes) {
		xstrfmtcat(out, "[%s]", ionodes);
		xfree(ionodes);
	}
	if (job_ptr->sched_nodes)
		xstrfmtcat(out, " Sched%s=%s", nodelist,
			   job_ptr->sched_nodes);
	xstrcat(out, line_end);

	/****** Line 14 (optional) ******/
	if (job_ptr->batch_host) {
		xstrfmtcat(out, "BatchHost=%s", job_ptr->batch_host);
		xstrcat(out, line_end);
	}

	/****** Line 14a (optional) ******/
	if (job_ptr->fed_siblings) {
		xstrfmtcat(out, "FedOrigin=%s FedSiblings=%s",
			   job_ptr->fed_origin_str,
			   job_ptr->fed_siblings_str);
		xstrcat(out, line_end);
	}

	/****** Line 15 ******/
	if (cluster_flags & CLUSTER_FLAG_BG) {
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &min_nodes);
		if ((min_nodes == 0) || (min_nodes == NO_VAL)) {
			min_nodes = job_ptr->num_nodes;
			max_nodes = job_ptr->max_nodes;
		} else if (job_ptr->max_nodes)
			max_nodes = min_nodes;
	} else if (IS_JOB_PENDING(job_ptr)) {
		min_nodes = job_ptr->num_nodes;
		max_nodes = job_ptr->max_nodes;
		if (max_nodes && (max_nodes < min_nodes))
			min_nodes = max_nodes;
	} else {
		min_nodes = job_ptr->num_nodes;
		max_nodes = 0;
	}

	_sprint_range(tmp_line, sizeof(tmp_line), min_nodes, max_nodes);
	xstrfmtcat(out, "NumNodes=%s ", tmp_line);
	_sprint_range(tmp_line, sizeof(tmp_line), job_ptr->num_cpus,
		      job_ptr->max_cpus);
	xstrfmtcat(out, "NumCPUs=%s ", tmp_line);
	xstrfmtcat(out, "NumTasks=%u ", job_ptr->num_tasks);
	xstrfmtcat(out, "CPUs/Task=%u ", job_ptr->cpus_per_task);

	if (job_ptr->boards_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "ReqB:S:C:T=*:");
	else
		xstrfmtcat(out, "ReqB:S:C:T=%u:",
			   job_ptr->boards_per_node);
	if (job_ptr->sockets_per_board == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->sockets_per_board);
	if (job_ptr->cores_per_socket == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->cores_per_socket);
	if (job_ptr->threads_per_core == (uint16_t) NO_VAL)
		xstrcat(out, "*");
	else
		xstrfmtcat(out, "%u", job_ptr->threads_per_core);
	xstrcat(out, line_end);

	/****** Line 16 ******/
	/* TRES should already have been converted at this point from simple */
	xstrfmtcat(out, "TRES=%s",
		   job_ptr->tres_alloc_str ? job_ptr->tres_alloc_str :
					     job_ptr->tres_req_str);
	xstrcat(out, line_end);

	/****** Line 17 ******/
	if (job_ptr->sockets_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "Socks/Node=* ");
	else
		xstrfmtcat(out, "Socks/Node=%u ",
			   job_ptr->sockets_per_node);
	if (job_ptr->ntasks_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "NtasksPerN:B:S:C=*:");
	else
		xstrfmtcat(out, "NtasksPerN:B:S:C=%u:",
			   job_ptr->ntasks_per_node);
	if (job_ptr->ntasks_per_board == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_board);
	if ((job_ptr->ntasks_per_socket == (uint16_t) NO_VAL) ||
	    (job_ptr->ntasks_per_socket == (uint16_t) INFINITE))
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_socket);
	if ((job_ptr->ntasks_per_core == (uint16_t) NO_VAL) ||
	    (job_ptr->ntasks_per_core == (uint16_t) INFINITE))
		xstrcat(out, "* ");
	else
		xstrfmtcat(out, "%u ", job_ptr->ntasks_per_core);
	if (job_ptr->core_spec == (uint16_t) NO_VAL)
		xstrcat(out, "CoreSpec=*");
	else if (job_ptr->core_spec & CORE_SPEC_THREAD)
		xstrfmtcat(out, "ThreadSpec=%d",
			   (job_ptr->core_spec & (~CORE_SPEC_THREAD)));
	else
		xstrfmtcat(out, "CoreSpec=%u", job_ptr->core_spec);
	xstrcat(out, line_end);

	if (job_resrcs && cluster_flags & CLUSTER_FLAG_BG) {
		if ((job_resrcs->cpu_array_cnt > 0) &&
		    (job_resrcs->cpu_array_value) &&
		    (job_resrcs->cpu_array_reps)) {
			int length = 0;
			xstrcat(out, "CPUs=");
			for (i = 0; i < job_resrcs->cpu_array_cnt; i++) {
				/* only print 60 characters worth of this
				 * record */
				if (length > 60) {
					/* skip to last CPU group entry */
					if (i <
					    job_resrcs->cpu_array_cnt - 1) {
						continue;
					}
					/* add ellipsis before last entry */
					xstrcat(out, "...,");
				}
				/* BUG FIX: index cpu_array_value[] (the
				 * array the guard above checks); cpus[] is
				 * the per-node array sized by nhosts, not
				 * cpu_array_cnt */
				length += xstrfmtcat(out, "%d",
					   job_resrcs->cpu_array_value[i]);
				if (job_resrcs->cpu_array_reps[i] > 1) {
					length += xstrfmtcat(out, "*%d",
					   job_resrcs->cpu_array_reps[i]);
				}
				if (i < job_resrcs->cpu_array_cnt - 1) {
					xstrcat(out, ",");
					length++;
				}
			}
			xstrcat(out, line_end);
		}
	} else if (job_resrcs && job_resrcs->core_bitmap &&
		   ((last = bit_fls(job_resrcs->core_bitmap)) != -1)) {
		hl = hostlist_create(job_resrcs->nodes);
		if (!hl) {
			error("slurm_sprint_job_info: hostlist_create: %s",
			      job_resrcs->nodes);
			return NULL;
		}
		hl_last = hostlist_create(NULL);
		if (!hl_last) {
			error("slurm_sprint_job_info: hostlist_create: NULL");
			hostlist_destroy(hl);
			return NULL;
		}

		bit_inx = 0;
		i = sock_inx = sock_reps = 0;
		abs_node_inx = job_ptr->node_inx[i];

		gres_last = "";
		/* tmp1[] stores the current cpu(s) allocated */
		tmp2[0] = '\0';	/* stores last cpu(s) allocated */
		for (rel_node_inx=0; rel_node_inx < job_resrcs->nhosts;
		     rel_node_inx++) {

			if (sock_reps >=
			    job_resrcs->sock_core_rep_count[sock_inx]) {
				sock_inx++;
				sock_reps = 0;
			}
			sock_reps++;

			bit_reps = job_resrcs->sockets_per_node[sock_inx] *
				   job_resrcs->cores_per_socket[sock_inx];
			host = hostlist_shift(hl);
			threads = _threads_per_core(host);
			cpu_bitmap = bit_alloc(bit_reps * threads);
			for (j = 0; j < bit_reps; j++) {
				if (bit_test(job_resrcs->core_bitmap,
					     bit_inx)){
					for (k = 0; k < threads; k++)
						bit_set(cpu_bitmap,
							(j * threads) + k);
				}
				bit_inx++;
			}
			bit_fmt(tmp1, sizeof(tmp1), cpu_bitmap);
			FREE_NULL_BITMAP(cpu_bitmap);
			/*
			 * If the allocation values for this host are not the
			 * same as the last host, print the report of the last
			 * group of hosts that had identical allocation
			 * values.
			 */
			if (xstrcmp(tmp1, tmp2) ||
			    ((rel_node_inx < job_ptr->gres_detail_cnt) &&
			     xstrcmp(job_ptr->gres_detail_str[rel_node_inx],
				     gres_last)) ||
			    (last_mem_alloc_ptr !=
			     job_resrcs->memory_allocated) ||
			    (job_resrcs->memory_allocated &&
			     (last_mem_alloc !=
			      job_resrcs->memory_allocated[rel_node_inx]))) {
				if (hostlist_count(hl_last)) {
					last_hosts =
						hostlist_ranged_string_xmalloc(
						hl_last);
					xstrfmtcat(out,
						   " Nodes=%s CPU_IDs=%s "
						   "Mem=%"PRIu64" GRES_IDX=%s",
						   last_hosts, tmp2,
						   last_mem_alloc_ptr ?
						   last_mem_alloc : 0,
						   gres_last);
					xfree(last_hosts);
					xstrcat(out, line_end);

					hostlist_destroy(hl_last);
					hl_last = hostlist_create(NULL);
				}
				strcpy(tmp2, tmp1);
				if (rel_node_inx <
				    job_ptr->gres_detail_cnt) {
					gres_last = job_ptr->
						gres_detail_str[rel_node_inx];
				} else {
					gres_last = "";
				}
				last_mem_alloc_ptr =
					job_resrcs->memory_allocated;
				if (last_mem_alloc_ptr)
					last_mem_alloc = job_resrcs->
						memory_allocated[rel_node_inx];
				else
					last_mem_alloc = NO_VAL64;
			}
			hostlist_push_host(hl_last, host);
			free(host);

			if (bit_inx > last)
				break;

			if (abs_node_inx > job_ptr->node_inx[i+1]) {
				i += 2;
				abs_node_inx = job_ptr->node_inx[i];
			} else {
				abs_node_inx++;
			}
		}

		if (hostlist_count(hl_last)) {
			last_hosts = hostlist_ranged_string_xmalloc(hl_last);
			xstrfmtcat(out,
				   " Nodes=%s CPU_IDs=%s Mem=%"PRIu64
				   " GRES_IDX=%s",
				   last_hosts, tmp2,
				   last_mem_alloc_ptr ? last_mem_alloc : 0,
				   gres_last);
			xfree(last_hosts);
			xstrcat(out, line_end);
		}
		hostlist_destroy(hl);
		hostlist_destroy(hl_last);
	}
	/****** Line 18 ******/
	if (job_ptr->pn_min_memory & MEM_PER_CPU) {
		job_ptr->pn_min_memory &= (~MEM_PER_CPU);
		tmp6_ptr = "CPU";
	} else
		tmp6_ptr = "Node";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		convert_num_unit((float)job_ptr->pn_min_cpus, tmp1,
				 sizeof(tmp1), UNIT_NONE, NO_VAL,
				 CONVERT_NUM_UNIT_EXACT);
		xstrfmtcat(out, "MinCPUsNode=%s ", tmp1);
	} else {
		xstrfmtcat(out, "MinCPUsNode=%u ", job_ptr->pn_min_cpus);
	}

	convert_num_unit((float)job_ptr->pn_min_memory, tmp1, sizeof(tmp1),
			 UNIT_MEGA, NO_VAL, CONVERT_NUM_UNIT_EXACT);
	convert_num_unit((float)job_ptr->pn_min_tmp_disk, tmp2,
			 sizeof(tmp2), UNIT_MEGA, NO_VAL,
			 CONVERT_NUM_UNIT_EXACT);
	xstrfmtcat(out, "MinMemory%s=%s MinTmpDiskNode=%s",
		   tmp6_ptr, tmp1, tmp2);
	xstrcat(out, line_end);

	/****** Line ******/
	secs2time_str((time_t)job_ptr->delay_boot, tmp1, sizeof(tmp1));
	xstrfmtcat(out, "Features=%s DelayBoot=%s", job_ptr->features,
		   tmp1);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "Gres=%s Reservation=%s",
		   job_ptr->gres, job_ptr->resv_name);
	xstrcat(out, line_end);

	/****** Line 20 ******/
	xstrfmtcat(out,
		   "OverSubscribe=%s Contiguous=%d Licenses=%s Network=%s",
		   job_share_string(job_ptr->shared), job_ptr->contiguous,
		   job_ptr->licenses, job_ptr->network);
	xstrcat(out, line_end);

	/****** Line 21 ******/
	xstrfmtcat(out, "Command=%s", job_ptr->command);
	xstrcat(out, line_end);

	/****** Line 22 ******/
	xstrfmtcat(out, "WorkDir=%s", job_ptr->work_dir);

	if (cluster_flags & CLUSTER_FLAG_BG) {
		/****** Line 23 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf,
					       sizeof(select_buf),
					       SELECT_PRINT_BG_ID);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "Block_ID=%s", select_buf);
		}

		/****** Line 24 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf,
					       sizeof(select_buf),
					       SELECT_PRINT_MIXED_SHORT);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrcat(out, select_buf);
		}

		/****** Line 26 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf,
					       sizeof(select_buf),
					       SELECT_PRINT_LINUX_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "CnloadImage=%s", select_buf);
		}
		/****** Line 27 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf,
					       sizeof(select_buf),
					       SELECT_PRINT_MLOADER_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "MloaderImage=%s", select_buf);
		}
		/****** Line 28 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf,
					       sizeof(select_buf),
					       SELECT_PRINT_RAMDISK_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "IoloadImage=%s", select_buf);
		}
	}

	/****** Line (optional) ******/
	if (job_ptr->admin_comment) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "AdminComment=%s ", job_ptr->admin_comment);
	}

	/****** Line (optional) ******/
	if (job_ptr->comment) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "Comment=%s ", job_ptr->comment);
	}

	/****** Line 30 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stderr(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdErr=%s", tmp_path);
	}

	/****** Line 31 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stdin(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdIn=%s", tmp_path);
	}

	/****** Line 32 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stdout(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdOut=%s", tmp_path);
	}

	/****** Line 33 (optional) ******/
	if (job_ptr->batch_script) {
		xstrcat(out, line_end);
		xstrcat(out, "BatchScript=\n");
		xstrcat(out, job_ptr->batch_script);
	}

	/****** Line 34 (optional) ******/
	if (job_ptr->req_switch) {
		char time_buf[32];
		xstrcat(out, line_end);
		secs2time_str((time_t) job_ptr->wait4switch, time_buf,
			      sizeof(time_buf));
		xstrfmtcat(out, "Switches=%u@%s\n", job_ptr->req_switch,
			   time_buf);
	}

	/****** Line 35 (optional) ******/
	if (job_ptr->burst_buffer) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "BurstBuffer=%s", job_ptr->burst_buffer);
	}

	/****** Line (optional) ******/
	if (job_ptr->burst_buffer_state) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "BurstBufferState=%s",
			   job_ptr->burst_buffer_state);
	}

	/****** Line 36 (optional) ******/
	if (cpu_freq_debug(NULL, NULL, tmp1, sizeof(tmp1),
			   job_ptr->cpu_freq_gov, job_ptr->cpu_freq_min,
			   job_ptr->cpu_freq_max, NO_VAL) != 0) {
		xstrcat(out, line_end);
		xstrcat(out, tmp1);
	}

	/****** Line 37 ******/
	xstrcat(out, line_end);
	xstrfmtcat(out, "Power=%s", power_flags_str(job_ptr->power_flags));

	/****** Line 38 (optional) ******/
	if (job_ptr->bitflags) {
		xstrcat(out, line_end);
		if (job_ptr->bitflags & GRES_ENFORCE_BIND)
			xstrcat(out, "GresEnforceBind=Yes");
		/* BUG FIX: label was misspelled "KillOInInvalidDependent" */
		if (job_ptr->bitflags & KILL_INV_DEP)
			xstrcat(out, "KillOnInvalidDependent=Yes");
		if (job_ptr->bitflags & NO_KILL_INV_DEP)
			xstrcat(out, "KillOnInvalidDependent=No");
		if (job_ptr->bitflags & SPREAD_JOB)
			xstrcat(out, "SpreadJob=Yes");
	}

	/****** END OF JOB RECORD ******/
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}
/*
 * set cpu frequency if possible for each cpu of the job step
 */
extern void
cpu_freq_set(stepd_step_rec_t *job)
{
	char freq_detail[100];
	uint32_t freq;
	int i, rc;

	if ((!cpu_freq_count) || (!cpufreq))
		return;

	for (i = 0; i < cpu_freq_count; i++) {
		if (cpufreq[i].new_frequency == NO_VAL
		    && cpufreq[i].new_min_freq == NO_VAL
		    && cpufreq[i].new_max_freq == NO_VAL
		    && cpufreq[i].new_governor[0] == '\0')
			continue; /* Nothing to set on this CPU */

		if (debug_flags & DEBUG_FLAG_CPU_FREQ) {
			/* BUG FIX: format said "org_gpv" for the governor */
			info("cpu_freq: current_state cpu=%d org_min=%u "
			     "org_freq=%u org_max=%u org_gov=%s",
			     i, cpufreq[i].org_min_freq,
			     cpufreq[i].org_frequency,
			     cpufreq[i].org_max_freq,
			     cpufreq[i].org_governor);
		}

		/* Max must be set before min, per
		 * www.kernel.org/doc/Documentation/cpu-freq/user-guide.txt
		 */
		if (cpufreq[i].new_max_freq != NO_VAL) {
			freq = cpufreq[i].new_max_freq;
			if (cpufreq[i].org_frequency > freq) {
				/* The current frequency is > requested max,
				 * Set it so it is in range
				 * have to go to UserSpace to do it. */
				rc = _cpu_freq_set_gov(job, i, "userspace");
				/* BUG FIX: was "return", which abandoned
				 * every remaining CPU on one failure; all
				 * sibling error paths use continue */
				if (rc == SLURM_FAILURE)
					continue;
				rc = _cpu_freq_set_scaling_freq(job, i, freq,
						"scaling_setspeed");
				if (rc == SLURM_FAILURE)
					continue;
				if (cpufreq[i].new_governor[0] == '\0') {
					/* Not requesting new gov, so
					 * restore */
					rc = _cpu_freq_set_gov(job, i,
						cpufreq[i].org_governor);
					if (rc == SLURM_FAILURE)
						continue;
				}
			}
			rc = _cpu_freq_set_scaling_freq(job, i, freq,
							"scaling_max_freq");
			if (rc == SLURM_FAILURE)
				continue;
		}
		if (cpufreq[i].new_min_freq != NO_VAL) {
			freq = cpufreq[i].new_min_freq;
			if (cpufreq[i].org_frequency < freq) {
				/* The current frequency is < requested min,
				 * Set it so it is in range
				 * have to go to UserSpace to do it. */
				rc = _cpu_freq_set_gov(job, i, "userspace");
				if (rc == SLURM_FAILURE)
					continue;
				rc = _cpu_freq_set_scaling_freq(job, i, freq,
						"scaling_setspeed");
				if (rc == SLURM_FAILURE)
					continue;
				if (cpufreq[i].new_governor[0] == '\0') {
					/* Not requesting new gov, so
					 * restore */
					rc = _cpu_freq_set_gov(job, i,
						cpufreq[i].org_governor);
					if (rc == SLURM_FAILURE)
						continue;
				}
			}
			rc = _cpu_freq_set_scaling_freq(job, i, freq,
							"scaling_min_freq");
			if (rc == SLURM_FAILURE)
				continue;
		}
		if (cpufreq[i].new_frequency != NO_VAL) {
			/* Explicit frequency requires userspace governor */
			if (strcmp(cpufreq[i].org_governor, "userspace")) {
				rc = _cpu_freq_set_gov(job, i, "userspace");
				if (rc == SLURM_FAILURE)
					continue;
			}
			rc = _cpu_freq_set_scaling_freq(job, i,
					cpufreq[i].new_frequency,
					"scaling_setspeed");
			if (rc == SLURM_FAILURE)
				continue;
		}
		if (cpufreq[i].new_governor[0] != '\0') {
			rc = _cpu_freq_set_gov(job, i,
					       cpufreq[i].new_governor);
			if (rc == SLURM_FAILURE)
				continue;
		}

		if (debug_flags & DEBUG_FLAG_CPU_FREQ) {
			cpu_freq_debug(NULL, NULL,
				       freq_detail, sizeof(freq_detail),
				       NO_VAL, cpufreq[i].new_min_freq,
				       cpufreq[i].new_max_freq,
				       cpufreq[i].new_frequency);
			if (cpufreq[i].new_governor[0] != '\0') {
				info("cpu_freq: set cpu=%d %s Governor=%s",
				     i, freq_detail,
				     cpufreq[i].new_governor);
			} else {
				/* BUG FIX: message said "reset" (copy-paste
				 * from cpu_freq_reset) in the set routine */
				info("cpu_freq: set cpu=%d %s",
				     i, freq_detail);
			}
		}
	}
}
/*
 * slurm_sprint_job_step_info - output information about a specific Slurm
 *	job step based upon message as loaded using slurm_get_job_steps
 * IN job_ptr - an individual job step information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *	NULL is returned on failure.
 */
char *
slurm_sprint_job_step_info ( job_step_info_t * job_step_ptr,
			     int one_liner )
{
	char tmp_node_cnt[40];
	char time_str[32];
	char limit_str[32];
	char tmp_line[128];
	char *out = NULL;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	/****** Line 1 ******/
	slurm_make_time_str ((time_t *)&job_step_ptr->start_time, time_str,
			     sizeof(time_str));
	if (job_step_ptr->time_limit == INFINITE)
		sprintf(limit_str, "UNLIMITED");
	else
		secs2time_str ((time_t)job_step_ptr->time_limit * 60,
			       limit_str, sizeof(limit_str));
	if (job_step_ptr->array_job_id) {
		if (job_step_ptr->step_id == INFINITE) {	/* Pending */
			snprintf(tmp_line, sizeof(tmp_line),
				 "StepId=%u_%u.TBD ",
				 job_step_ptr->array_job_id,
				 job_step_ptr->array_task_id);
		} else {
			snprintf(tmp_line, sizeof(tmp_line),
				 "StepId=%u_%u.%u ",
				 job_step_ptr->array_job_id,
				 job_step_ptr->array_task_id,
				 job_step_ptr->step_id);
		}
		out = xstrdup(tmp_line);
	} else {
		if (job_step_ptr->step_id == INFINITE) {	/* Pending */
			snprintf(tmp_line, sizeof(tmp_line),
				 "StepId=%u.TBD ",
				 job_step_ptr->job_id);
		} else {
			snprintf(tmp_line, sizeof(tmp_line),
				 "StepId=%u.%u ",
				 job_step_ptr->job_id,
				 job_step_ptr->step_id);
		}
		out = xstrdup(tmp_line);
	}
	snprintf(tmp_line, sizeof(tmp_line),
		 "UserId=%u StartTime=%s TimeLimit=%s",
		 job_step_ptr->user_id, time_str, limit_str);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n ");

	/****** Line 2 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "State=%s ",
		 job_state_string(job_step_ptr->state));
	xstrcat(out, tmp_line);
	if (cluster_flags & CLUSTER_FLAG_BG) {
		char *io_nodes = NULL;
		select_g_select_jobinfo_get(job_step_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &io_nodes);
		if (io_nodes) {
			snprintf(tmp_line, sizeof(tmp_line),
				 "Partition=%s MidplaneList=%s[%s] Gres=%s",
				 job_step_ptr->partition,
				 job_step_ptr->nodes, io_nodes,
				 job_step_ptr->gres);
			xfree(io_nodes);
		} else
			snprintf(tmp_line, sizeof(tmp_line),
				 "Partition=%s MidplaneList=%s Gres=%s",
				 job_step_ptr->partition,
				 job_step_ptr->nodes,
				 job_step_ptr->gres);
	} else {
		snprintf(tmp_line, sizeof(tmp_line),
			 "Partition=%s NodeList=%s Gres=%s",
			 job_step_ptr->partition, job_step_ptr->nodes,
			 job_step_ptr->gres);
	}
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n ");

	/****** Line 3 ******/
	if (cluster_flags & CLUSTER_FLAG_BGQ) {
		uint32_t nodes = 0;
		select_g_select_jobinfo_get(job_step_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &nodes);
		/* BUG FIX: both convert_num_unit() calls were missing the
		 * NO_VAL spec-type argument used at every other call site
		 * in this file, shifting the flags into the wrong
		 * parameter */
		convert_num_unit((float)nodes, tmp_node_cnt,
				 sizeof(tmp_node_cnt), UNIT_NONE, NO_VAL,
				 CONVERT_NUM_UNIT_EXACT);
	} else {
		convert_num_unit((float)_nodes_in_list(job_step_ptr->nodes),
				 tmp_node_cnt, sizeof(tmp_node_cnt),
				 UNIT_NONE, NO_VAL,
				 CONVERT_NUM_UNIT_EXACT);
	}

	snprintf(tmp_line, sizeof(tmp_line),
		 "Nodes=%s CPUs=%u Tasks=%u Name=%s Network=%s",
		 tmp_node_cnt, job_step_ptr->num_cpus,
		 job_step_ptr->num_tasks, job_step_ptr->name,
		 job_step_ptr->network);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n ");

	/****** Line 4 ******/
	snprintf(tmp_line, sizeof(tmp_line), "TRES=%s",
		 job_step_ptr->tres_alloc_str);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n ");

	/****** Line 5 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "ResvPorts=%s Checkpoint=%u CheckpointDir=%s",
		 job_step_ptr->resv_ports,
		 job_step_ptr->ckpt_interval, job_step_ptr->ckpt_dir);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n ");

	/****** Line 6 ******/
	if (cpu_freq_debug(NULL, NULL, tmp_line, sizeof(tmp_line),
			   job_step_ptr->cpu_freq_gov,
			   job_step_ptr->cpu_freq_min,
			   job_step_ptr->cpu_freq_max, NO_VAL) != 0) {
		xstrcat(out, tmp_line);
	} else {
		xstrcat(out, "CPUFreqReq=Default");
	}
	xstrfmtcat(out, " Dist=%s",
		   slurm_step_layout_type_name(job_step_ptr->task_dist));

	xstrcat(out, "\n\n");
	return out;
}