コード例 #1
0
ファイル: cpu_frequency.c プロジェクト: asanchez1987/slurm
/*
 * Verify cpu_freq command line option
 *
 * --cpu-freq=arg
 *   where arg is p1{-p2{:p3}}
 *
 * - p1 can be  [#### | low | medium | high | highm1]
 * 	which will set the current frequency, and set the governor to
 * 	UserSpace.
 * - p1 can be [Conservative | OnDemand | Performance | PowerSave | UserSpace]
 *      which will set the governor to the corresponding value.
 * - When p2 is present, p1 will be the minimum frequency and p2 will be
 *   the maximum. The governor will not be changed.
 * - p2 can be  [#### | medium | high | highm1] p2 must be greater than p1.
 * - If the current frequency is < min, it will be set to min.
 *   Likewise, if the current frequency is > max, it will be set to max.
 * - p3 can be [Conservative | OnDemand | Performance | PowerSave | UserSpace]
 *   which will set the governor to the corresponding value.
 *   When p3 is UserSpace, the current frequency is set to p2.
 *   p2 will have been set by PowerCapping.
 *
 * returns -1 on error, 0 otherwise
 */
extern int
cpu_freq_verify_cmdline(const char *arg,
			uint32_t *cpu_freq_min,
			uint32_t *cpu_freq_max,
			uint32_t *cpu_freq_gov)
{
	char *poscolon, *posdash;
	char *p1=NULL, *p2=NULL, *p3=NULL;
	uint32_t frequency;
	int rc = 0;

	if (cpu_freq_govs == 0)
		cpu_freq_govs = slurm_get_cpu_freq_govs();


	if (arg == NULL || cpu_freq_min == NULL || cpu_freq_max == NULL
			|| cpu_freq_gov == NULL) {
		return -1;
	}
	*cpu_freq_min = NO_VAL;
	*cpu_freq_max = NO_VAL;
	*cpu_freq_gov = NO_VAL;
	poscolon = strchr(arg,':');
	if (poscolon) {
		p3 = xstrdup((poscolon+1));
	}
	posdash = strchr(arg,'-');
	if (posdash) {
		p1 = xstrndup(arg, (posdash-arg));
		if (poscolon) {
			p2 = xstrndup((posdash+1), ((poscolon-posdash)-1));
		} else {
			p2 = xstrdup((posdash+1));
		}
	} else {
		if (poscolon) {
			p1 = xstrndup(arg, (poscolon-arg));
		} else {
			p1 = xstrdup(arg);
		}
	}

	frequency = _cpu_freq_check_gov(p1, 0);
	if (frequency != 0) {
		if (p3) {
			error("governor cannot be specified twice "
			      "%s{-}:%s in --cpu-freq", p1, p3);
			rc = -1;
			goto clean;
		}
		*cpu_freq_gov = frequency;
	} else {
		frequency = _cpu_freq_check_freq(p1);
		if (frequency == 0) {
			rc = -1;
			goto clean;
		}
		*cpu_freq_max = frequency;
	}
	if (p2) {
		frequency = _cpu_freq_check_freq(p2);
		if (frequency == 0) {
			rc = -1;
			goto clean;
		}
		*cpu_freq_min = *cpu_freq_max;
		*cpu_freq_max = frequency;
		if (*cpu_freq_max < *cpu_freq_min) {
			error("min cpu-frec (%s) must be < max cpu-freq (%s)",
			      p1, p2);
			rc = -1;
			goto clean;
		}
	}

	if (p3) {
		if (!p2) {
			error("gov on cpu-frec (%s) illegal without max", p3);
			rc = -1;
			goto clean;
		}
		frequency = _cpu_freq_check_gov(p3, 0);
		if (frequency == 0) {
			error("illegal governor: %s on --cpu-freq", p3);
			rc = -1;
			goto clean;
		}
		*cpu_freq_gov = frequency;
	}

clean:
	if (*cpu_freq_gov != NO_VAL) {
		if (((*cpu_freq_gov & cpu_freq_govs)
		    & ~CPU_FREQ_RANGE_FLAG) == 0) {
			error("governor on %s is not allowed in slurm.conf",
			      arg);
			*cpu_freq_gov = NO_VAL;
			rc = -1;
		}
	}
	if (debug_flags & DEBUG_FLAG_CPU_FREQ) {
		cpu_freq_debug("command", "NO_VAL", NULL, 0,
			       *cpu_freq_gov, *cpu_freq_min,
			       *cpu_freq_max, NO_VAL);
	}
	xfree(p1);
	xfree(p2);
	xfree(p3);
	return rc;

}
コード例 #2
0
ファイル: cpu_frequency.c プロジェクト: asanchez1987/slurm
/*
 * reset the cpus used by the process to their
 * default frequency and governor type
 */
extern void
cpu_freq_reset(stepd_step_rec_t *job)
{
	int i, rc, fd;
	char freq_detail[100];

	if ((!cpu_freq_count) || (!cpufreq))
		return;

	for (i = 0; i < cpu_freq_count; i++) {
		if (cpufreq[i].new_frequency == NO_VAL
		    && cpufreq[i].new_min_freq == NO_VAL
		    && cpufreq[i].new_min_freq == NO_VAL
		    && cpufreq[i].new_governor[0] == '\0')
			continue; /* Nothing to reset on this CPU */

		fd = _test_cpu_owner_lock(i, job->jobid);
		if (fd < 0)
			continue;

		if (cpufreq[i].new_frequency != NO_VAL) {
			rc = _cpu_freq_set_gov(job, i, "userspace");
			if (rc == SLURM_FAILURE)
				continue;
			rc = _cpu_freq_set_scaling_freq(job, i,
					cpufreq[i].org_frequency,
					"scaling_setspeed");
			if (rc == SLURM_FAILURE)
				continue;
			cpufreq[i].new_governor[0] = 'u'; /* force gov reset */
		}
		/* Max must be set before min, per
		 * www.kernel.org/doc/Documentation/cpu-freq/user-guide.txt
		 */
		if (cpufreq[i].new_max_freq != NO_VAL) {
			rc = _cpu_freq_set_scaling_freq(job, i,
					cpufreq[i].org_max_freq,
					"scaling_max_freq");
			if (rc == SLURM_FAILURE)
				continue;
		}
		if (cpufreq[i].new_min_freq != NO_VAL) {
			rc = _cpu_freq_set_scaling_freq(job, i,
					cpufreq[i].org_min_freq,
					"scaling_min_freq");
			if (rc == SLURM_FAILURE)
				continue;
		}
		if (cpufreq[i].new_governor[0] != '\0') {
			rc = _cpu_freq_set_gov(job, i, cpufreq[i].org_governor);
			if (rc == SLURM_FAILURE)
				continue;
		}

		if (debug_flags & DEBUG_FLAG_CPU_FREQ) {
			cpu_freq_debug(NULL, NULL,
					freq_detail, sizeof(freq_detail),
					NO_VAL, cpufreq[i].org_min_freq,
					cpufreq[i].org_max_freq,
					cpufreq[i].org_frequency);
			if (cpufreq[i].new_governor[0] != '\0') {
				info("cpu_freq: reset cpu=%d %s Governor=%s",
				     i, freq_detail, cpufreq[i].org_governor);
			} else {
				info("cpu_freq: reset cpu=%d %s", i,
				     freq_detail);
			}
		}
	}
	xfree(slurmd_spooldir);
}
コード例 #3
0
ファイル: job_info.c プロジェクト: fafik23/slurm
/*
 * slurm_sprint_job_info - output information about a specific Slurm
 *	job based upon message as loaded using slurm_load_jobs
 * IN job_ptr - an individual job information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 */
extern char *
slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner )
{
	int i, j, k;
	char time_str[32], *group_name, *user_name;
	char *gres_last = "", tmp1[128], tmp2[128];
	char *tmp6_ptr;
	char tmp_line[1024 * 128];
	char tmp_path[MAXPATHLEN];
	char *ionodes = NULL;
	uint16_t exit_status = 0, term_sig = 0;
	job_resources_t *job_resrcs = job_ptr->job_resrcs;
	char *out = NULL;
	time_t run_time;
	uint32_t min_nodes, max_nodes = 0;
	char *nodelist = "NodeList";
	bitstr_t *cpu_bitmap;
	char *host;
	int sock_inx, sock_reps, last;
	int abs_node_inx, rel_node_inx;
	int64_t nice;
	int bit_inx, bit_reps;
	uint64_t *last_mem_alloc_ptr = NULL;
	uint64_t last_mem_alloc = NO_VAL64;
	char *last_hosts;
	hostlist_t hl, hl_last;
	char select_buf[122];
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	uint32_t threads;
	char *line_end = (one_liner) ? " " : "\n   ";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		nodelist = "MidplaneList";
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &ionodes);
	}

	/****** Line 1 ******/
	xstrfmtcat(out, "JobId=%u ", job_ptr->job_id);

	if (job_ptr->array_job_id) {
		if (job_ptr->array_task_str) {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%s ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_str);
		} else {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%u ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_id);
		}
	}
	xstrfmtcat(out, "JobName=%s", job_ptr->name);
	xstrcat(out, line_end);

	/****** Line 2 ******/
	user_name = uid_to_string((uid_t) job_ptr->user_id);
	group_name = gid_to_string((gid_t) job_ptr->group_id);
	xstrfmtcat(out, "UserId=%s(%u) GroupId=%s(%u) MCS_label=%s",
		   user_name, job_ptr->user_id, group_name, job_ptr->group_id,
		   (job_ptr->mcs_label==NULL) ? "N/A" : job_ptr->mcs_label);
	xfree(user_name);
	xfree(group_name);
	xstrcat(out, line_end);

	/****** Line 3 ******/
	nice = ((int64_t)job_ptr->nice) - NICE_OFFSET;
	xstrfmtcat(out, "Priority=%u Nice=%"PRIi64" Account=%s QOS=%s",
		   job_ptr->priority, nice, job_ptr->account, job_ptr->qos);
	if (slurm_get_track_wckey())
		xstrfmtcat(out, " WCKey=%s", job_ptr->wckey);
	xstrcat(out, line_end);

	/****** Line 4 ******/
	xstrfmtcat(out, "JobState=%s ", job_state_string(job_ptr->job_state));

	if (job_ptr->state_desc) {
		/* Replace white space with underscore for easier parsing */
		for (j=0; job_ptr->state_desc[j]; j++) {
			if (isspace((int)job_ptr->state_desc[j]))
				job_ptr->state_desc[j] = '_';
		}
		xstrfmtcat(out, "Reason=%s ", job_ptr->state_desc);
	} else
		xstrfmtcat(out, "Reason=%s ", job_reason_string(job_ptr->state_reason));

	xstrfmtcat(out, "Dependency=%s", job_ptr->dependency);
	xstrcat(out, line_end);

	/****** Line 5 ******/
	xstrfmtcat(out, "Requeue=%u Restarts=%u BatchFlag=%u Reboot=%u ",
		 job_ptr->requeue, job_ptr->restart_cnt, job_ptr->batch_flag,
		 job_ptr->reboot);
	if (WIFSIGNALED(job_ptr->exit_code))
		term_sig = WTERMSIG(job_ptr->exit_code);
	exit_status = WEXITSTATUS(job_ptr->exit_code);
	xstrfmtcat(out, "ExitCode=%u:%u", exit_status, term_sig);
	xstrcat(out, line_end);

	/****** Line 5a (optional) ******/
	if (job_ptr->show_flags & SHOW_DETAIL) {
		if (WIFSIGNALED(job_ptr->derived_ec))
			term_sig = WTERMSIG(job_ptr->derived_ec);
		else
			term_sig = 0;
		exit_status = WEXITSTATUS(job_ptr->derived_ec);
		xstrfmtcat(out, "DerivedExitCode=%u:%u", exit_status, term_sig);
		xstrcat(out, line_end);
	}

	/****** Line 6 ******/
	if (IS_JOB_PENDING(job_ptr))
		run_time = 0;
	else if (IS_JOB_SUSPENDED(job_ptr))
		run_time = job_ptr->pre_sus_time;
	else {
		time_t end_time;
		if (IS_JOB_RUNNING(job_ptr) || (job_ptr->end_time == 0))
			end_time = time(NULL);
		else
			end_time = job_ptr->end_time;
		if (job_ptr->suspend_time) {
			run_time = (time_t)
				(difftime(end_time, job_ptr->suspend_time)
				 + job_ptr->pre_sus_time);
		} else
			run_time = (time_t)
				difftime(end_time, job_ptr->start_time);
	}
	secs2time_str(run_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "RunTime=%s ", time_str);

	if (job_ptr->time_limit == NO_VAL)
		xstrcat(out, "TimeLimit=Partition_Limit ");
	else {
		mins2time_str(job_ptr->time_limit, time_str, sizeof(time_str));
		xstrfmtcat(out, "TimeLimit=%s ", time_str);
	}

	if (job_ptr->time_min == 0)
		xstrcat(out, "TimeMin=N/A");
	else {
		mins2time_str(job_ptr->time_min, time_str, sizeof(time_str));
		xstrfmtcat(out, "TimeMin=%s", time_str);
	}
	xstrcat(out, line_end);

	/****** Line 7 ******/
	slurm_make_time_str(&job_ptr->submit_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "SubmitTime=%s ", time_str);

	slurm_make_time_str(&job_ptr->eligible_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "EligibleTime=%s", time_str);

	xstrcat(out, line_end);

	/****** Line 8 (optional) ******/
	if (job_ptr->resize_time) {
		slurm_make_time_str(&job_ptr->resize_time, time_str, sizeof(time_str));
		xstrfmtcat(out, "ResizeTime=%s", time_str);
		xstrcat(out, line_end);
	}

	/****** Line 9 ******/
	slurm_make_time_str(&job_ptr->start_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "StartTime=%s ", time_str);

	if ((job_ptr->time_limit == INFINITE) &&
	    (job_ptr->end_time > time(NULL)))
		xstrcat(out, "EndTime=Unknown ");
	else {
		slurm_make_time_str(&job_ptr->end_time, time_str, sizeof(time_str));
		xstrfmtcat(out, "EndTime=%s ", time_str);
	}

	if (job_ptr->deadline) {
		slurm_make_time_str(&job_ptr->deadline, time_str, sizeof(time_str));
		xstrfmtcat(out, "Deadline=%s", time_str);
	} else {
		xstrcat(out, "Deadline=N/A");
	}

	xstrcat(out, line_end);

	/****** Line 10 ******/
	if (job_ptr->preempt_time == 0)
		xstrcat(out, "PreemptTime=None ");
	else {
		slurm_make_time_str(&job_ptr->preempt_time, time_str, sizeof(time_str));
		xstrfmtcat(out, "PreemptTime=%s ", time_str);
	}

	if (job_ptr->suspend_time) {
		slurm_make_time_str(&job_ptr->suspend_time, time_str, sizeof(time_str));
		xstrfmtcat(out, "SuspendTime=%s ", time_str);
	} else
		xstrcat(out, "SuspendTime=None ");

	xstrfmtcat(out, "SecsPreSuspend=%ld", (long int)job_ptr->pre_sus_time);
	xstrcat(out, line_end);

	/****** Line 11 ******/
	xstrfmtcat(out, "Partition=%s AllocNode:Sid=%s:%u",
		   job_ptr->partition, job_ptr->alloc_node, job_ptr->alloc_sid);
	xstrcat(out, line_end);

	/****** Line 12 ******/
	xstrfmtcat(out, "Req%s=%s Exc%s=%s", nodelist, job_ptr->req_nodes,
		   nodelist, job_ptr->exc_nodes);
	xstrcat(out, line_end);

	/****** Line 13 ******/
	xstrfmtcat(out, "%s=%s", nodelist, job_ptr->nodes);
	if (job_ptr->nodes && ionodes) {
		xstrfmtcat(out, "[%s]", ionodes);
		xfree(ionodes);
	}
	if (job_ptr->sched_nodes)
		xstrfmtcat(out, " Sched%s=%s", nodelist, job_ptr->sched_nodes);

	xstrcat(out, line_end);

	/****** Line 14 (optional) ******/
	if (job_ptr->batch_host) {
		xstrfmtcat(out, "BatchHost=%s", job_ptr->batch_host);
		xstrcat(out, line_end);
	}

	/****** Line 14a (optional) ******/
	if (job_ptr->fed_siblings) {
		xstrfmtcat(out, "FedOrigin=%s FedSiblings=%s",
			   job_ptr->fed_origin_str, job_ptr->fed_siblings_str);
		xstrcat(out, line_end);
	}

	/****** Line 15 ******/
	if (cluster_flags & CLUSTER_FLAG_BG) {
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &min_nodes);
		if ((min_nodes == 0) || (min_nodes == NO_VAL)) {
			min_nodes = job_ptr->num_nodes;
			max_nodes = job_ptr->max_nodes;
		} else if (job_ptr->max_nodes)
			max_nodes = min_nodes;
	} else if (IS_JOB_PENDING(job_ptr)) {
		min_nodes = job_ptr->num_nodes;
		max_nodes = job_ptr->max_nodes;
		if (max_nodes && (max_nodes < min_nodes))
			min_nodes = max_nodes;
	} else {
		min_nodes = job_ptr->num_nodes;
		max_nodes = 0;
	}

	_sprint_range(tmp_line, sizeof(tmp_line), min_nodes, max_nodes);
	xstrfmtcat(out, "NumNodes=%s ", tmp_line);
	_sprint_range(tmp_line, sizeof(tmp_line), job_ptr->num_cpus, job_ptr->max_cpus);
	xstrfmtcat(out, "NumCPUs=%s ", tmp_line);

	xstrfmtcat(out, "NumTasks=%u ", job_ptr->num_tasks);
	xstrfmtcat(out, "CPUs/Task=%u ", job_ptr->cpus_per_task);

	if (job_ptr->boards_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "ReqB:S:C:T=*:");
	else
		xstrfmtcat(out, "ReqB:S:C:T=%u:", job_ptr->boards_per_node);

	if (job_ptr->sockets_per_board == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->sockets_per_board);

	if (job_ptr->cores_per_socket == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->cores_per_socket);

	if (job_ptr->threads_per_core == (uint16_t) NO_VAL)
		xstrcat(out, "*");
	else
		xstrfmtcat(out, "%u", job_ptr->threads_per_core);

	xstrcat(out, line_end);

	/****** Line 16 ******/
	/* Tres should already of been converted at this point from simple */
	xstrfmtcat(out, "TRES=%s",
		   job_ptr->tres_alloc_str ? job_ptr->tres_alloc_str
					   : job_ptr->tres_req_str);
	xstrcat(out, line_end);

	/****** Line 17 ******/
	if (job_ptr->sockets_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "Socks/Node=* ");
	else
		xstrfmtcat(out, "Socks/Node=%u ", job_ptr->sockets_per_node);

	if (job_ptr->ntasks_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "NtasksPerN:B:S:C=*:");
	else
		xstrfmtcat(out, "NtasksPerN:B:S:C=%u:", job_ptr->ntasks_per_node);

	if (job_ptr->ntasks_per_board == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_board);

	if ((job_ptr->ntasks_per_socket == (uint16_t) NO_VAL) ||
	    (job_ptr->ntasks_per_socket == (uint16_t) INFINITE))
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_socket);

	if ((job_ptr->ntasks_per_core == (uint16_t) NO_VAL) ||
	    (job_ptr->ntasks_per_core == (uint16_t) INFINITE))
		xstrcat(out, "* ");
	else
		xstrfmtcat(out, "%u ", job_ptr->ntasks_per_core);

	if (job_ptr->core_spec == (uint16_t) NO_VAL)
		xstrcat(out, "CoreSpec=*");
	else if (job_ptr->core_spec & CORE_SPEC_THREAD)
		xstrfmtcat(out, "ThreadSpec=%d",
			   (job_ptr->core_spec & (~CORE_SPEC_THREAD)));
	else
		xstrfmtcat(out, "CoreSpec=%u", job_ptr->core_spec);

	xstrcat(out, line_end);

	if (job_resrcs && cluster_flags & CLUSTER_FLAG_BG) {
		if ((job_resrcs->cpu_array_cnt > 0) &&
		    (job_resrcs->cpu_array_value) &&
		    (job_resrcs->cpu_array_reps)) {
			int length = 0;
			xstrcat(out, "CPUs=");
			for (i = 0; i < job_resrcs->cpu_array_cnt; i++) {
				/* only print 60 characters worth of this record */
				if (length > 60) {
					/* skip to last CPU group entry */
					if (i < job_resrcs->cpu_array_cnt - 1) {
						continue;
					}
					/* add ellipsis before last entry */
					xstrcat(out, "...,");
				}

				length += xstrfmtcat(out, "%d", job_resrcs->cpus[i]);
				if (job_resrcs->cpu_array_reps[i] > 1) {
					length += xstrfmtcat(out, "*%d",
							     job_resrcs->cpu_array_reps[i]);
				}
				if (i < job_resrcs->cpu_array_cnt - 1) {
					xstrcat(out, ",");
					length++;
				}
			}
			xstrcat(out, line_end);
		}
	} else if (job_resrcs && job_resrcs->core_bitmap &&
		   ((last = bit_fls(job_resrcs->core_bitmap)) != -1)) {
		hl = hostlist_create(job_resrcs->nodes);
		if (!hl) {
			error("slurm_sprint_job_info: hostlist_create: %s",
			      job_resrcs->nodes);
			return NULL;
		}
		hl_last = hostlist_create(NULL);
		if (!hl_last) {
			error("slurm_sprint_job_info: hostlist_create: NULL");
			hostlist_destroy(hl);
			return NULL;
		}

		bit_inx = 0;
		i = sock_inx = sock_reps = 0;
		abs_node_inx = job_ptr->node_inx[i];

		gres_last = "";
		/* tmp1[] stores the current cpu(s) allocated */
		tmp2[0] = '\0';	/* stores last cpu(s) allocated */
		for (rel_node_inx=0; rel_node_inx < job_resrcs->nhosts;
		     rel_node_inx++) {

			if (sock_reps >=
			    job_resrcs->sock_core_rep_count[sock_inx]) {
				sock_inx++;
				sock_reps = 0;
			}
			sock_reps++;

			bit_reps = job_resrcs->sockets_per_node[sock_inx] *
				   job_resrcs->cores_per_socket[sock_inx];
			host = hostlist_shift(hl);
			threads = _threads_per_core(host);
			cpu_bitmap = bit_alloc(bit_reps * threads);
			for (j = 0; j < bit_reps; j++) {
				if (bit_test(job_resrcs->core_bitmap, bit_inx)){
					for (k = 0; k < threads; k++)
						bit_set(cpu_bitmap,
							(j * threads) + k);
				}
				bit_inx++;
			}
			bit_fmt(tmp1, sizeof(tmp1), cpu_bitmap);
			FREE_NULL_BITMAP(cpu_bitmap);
			/*
			 * If the allocation values for this host are not the
			 * same as the last host, print the report of the last
			 * group of hosts that had identical allocation values.
			 */
			if (xstrcmp(tmp1, tmp2) ||
			    ((rel_node_inx < job_ptr->gres_detail_cnt) &&
			     xstrcmp(job_ptr->gres_detail_str[rel_node_inx],
				     gres_last)) ||
			    (last_mem_alloc_ptr != job_resrcs->memory_allocated) ||
			    (job_resrcs->memory_allocated &&
			     (last_mem_alloc !=
			      job_resrcs->memory_allocated[rel_node_inx]))) {
				if (hostlist_count(hl_last)) {
					last_hosts =
						hostlist_ranged_string_xmalloc(
						hl_last);
					xstrfmtcat(out,
						   "  Nodes=%s CPU_IDs=%s "
						   "Mem=%"PRIu64" GRES_IDX=%s",
						   last_hosts, tmp2,
						   last_mem_alloc_ptr ?
						   last_mem_alloc : 0,
						    gres_last);
					xfree(last_hosts);
					xstrcat(out, line_end);

					hostlist_destroy(hl_last);
					hl_last = hostlist_create(NULL);
				}
				strcpy(tmp2, tmp1);
				if (rel_node_inx < job_ptr->gres_detail_cnt) {
					gres_last = job_ptr->
						    gres_detail_str[rel_node_inx];
				} else {
					gres_last = "";
				}
				last_mem_alloc_ptr = job_resrcs->memory_allocated;
				if (last_mem_alloc_ptr)
					last_mem_alloc = job_resrcs->
						memory_allocated[rel_node_inx];
				else
					last_mem_alloc = NO_VAL64;
			}
			hostlist_push_host(hl_last, host);
			free(host);

			if (bit_inx > last)
				break;

			if (abs_node_inx > job_ptr->node_inx[i+1]) {
				i += 2;
				abs_node_inx = job_ptr->node_inx[i];
			} else {
				abs_node_inx++;
			}
		}

		if (hostlist_count(hl_last)) {
			last_hosts = hostlist_ranged_string_xmalloc(hl_last);
			xstrfmtcat(out, "  Nodes=%s CPU_IDs=%s Mem=%"PRIu64" GRES_IDX=%s",
				 last_hosts, tmp2,
				 last_mem_alloc_ptr ? last_mem_alloc : 0,
				 gres_last);
			xfree(last_hosts);
			xstrcat(out, line_end);
		}
		hostlist_destroy(hl);
		hostlist_destroy(hl_last);
	}
	/****** Line 18 ******/
	if (job_ptr->pn_min_memory & MEM_PER_CPU) {
		job_ptr->pn_min_memory &= (~MEM_PER_CPU);
		tmp6_ptr = "CPU";
	} else
		tmp6_ptr = "Node";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		convert_num_unit((float)job_ptr->pn_min_cpus, tmp1,
				 sizeof(tmp1), UNIT_NONE, NO_VAL,
				 CONVERT_NUM_UNIT_EXACT);
		xstrfmtcat(out, "MinCPUsNode=%s ", tmp1);
	} else {
		xstrfmtcat(out, "MinCPUsNode=%u ", job_ptr->pn_min_cpus);
	}

	convert_num_unit((float)job_ptr->pn_min_memory, tmp1, sizeof(tmp1),
			 UNIT_MEGA, NO_VAL, CONVERT_NUM_UNIT_EXACT);
	convert_num_unit((float)job_ptr->pn_min_tmp_disk, tmp2, sizeof(tmp2),
			 UNIT_MEGA, NO_VAL, CONVERT_NUM_UNIT_EXACT);
	xstrfmtcat(out, "MinMemory%s=%s MinTmpDiskNode=%s", tmp6_ptr, tmp1, tmp2);
	xstrcat(out, line_end);

	/****** Line ******/
	secs2time_str((time_t)job_ptr->delay_boot, tmp1, sizeof(tmp1));
	xstrfmtcat(out, "Features=%s DelayBoot=%s", job_ptr->features, tmp1);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "Gres=%s Reservation=%s",
		   job_ptr->gres, job_ptr->resv_name);
	xstrcat(out, line_end);

	/****** Line 20 ******/
	xstrfmtcat(out, "OverSubscribe=%s Contiguous=%d Licenses=%s Network=%s",
		   job_share_string(job_ptr->shared), job_ptr->contiguous,
		   job_ptr->licenses, job_ptr->network);
	xstrcat(out, line_end);

	/****** Line 21 ******/
	xstrfmtcat(out, "Command=%s", job_ptr->command);
	xstrcat(out, line_end);

	/****** Line 22 ******/
	xstrfmtcat(out, "WorkDir=%s", job_ptr->work_dir);

	if (cluster_flags & CLUSTER_FLAG_BG) {
		/****** Line 23 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_BG_ID);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "Block_ID=%s", select_buf);
		}

		/****** Line 24 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MIXED_SHORT);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrcat(out, select_buf);
		}

		/****** Line 26 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_LINUX_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "CnloadImage=%s", select_buf);
		}
		/****** Line 27 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MLOADER_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "MloaderImage=%s", select_buf);
		}
		/****** Line 28 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_RAMDISK_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "IoloadImage=%s", select_buf);
		}
	}

	/****** Line (optional) ******/
	if (job_ptr->admin_comment) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "AdminComment=%s ", job_ptr->admin_comment);
	}

	/****** Line (optional) ******/
	if (job_ptr->comment) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "Comment=%s ", job_ptr->comment);
	}

	/****** Line 30 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stderr(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdErr=%s", tmp_path);
	}

	/****** Line 31 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stdin(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdIn=%s", tmp_path);
	}

	/****** Line 32 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stdout(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdOut=%s", tmp_path);
	}

	/****** Line 33 (optional) ******/
	if (job_ptr->batch_script) {
		xstrcat(out, line_end);
		xstrcat(out, "BatchScript=\n");
		xstrcat(out, job_ptr->batch_script);
	}

	/****** Line 34 (optional) ******/
	if (job_ptr->req_switch) {
		char time_buf[32];
		xstrcat(out, line_end);
		secs2time_str((time_t) job_ptr->wait4switch, time_buf,
			      sizeof(time_buf));
		xstrfmtcat(out, "Switches=%u@%s\n", job_ptr->req_switch, time_buf);
	}

	/****** Line 35 (optional) ******/
	if (job_ptr->burst_buffer) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "BurstBuffer=%s", job_ptr->burst_buffer);
	}

	/****** Line (optional) ******/
	if (job_ptr->burst_buffer_state) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "BurstBufferState=%s",
			   job_ptr->burst_buffer_state);
	}

	/****** Line 36 (optional) ******/
	if (cpu_freq_debug(NULL, NULL, tmp1, sizeof(tmp1),
			   job_ptr->cpu_freq_gov, job_ptr->cpu_freq_min,
			   job_ptr->cpu_freq_max, NO_VAL) != 0) {
		xstrcat(out, line_end);
		xstrcat(out, tmp1);
	}

	/****** Line 37 ******/
	xstrcat(out, line_end);
	xstrfmtcat(out, "Power=%s", power_flags_str(job_ptr->power_flags));

	/****** Line 38 (optional) ******/
	if (job_ptr->bitflags) {
		xstrcat(out, line_end);
		if (job_ptr->bitflags & GRES_ENFORCE_BIND)
			xstrcat(out, "GresEnforceBind=Yes");
		if (job_ptr->bitflags & KILL_INV_DEP)
			xstrcat(out, "KillOInInvalidDependent=Yes");
		if (job_ptr->bitflags & NO_KILL_INV_DEP)
			xstrcat(out, "KillOInInvalidDependent=No");
		if (job_ptr->bitflags & SPREAD_JOB)
			xstrcat(out, "SpreadJob=Yes");
	}

	/****** END OF JOB RECORD ******/
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}
コード例 #4
0
ファイル: cpu_frequency.c プロジェクト: asanchez1987/slurm
/*
 * set cpu frequency if possible for each cpu of the job step
 */
extern void
cpu_freq_set(stepd_step_rec_t *job)
{
	char freq_detail[100];
	uint32_t freq;
	int i, rc;

	if ((!cpu_freq_count) || (!cpufreq))
		return;
	for (i = 0; i < cpu_freq_count; i++) {
		if (cpufreq[i].new_frequency == NO_VAL
		    && cpufreq[i].new_min_freq == NO_VAL
	            && cpufreq[i].new_max_freq == NO_VAL
		    && cpufreq[i].new_governor[0] == '\0')
			continue; /* Nothing to set on this CPU */
		if (debug_flags & DEBUG_FLAG_CPU_FREQ) {
			info("cpu_freq: current_state cpu=%d org_min=%u "
			     "org_freq=%u org_max=%u org_gpv=%s", i,
			     cpufreq[i].org_min_freq,
			     cpufreq[i].org_frequency,
			     cpufreq[i].org_max_freq,
			     cpufreq[i].org_governor);
		}

		/* Max must be set before min, per
		 * www.kernel.org/doc/Documentation/cpu-freq/user-guide.txt
		 */
		if (cpufreq[i].new_max_freq != NO_VAL ) {
			freq = cpufreq[i].new_max_freq;
			if (cpufreq[i].org_frequency > freq) {
				/* The current frequency is > requested max,
				 * Set it so it is in range
				 * have to go to UserSpace to do it. */
				rc = _cpu_freq_set_gov(job, i, "userspace");
				if (rc == SLURM_FAILURE)
					return;
				rc = _cpu_freq_set_scaling_freq(job, i, freq,
						         "scaling_setspeed");
				if (rc == SLURM_FAILURE)
					continue;
				if (cpufreq[i].new_governor[0] == '\0') {
					/* Not requesting new gov, so restore */
					rc = _cpu_freq_set_gov(job, i,
						cpufreq[i].org_governor);
					if (rc == SLURM_FAILURE)
						continue;
				}
			}
			rc = _cpu_freq_set_scaling_freq(job, i, freq,
							"scaling_max_freq");
			if (rc == SLURM_FAILURE)
				continue;
		}
		if (cpufreq[i].new_min_freq != NO_VAL) {
			freq = cpufreq[i].new_min_freq;
			if (cpufreq[i].org_frequency < freq) {
				/* The current frequency is < requested min,
				 * Set it so it is in range
				 * have to go to UserSpace to do it. */
				rc = _cpu_freq_set_gov(job, i, "userspace");
				if (rc == SLURM_FAILURE)
					continue;
				rc = _cpu_freq_set_scaling_freq(job, i, freq,
						         "scaling_setspeed");
				if (rc == SLURM_FAILURE)
					continue;
				if (cpufreq[i].new_governor[0] == '\0') {
					/* Not requesting new gov, so restore */
					rc= _cpu_freq_set_gov(job, i,
						cpufreq[i].org_governor);
					if (rc == SLURM_FAILURE)
						continue;
				}
			}
			rc= _cpu_freq_set_scaling_freq(job, i, freq,
						       "scaling_min_freq");
			if (rc == SLURM_FAILURE)
				continue;
		}
		if (cpufreq[i].new_frequency != NO_VAL) {
			if (strcmp(cpufreq[i].org_governor,"userspace")) {
				rc = _cpu_freq_set_gov(job, i, "userspace");
				if (rc == SLURM_FAILURE)
					continue;
			}
			rc = _cpu_freq_set_scaling_freq(job, i,
					cpufreq[i].new_frequency,
					"scaling_setspeed");
			if (rc == SLURM_FAILURE)
				continue;
		}
		if (cpufreq[i].new_governor[0] != '\0') {
			rc = _cpu_freq_set_gov(job, i, cpufreq[i].new_governor);
			if (rc == SLURM_FAILURE)
				continue;
		}
		if (debug_flags & DEBUG_FLAG_CPU_FREQ) {
			cpu_freq_debug(NULL, NULL,
					freq_detail, sizeof(freq_detail),
					NO_VAL, cpufreq[i].new_min_freq,
					cpufreq[i].new_max_freq,
					cpufreq[i].new_frequency);
			if (cpufreq[i].new_governor[0] != '\0') {
				info("cpu_freq: set cpu=%d %s Governor=%s",
				     i, freq_detail, cpufreq[i].new_governor);
			} else {
				info("cpu_freq: reset cpu=%d %s", i,
				     freq_detail);
			}
		}
	}
}
コード例 #5
0
/*
 * slurm_sprint_job_step_info - output information about a specific Slurm
 *	job step based upon message as loaded using slurm_get_job_steps
 * IN job_ptr - an individual job step information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 */
char *
slurm_sprint_job_step_info ( job_step_info_t * job_step_ptr,
			    int one_liner )
{
	char tmp_node_cnt[40];
	char time_str[32];
	char limit_str[32];
	char tmp_line[128];
	char *out = NULL;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	/****** Line 1 ******/
	slurm_make_time_str ((time_t *)&job_step_ptr->start_time, time_str,
		sizeof(time_str));
	if (job_step_ptr->time_limit == INFINITE)
		sprintf(limit_str, "UNLIMITED");
	else
		secs2time_str ((time_t)job_step_ptr->time_limit * 60,
				limit_str, sizeof(limit_str));
	if (job_step_ptr->array_job_id) {
		if (job_step_ptr->step_id == INFINITE) {	/* Pending */
			snprintf(tmp_line, sizeof(tmp_line),
				 "StepId=%u_%u.TBD ",
				 job_step_ptr->array_job_id,
				 job_step_ptr->array_task_id);
		} else {
			snprintf(tmp_line, sizeof(tmp_line), "StepId=%u_%u.%u ",
				 job_step_ptr->array_job_id,
				 job_step_ptr->array_task_id,
				 job_step_ptr->step_id);
		}
		out = xstrdup(tmp_line);
	} else {
		if (job_step_ptr->step_id == INFINITE) {	/* Pending */
			snprintf(tmp_line, sizeof(tmp_line), "StepId=%u.TBD ",
				 job_step_ptr->job_id);
		} else {
			snprintf(tmp_line, sizeof(tmp_line), "StepId=%u.%u ",
				 job_step_ptr->job_id, job_step_ptr->step_id);
		}
		out = xstrdup(tmp_line);
	}
	snprintf(tmp_line, sizeof(tmp_line),
		 "UserId=%u StartTime=%s TimeLimit=%s",
		 job_step_ptr->user_id, time_str, limit_str);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 2 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "State=%s ",
		 job_state_string(job_step_ptr->state));
	xstrcat(out, tmp_line);
	if (cluster_flags & CLUSTER_FLAG_BG) {
		char *io_nodes = NULL;
		select_g_select_jobinfo_get(job_step_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &io_nodes);
		if (io_nodes) {
			snprintf(tmp_line, sizeof(tmp_line),
				 "Partition=%s MidplaneList=%s[%s] Gres=%s",
				 job_step_ptr->partition,
				 job_step_ptr->nodes, io_nodes,
				 job_step_ptr->gres);
			xfree(io_nodes);
		} else
			snprintf(tmp_line, sizeof(tmp_line),
				 "Partition=%s MidplaneList=%s Gres=%s",
				 job_step_ptr->partition,
				 job_step_ptr->nodes,
				 job_step_ptr->gres);
	} else {
		snprintf(tmp_line, sizeof(tmp_line),
			"Partition=%s NodeList=%s Gres=%s",
			job_step_ptr->partition, job_step_ptr->nodes,
			job_step_ptr->gres);
	}
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 3 ******/
	if (cluster_flags & CLUSTER_FLAG_BGQ) {
		uint32_t nodes = 0;
		select_g_select_jobinfo_get(job_step_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &nodes);
		convert_num_unit((float)nodes, tmp_node_cnt,
				 sizeof(tmp_node_cnt), UNIT_NONE,
				 CONVERT_NUM_UNIT_EXACT);
	} else {
		convert_num_unit((float)_nodes_in_list(job_step_ptr->nodes),
				 tmp_node_cnt, sizeof(tmp_node_cnt),
				 UNIT_NONE, CONVERT_NUM_UNIT_EXACT);
	}

	snprintf(tmp_line, sizeof(tmp_line),
		"Nodes=%s CPUs=%u Tasks=%u Name=%s Network=%s",
		 tmp_node_cnt, job_step_ptr->num_cpus, job_step_ptr->num_tasks,
		 job_step_ptr->name, job_step_ptr->network);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 4 ******/
	snprintf(tmp_line, sizeof(tmp_line), "TRES=%s",
		 job_step_ptr->tres_alloc_str);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 5 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		"ResvPorts=%s Checkpoint=%u CheckpointDir=%s",
		 job_step_ptr->resv_ports,
		 job_step_ptr->ckpt_interval, job_step_ptr->ckpt_dir);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 6 ******/
	if (cpu_freq_debug(NULL, NULL, tmp_line, sizeof(tmp_line),
			   job_step_ptr->cpu_freq_gov,
			   job_step_ptr->cpu_freq_min,
			   job_step_ptr->cpu_freq_max, NO_VAL) != 0) {
		xstrcat(out, tmp_line);
	} else {
		xstrcat(out, "CPUFreqReq=Default");
	}
	xstrfmtcat(out, " Dist=%s",
		   slurm_step_layout_type_name(job_step_ptr->task_dist));
	xstrcat(out, "\n\n");

	return out;
}