Exemple #1
0
int _print_job_reason(job_info_t * job, int width, bool right, char* suffix)
{
	if (job == NULL)        /* Print the Header instead */
		_print_str("REASON", width, right, true);
	else {
		char id[FORMAT_STRING_SIZE], *reason;
		if (job->state_desc)
			reason = job->state_desc;
		else
			reason = job_reason_string(job->state_reason);
		snprintf(id, FORMAT_STRING_SIZE, "%s", reason);
		_print_str(id, width, right, true);
	}
	if (suffix)
		printf("%s", suffix);
	return SLURM_SUCCESS;
}
Exemple #2
0
int _print_job_reason_list(job_info_t * job, int width, bool right,
		char* suffix)
{
	if (job == NULL) {	/* Print the Header instead */
		char *title = "NODELIST(REASON)";
		if (params.cluster_flags & CLUSTER_FLAG_BG)
			title = "MIDPLANELIST(REASON)";
		_print_str(title, width, right, false);
	} else if (!IS_JOB_COMPLETING(job)
		   && (IS_JOB_PENDING(job)
		       || IS_JOB_TIMEOUT(job)
		       || IS_JOB_FAILED(job))) {
		char id[FORMAT_STRING_SIZE], *reason;
		if (job->state_desc)
			reason = job->state_desc;
		else
			reason = job_reason_string(job->state_reason);
		snprintf(id, FORMAT_STRING_SIZE, "(%s)", reason);
		_print_str(id, width, right, true);
	} else {
		char *nodes = xstrdup(job->nodes);
		char *ionodes = NULL;

		select_g_select_jobinfo_get(job->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &ionodes);
		if (ionodes) {
			xstrfmtcat(nodes, "[%s]", ionodes);
			xfree(ionodes);
			_print_str(nodes, width, right, false);
		} else
			_print_nodes(nodes, width, right, false);
		xfree(nodes);
	}
	if (suffix)
		printf("%s", suffix);
	return SLURM_SUCCESS;
}
Exemple #3
0
/*
 * Attempt to start a job
 * jobid     (IN) - job id
 * task_cnt  (IN) - total count of tasks to start
 * hostlist  (IN) - SLURM hostlist expression with no repeated hostnames
 * tasklist  (IN/OUT) - comma separated list of hosts with tasks to be started,
 *                  list hostname once per task to start
 * comment_ptr (IN) - new comment field for the job or NULL for no change
 * err_code (OUT) - Moab error code
 * err_msg  (OUT) - Moab error message
 */
static int	_start_job(uint32_t jobid, int task_cnt, char *hostlist,
			char *tasklist, char *comment_ptr,
			int *err_code, char **err_msg)
{
	int rc = 0, old_task_cnt = 1;
	struct job_record *job_ptr;
	/* Write lock on job info, read lock on node info */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK };
	char *new_node_list = NULL;
	static char tmp_msg[128];
	bitstr_t *new_bitmap = (bitstr_t *) NULL;
	bitstr_t *save_req_bitmap = (bitstr_t *) NULL;
	bitoff_t i, bsize;
	int ll; /* layout info index */
	char *node_name, *node_idx, *node_cur, *save_req_nodes = NULL;
	size_t node_name_len;
	static uint32_t cr_test = 0, cr_enabled = 0;

	if (cr_test == 0) {
		select_g_get_info_from_plugin(SELECT_CR_PLUGIN, NULL,
						&cr_enabled);
		cr_test = 1;
	}

	lock_slurmctld(job_write_lock);
	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL) {
		*err_code = -700;
		*err_msg = "No such job";
		error("wiki: Failed to find job %u", jobid);
		rc = -1;
		goto fini;
	}

	if ((job_ptr->details == NULL) || (!IS_JOB_PENDING(job_ptr))) {
		*err_code = -700;
		*err_msg = "Job not pending, can't start";
		error("wiki: Attempt to start job %u in state %s",
			jobid, job_state_string(job_ptr->job_state));
		rc = -1;
		goto fini;
	}

	if (comment_ptr) {
		char *reserved = strstr(comment_ptr, "RESERVED:");
		if (reserved) {
			reserved += 9;
			job_ptr->details->reserved_resources =
				strtol(reserved, NULL, 10);
		}
		xfree(job_ptr->comment);
		job_ptr->comment = xstrdup(comment_ptr);
	}

	if (task_cnt) {
		new_node_list = xstrdup(hostlist);
		if (node_name2bitmap(new_node_list, false, &new_bitmap) != 0) {
			*err_code = -700;
			*err_msg = "Invalid TASKLIST";
			error("wiki: Attempt to set invalid node list for "
				"job %u, %s",
				jobid, hostlist);
			xfree(new_node_list);
			rc = -1;
			goto fini;
		}

		if (!bit_super_set(new_bitmap, avail_node_bitmap)) {
			/* Selected node is UP and not responding
			 * or it just went DOWN */
			*err_code = -700;
			*err_msg = "TASKLIST includes non-responsive node";
			error("wiki: Attempt to use non-responsive nodes for "
				"job %u, %s",
				jobid, hostlist);
			xfree(new_node_list);
			FREE_NULL_BITMAP(new_bitmap);
			rc = -1;
			goto fini;
		}

		/* User excluded node list incompatible with Wiki
		 * Exclude all nodes not explicitly requested */
		FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap);
		job_ptr->details->exc_node_bitmap = bit_copy(new_bitmap);
		bit_not(job_ptr->details->exc_node_bitmap);
	}

	/* Build layout information from tasklist (assuming that Moab
	 * sends a non-bracketed list of nodes, repeated as many times
	 * as cpus should be used per node); at this point, node names
	 * are comma-separated. This is _not_ a fast algorithm as it
	 * performs many string compares. */
	xfree(job_ptr->details->req_node_layout);
	if (task_cnt && cr_enabled) {
		uint16_t cpus_per_task = MAX(1, job_ptr->details->cpus_per_task);
		job_ptr->details->req_node_layout = (uint16_t *)
			xmalloc(bit_set_count(new_bitmap) * sizeof(uint16_t));
		bsize = bit_size(new_bitmap);
		for (i = 0, ll = -1; i < bsize; i++) {
			if (!bit_test(new_bitmap, i))
				continue;
			ll++;
			node_name = node_record_table_ptr[i].name;
			node_name_len  = strlen(node_name);
			if (node_name_len == 0)
				continue;
			node_cur = tasklist;
			while (*node_cur) {
				if ((node_idx = strstr(node_cur, node_name))) {
					if ((node_idx[node_name_len] == ',') ||
				 	    (node_idx[node_name_len] == '\0')) {
						job_ptr->details->
							req_node_layout[ll] +=
							cpus_per_task;
					}
					node_cur = strchr(node_idx, ',');
					if (node_cur)
						continue;
				}
				break;
			}
		}
	}

	/* save and update job state to start now */
	save_req_nodes = job_ptr->details->req_nodes;
	job_ptr->details->req_nodes = new_node_list;
	save_req_bitmap = job_ptr->details->req_node_bitmap;
	job_ptr->details->req_node_bitmap = new_bitmap;
	old_task_cnt = job_ptr->details->min_cpus;
	job_ptr->details->min_cpus = MAX(task_cnt, old_task_cnt);
	job_ptr->priority = 100000000;

 fini:	unlock_slurmctld(job_write_lock);
	if (rc)
		return rc;

	/* No errors so far */
	(void) schedule(INFINITE);	/* provides own locking */

	/* Check to insure the job was actually started */
	lock_slurmctld(job_write_lock);
	if (job_ptr->job_id != jobid)
		job_ptr = find_job_record(jobid);

	if (job_ptr && (job_ptr->job_id == jobid) &&
	    (!IS_JOB_RUNNING(job_ptr))) {
		uint16_t wait_reason = 0;
		char *wait_string;

		if (IS_JOB_FAILED(job_ptr))
			wait_string = "Invalid request, job aborted";
		else {
			wait_reason = job_ptr->state_reason;
			if (wait_reason == WAIT_HELD) {
				/* some job is completing, slurmctld did
				 * not even try to schedule this job */
				wait_reason = WAIT_RESOURCES;
			}
			wait_string = job_reason_string(wait_reason);
			job_ptr->state_reason = WAIT_HELD;
			xfree(job_ptr->state_desc);
		}
		*err_code = -910 - wait_reason;
		snprintf(tmp_msg, sizeof(tmp_msg),
			"Could not start job %u(%s): %s",
			jobid, new_node_list, wait_string);
		*err_msg = tmp_msg;
		error("wiki: %s", tmp_msg);

		/* restore some of job state */
		job_ptr->priority = 0;
		job_ptr->details->min_cpus = old_task_cnt;
		rc = -1;
	}

	if (job_ptr && (job_ptr->job_id == jobid) && job_ptr->details) {
		/* Restore required node list in case job requeued */
		xfree(job_ptr->details->req_nodes);
		job_ptr->details->req_nodes = save_req_nodes;
		FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
		job_ptr->details->req_node_bitmap = save_req_bitmap;
		FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap);
		xfree(job_ptr->details->req_node_layout);
	} else {
		error("wiki: start_job(%u) job missing", jobid);
		xfree(save_req_nodes);
		FREE_NULL_BITMAP(save_req_bitmap);
	}

	unlock_slurmctld(job_write_lock);
	schedule_node_save();	/* provides own locking */
	schedule_job_save();	/* provides own locking */
	return rc;
}
Exemple #4
0
/*
 * slurm_sprint_job_info - output information about a specific Slurm
 *	job based upon message as loaded using slurm_load_jobs
 * IN job_ptr - an individual job information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 */
extern char *
slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner )
{
	int i, j;
	char time_str[32], *group_name, *user_name;
	char tmp1[128], tmp2[128], tmp3[128], tmp4[128], tmp5[128], *tmp6_ptr;
	char tmp_line[512];
	char *ionodes = NULL;
	uint16_t exit_status = 0, term_sig = 0;
	job_resources_t *job_resrcs = job_ptr->job_resrcs;
	char *out = NULL;
	time_t run_time;
	uint32_t min_nodes, max_nodes = 0;
	char *nodelist = "NodeList";
	bitstr_t *core_bitmap;
	char *host;
	int sock_inx, sock_reps, last;
	int abs_node_inx, rel_node_inx;
	int bit_inx, bit_reps;
	uint32_t *last_mem_alloc_ptr = NULL;
	uint32_t last_mem_alloc = NO_VAL;
	char *last_hosts;
	hostlist_t hl, hl_last;
	char select_buf[122];
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	if (cluster_flags & CLUSTER_FLAG_BG) {
		nodelist = "MidplaneList";
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &ionodes);
	}

	/****** Line 1 ******/
	snprintf(tmp_line, sizeof(tmp_line), "JobId=%u ", job_ptr->job_id);
	out = xstrdup(tmp_line);
	if (job_ptr->array_job_id) {
		snprintf(tmp_line, sizeof(tmp_line), 
			 "ArrayJobId=%u ArrayTaskId=%u ",
			 job_ptr->array_job_id, job_ptr->array_task_id);
		xstrcat(out, tmp_line);
	}
	snprintf(tmp_line, sizeof(tmp_line), "Name=%s", job_ptr->name);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 2 ******/
	user_name = uid_to_string((uid_t) job_ptr->user_id);
	group_name = gid_to_string((gid_t) job_ptr->group_id);
	snprintf(tmp_line, sizeof(tmp_line),
		 "UserId=%s(%u) GroupId=%s(%u)",
		 user_name, job_ptr->user_id, group_name, job_ptr->group_id);
	xfree(user_name);
	xfree(group_name);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 3 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "Priority=%u Account=%s QOS=%s",
		 job_ptr->priority, job_ptr->account, job_ptr->qos);
	xstrcat(out, tmp_line);
	if (slurm_get_track_wckey()) {
		snprintf(tmp_line, sizeof(tmp_line),
			 " WCKey=%s", job_ptr->wckey);
		xstrcat(out, tmp_line);
	}
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 4 ******/
	if (job_ptr->state_desc) {
		/* Replace white space with underscore for easier parsing */
		for (j=0; job_ptr->state_desc[j]; j++) {
			if (isspace((int)job_ptr->state_desc[j]))
				job_ptr->state_desc[j] = '_';
		}
		tmp6_ptr = job_ptr->state_desc;
	} else
		tmp6_ptr = job_reason_string(job_ptr->state_reason);
	snprintf(tmp_line, sizeof(tmp_line),
		 "JobState=%s Reason=%s Dependency=%s",
		 job_state_string(job_ptr->job_state), tmp6_ptr,
		 job_ptr->dependency);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 5 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "Requeue=%u Restarts=%u BatchFlag=%u ",
		 job_ptr->requeue, job_ptr->restart_cnt, job_ptr->batch_flag);
	xstrcat(out, tmp_line);
	if (WIFSIGNALED(job_ptr->exit_code))
		term_sig = WTERMSIG(job_ptr->exit_code);
	exit_status = WEXITSTATUS(job_ptr->exit_code);
	snprintf(tmp_line, sizeof(tmp_line),
		 "ExitCode=%u:%u", exit_status, term_sig);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 5a (optional) ******/
	if (!(job_ptr->show_flags & SHOW_DETAIL))
		goto line6;
	if (WIFSIGNALED(job_ptr->derived_ec))
		term_sig = WTERMSIG(job_ptr->derived_ec);
	else
		term_sig = 0;
	exit_status = WEXITSTATUS(job_ptr->derived_ec);
	snprintf(tmp_line, sizeof(tmp_line),
		 "DerivedExitCode=%u:%u", exit_status, term_sig);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 6 ******/
line6:
	snprintf(tmp_line, sizeof(tmp_line), "RunTime=");
	xstrcat(out, tmp_line);
	if (IS_JOB_PENDING(job_ptr))
		run_time = 0;
	else if (IS_JOB_SUSPENDED(job_ptr))
		run_time = job_ptr->pre_sus_time;
	else {
		time_t end_time;
		if (IS_JOB_RUNNING(job_ptr) || (job_ptr->end_time == 0))
			end_time = time(NULL);
		else
			end_time = job_ptr->end_time;
		if (job_ptr->suspend_time) {
			run_time = (time_t)
				(difftime(end_time, job_ptr->suspend_time)
				 + job_ptr->pre_sus_time);
		} else
			run_time = (time_t)
				difftime(end_time, job_ptr->start_time);
	}
	secs2time_str(run_time, tmp1, sizeof(tmp1));
	sprintf(tmp_line, "%s ", tmp1);
	xstrcat(out, tmp_line);

	snprintf(tmp_line, sizeof(tmp_line), "TimeLimit=");
	xstrcat(out, tmp_line);
	if (job_ptr->time_limit == NO_VAL)
		sprintf(tmp_line, "Partition_Limit");
	else {
		mins2time_str(job_ptr->time_limit, tmp_line,
			      sizeof(tmp_line));
	}
	xstrcat(out, tmp_line);
	snprintf(tmp_line, sizeof(tmp_line), " TimeMin=");
	xstrcat(out, tmp_line);
	if (job_ptr->time_min == 0)
		sprintf(tmp_line, "N/A");
	else {
		mins2time_str(job_ptr->time_min, tmp_line,
			      sizeof(tmp_line));
	}
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 7 ******/
	slurm_make_time_str((time_t *)&job_ptr->submit_time, time_str,
			    sizeof(time_str));
	snprintf(tmp_line, sizeof(tmp_line), "SubmitTime=%s ", time_str);
	xstrcat(out, tmp_line);

	slurm_make_time_str((time_t *)&job_ptr->eligible_time, time_str,
			    sizeof(time_str));
	snprintf(tmp_line, sizeof(tmp_line), "EligibleTime=%s", time_str);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 8 (optional) ******/
	if (job_ptr->resize_time) {
		slurm_make_time_str((time_t *)&job_ptr->resize_time, time_str,
				    sizeof(time_str));
		snprintf(tmp_line, sizeof(tmp_line), "ResizeTime=%s", time_str);
		xstrcat(out, tmp_line);
		if (one_liner)
			xstrcat(out, " ");
		else
			xstrcat(out, "\n   ");
	}

	/****** Line 9 ******/
	slurm_make_time_str((time_t *)&job_ptr->start_time, time_str,
			    sizeof(time_str));
	snprintf(tmp_line, sizeof(tmp_line), "StartTime=%s ", time_str);
	xstrcat(out, tmp_line);

	snprintf(tmp_line, sizeof(tmp_line), "EndTime=");
	xstrcat(out, tmp_line);
	if ((job_ptr->time_limit == INFINITE) &&
	    (job_ptr->end_time > time(NULL)))
		sprintf(tmp_line, "Unknown");
	else {
		slurm_make_time_str ((time_t *)&job_ptr->end_time, time_str,
				     sizeof(time_str));
		sprintf(tmp_line, "%s", time_str);
	}
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 10 ******/
	if (job_ptr->preempt_time == 0)
		sprintf(tmp_line, "PreemptTime=None ");
	else {
		slurm_make_time_str((time_t *)&job_ptr->preempt_time,
				    time_str, sizeof(time_str));
		snprintf(tmp_line, sizeof(tmp_line), "PreemptTime=%s ",
			 time_str);
	}
	xstrcat(out, tmp_line);
	if (job_ptr->suspend_time) {
		slurm_make_time_str ((time_t *)&job_ptr->suspend_time,
				     time_str, sizeof(time_str));
	} else {
		strncpy(time_str, "None", sizeof(time_str));
	}
	snprintf(tmp_line, sizeof(tmp_line),
		 "SuspendTime=%s SecsPreSuspend=%ld",
		 time_str, (long int)job_ptr->pre_sus_time);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 11 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "Partition=%s AllocNode:Sid=%s:%u",
		 job_ptr->partition, job_ptr->alloc_node, job_ptr->alloc_sid);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 12 ******/
	snprintf(tmp_line, sizeof(tmp_line), "Req%s=%s Exc%s=%s",
		 nodelist, job_ptr->req_nodes, nodelist, job_ptr->exc_nodes);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 13 ******/
	xstrfmtcat(out, "%s=", nodelist);
	xstrcat(out, job_ptr->nodes);
	if (job_ptr->nodes && ionodes) {
		snprintf(tmp_line, sizeof(tmp_line), "[%s]", ionodes);
		xstrcat(out, tmp_line);
		xfree(ionodes);
	}
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 14 (optional) ******/
	if (job_ptr->batch_host) {
		snprintf(tmp_line, sizeof(tmp_line), "BatchHost=%s",
			 job_ptr->batch_host);
		xstrcat(out, tmp_line);
		if (one_liner)
			xstrcat(out, " ");
		else
			xstrcat(out, "\n   ");
	}

	/****** Line 15 ******/
	if (cluster_flags & CLUSTER_FLAG_BG) {
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &min_nodes);
		if ((min_nodes == 0) || (min_nodes == NO_VAL)) {
			min_nodes = job_ptr->num_nodes;
			max_nodes = job_ptr->max_nodes;
		} else if (job_ptr->max_nodes)
			max_nodes = min_nodes;
	} else {
		min_nodes = job_ptr->num_nodes;
		max_nodes = job_ptr->max_nodes;
	}

	_sprint_range(tmp1, sizeof(tmp1), job_ptr->num_cpus, job_ptr->max_cpus);
	_sprint_range(tmp2, sizeof(tmp2), min_nodes, max_nodes);
	if (job_ptr->sockets_per_node == (uint16_t) NO_VAL)
		strcpy(tmp3, "*");
	else
		snprintf(tmp3, sizeof(tmp3), "%u", job_ptr->sockets_per_node);
	if (job_ptr->cores_per_socket == (uint16_t) NO_VAL)
		strcpy(tmp4, "*");
	else
		snprintf(tmp4, sizeof(tmp4), "%u", job_ptr->cores_per_socket);
	if (job_ptr->threads_per_core == (uint16_t) NO_VAL)
		strcpy(tmp5, "*");
	else
		snprintf(tmp5, sizeof(tmp5), "%u", job_ptr->threads_per_core);
	snprintf(tmp_line, sizeof(tmp_line),
		 "NumNodes=%s NumCPUs=%s CPUs/Task=%u ReqS:C:T=%s:%s:%s",
		 tmp2, tmp1, job_ptr->cpus_per_task, tmp3, tmp4, tmp5);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	if (!job_resrcs)
		goto line15;

	if (cluster_flags & CLUSTER_FLAG_BG) {
		if ((job_resrcs->cpu_array_cnt > 0) &&
		    (job_resrcs->cpu_array_value) &&
		    (job_resrcs->cpu_array_reps)) {
			int length = 0;
			xstrcat(out, "CPUs=");
			length += 10;
			for (i = 0; i < job_resrcs->cpu_array_cnt; i++) {
				if (length > 70) {
					/* skip to last CPU group entry */
					if (i < job_resrcs->cpu_array_cnt - 1) {
						continue;
					}
					/* add ellipsis before last entry */
					xstrcat(out, "...,");
					length += 4;
				}

				snprintf(tmp_line, sizeof(tmp_line), "%d",
					 job_resrcs->cpus[i]);
				xstrcat(out, tmp_line);
				length += strlen(tmp_line);
				if (job_resrcs->cpu_array_reps[i] > 1) {
					snprintf(tmp_line, sizeof(tmp_line),
						 "*%d",
						 job_resrcs->cpu_array_reps[i]);
					xstrcat(out, tmp_line);
					length += strlen(tmp_line);
				}
				if (i < job_resrcs->cpu_array_cnt - 1) {
					xstrcat(out, ",");
					length++;
				}
			}
			if (one_liner)
				xstrcat(out, " ");
			else
				xstrcat(out, "\n   ");
		}
	} else {
		if (!job_resrcs->core_bitmap)
			goto line15;

		last  = bit_fls(job_resrcs->core_bitmap);
		if (last == -1)
			goto line15;

		hl = hostlist_create(job_ptr->nodes);
		if (!hl) {
			error("slurm_sprint_job_info: hostlist_create: %s",
			      job_ptr->nodes);
			return NULL;
		}
		hl_last = hostlist_create(NULL);
		if (!hl_last) {
			error("slurm_sprint_job_info: hostlist_create: NULL");
			hostlist_destroy(hl);
			return NULL;
		}

		bit_inx = 0;
		i = sock_inx = sock_reps = 0;
		abs_node_inx = job_ptr->node_inx[i];

/*	tmp1[] stores the current cpu(s) allocated	*/
		tmp2[0] = '\0';	/* stores last cpu(s) allocated */
		for (rel_node_inx=0; rel_node_inx < job_resrcs->nhosts;
		     rel_node_inx++) {

			if (sock_reps >=
			    job_resrcs->sock_core_rep_count[sock_inx]) {
				sock_inx++;
				sock_reps = 0;
			}
			sock_reps++;

			bit_reps = job_resrcs->sockets_per_node[sock_inx] *
				job_resrcs->cores_per_socket[sock_inx];

			core_bitmap = bit_alloc(bit_reps);
			for (j=0; j < bit_reps; j++) {
				if (bit_test(job_resrcs->core_bitmap, bit_inx))
					bit_set(core_bitmap, j);
				bit_inx++;
			}

			bit_fmt(tmp1, sizeof(tmp1), core_bitmap);
			FREE_NULL_BITMAP(core_bitmap);
			host = hostlist_shift(hl);
/*
 *		If the allocation values for this host are not the same as the
 *		last host, print the report of the last group of hosts that had
 *		identical allocation values.
 */
			if (strcmp(tmp1, tmp2) ||
			    (last_mem_alloc_ptr != job_resrcs->memory_allocated) ||
			    (job_resrcs->memory_allocated &&
			     (last_mem_alloc !=
			      job_resrcs->memory_allocated[rel_node_inx]))) {
				if (hostlist_count(hl_last)) {
					last_hosts = 
						hostlist_ranged_string_xmalloc(
						hl_last);
					snprintf(tmp_line, sizeof(tmp_line),
						 "  Nodes=%s CPU_IDs=%s Mem=%u",
						 last_hosts, tmp2,
						 last_mem_alloc_ptr ?
						 last_mem_alloc : 0);
					xfree(last_hosts);
					xstrcat(out, tmp_line);
					if (one_liner)
						xstrcat(out, " ");
					else
						xstrcat(out, "\n   ");

					hostlist_destroy(hl_last);
					hl_last = hostlist_create(NULL);
				}
				strcpy(tmp2, tmp1);
				last_mem_alloc_ptr = job_resrcs->memory_allocated;
				if (last_mem_alloc_ptr)
					last_mem_alloc = job_resrcs->
						memory_allocated[rel_node_inx];
				else
					last_mem_alloc = NO_VAL;
			}
			hostlist_push_host(hl_last, host);
			free(host);

			if (bit_inx > last)
				break;

			if (abs_node_inx > job_ptr->node_inx[i+1]) {
				i += 2;
				abs_node_inx = job_ptr->node_inx[i];
			} else {
				abs_node_inx++;
			}
		}

		if (hostlist_count(hl_last)) {
			last_hosts = hostlist_ranged_string_xmalloc(hl_last);
			snprintf(tmp_line, sizeof(tmp_line),
				 "  Nodes=%s CPU_IDs=%s Mem=%u",
				 last_hosts, tmp2,
				 last_mem_alloc_ptr ? last_mem_alloc : 0);
			xfree(last_hosts);
			xstrcat(out, tmp_line);
			if (one_liner)
				xstrcat(out, " ");
			else
				xstrcat(out, "\n   ");
		}
		hostlist_destroy(hl);
		hostlist_destroy(hl_last);
	}
	/****** Line 15 ******/
line15:
	if (job_ptr->pn_min_memory & MEM_PER_CPU) {
		job_ptr->pn_min_memory &= (~MEM_PER_CPU);
		tmp6_ptr = "CPU";
	} else
		tmp6_ptr = "Node";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		convert_num_unit((float)job_ptr->pn_min_cpus,
				 tmp1, sizeof(tmp1), UNIT_NONE);
		snprintf(tmp_line, sizeof(tmp_line), "MinCPUsNode=%s",	tmp1);
	} else {
		snprintf(tmp_line, sizeof(tmp_line), "MinCPUsNode=%u",
			 job_ptr->pn_min_cpus);
	}

	xstrcat(out, tmp_line);
	convert_num_unit((float)job_ptr->pn_min_memory, tmp1, sizeof(tmp1),
			 UNIT_MEGA);
	convert_num_unit((float)job_ptr->pn_min_tmp_disk, tmp2, sizeof(tmp2),
			 UNIT_MEGA);
	snprintf(tmp_line, sizeof(tmp_line),
		 " MinMemory%s=%s MinTmpDiskNode=%s",
		 tmp6_ptr, tmp1, tmp2);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 16 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "Features=%s Gres=%s Reservation=%s",
		 job_ptr->features, job_ptr->gres, job_ptr->resv_name);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 17 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "Shared=%s Contiguous=%d Licenses=%s Network=%s",
		 (job_ptr->shared == 0 ? "0" :
		  job_ptr->shared == 1 ? "1" : "OK"),
		 job_ptr->contiguous, job_ptr->licenses, job_ptr->network);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 18 ******/
	snprintf(tmp_line, sizeof(tmp_line), "Command=%s",
		 job_ptr->command);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 19 ******/
	snprintf(tmp_line, sizeof(tmp_line), "WorkDir=%s",
		 job_ptr->work_dir);
	xstrcat(out, tmp_line);

	if (cluster_flags & CLUSTER_FLAG_BG) {
		/****** Line 20 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_BG_ID);
		if (select_buf[0] != '\0') {
			if (one_liner)
				xstrcat(out, " ");
			else
				xstrcat(out, "\n   ");
			snprintf(tmp_line, sizeof(tmp_line),
				 "Block_ID=%s", select_buf);
			xstrcat(out, tmp_line);
		}

		/****** Line 21 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MIXED_SHORT);
		if (select_buf[0] != '\0') {
			if (one_liner)
				xstrcat(out, " ");
			else
				xstrcat(out, "\n   ");
			xstrcat(out, select_buf);
		}

		if (cluster_flags & CLUSTER_FLAG_BGL) {
			/****** Line 22 (optional) ******/
			select_g_select_jobinfo_sprint(
				job_ptr->select_jobinfo,
				select_buf, sizeof(select_buf),
				SELECT_PRINT_BLRTS_IMAGE);
			if (select_buf[0] != '\0') {
				if (one_liner)
					xstrcat(out, " ");
				else
					xstrcat(out, "\n   ");
				snprintf(tmp_line, sizeof(tmp_line),
					 "BlrtsImage=%s", select_buf);
				xstrcat(out, tmp_line);
			}
		}
		/****** Line 23 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_LINUX_IMAGE);
		if (select_buf[0] != '\0') {
			if (one_liner)
				xstrcat(out, " ");
			else
				xstrcat(out, "\n   ");
			if (cluster_flags & CLUSTER_FLAG_BGL)
				snprintf(tmp_line, sizeof(tmp_line),
					 "LinuxImage=%s", select_buf);
			else
				snprintf(tmp_line, sizeof(tmp_line),
					 "CnloadImage=%s", select_buf);

			xstrcat(out, tmp_line);
		}
		/****** Line 24 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MLOADER_IMAGE);
		if (select_buf[0] != '\0') {
			if (one_liner)
				xstrcat(out, " ");
			else
				xstrcat(out, "\n   ");
			snprintf(tmp_line, sizeof(tmp_line),
				 "MloaderImage=%s", select_buf);
			xstrcat(out, tmp_line);
		}
		/****** Line 25 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_RAMDISK_IMAGE);
		if (select_buf[0] != '\0') {
			if (one_liner)
				xstrcat(out, " ");
			else
				xstrcat(out, "\n   ");
			if (cluster_flags & CLUSTER_FLAG_BGL)
				snprintf(tmp_line, sizeof(tmp_line),
					 "RamDiskImage=%s", select_buf);
			else
				snprintf(tmp_line, sizeof(tmp_line),
					 "IoloadImage=%s", select_buf);
			xstrcat(out, tmp_line);
		}
	}

	/****** Line 26 (optional) ******/
	if (job_ptr->comment) {
		if (one_liner)
			xstrcat(out, " ");
		else
			xstrcat(out, "\n   ");
		snprintf(tmp_line, sizeof(tmp_line), "Comment=%s ",
			 job_ptr->comment);
		xstrcat(out, tmp_line);
	}

	/****** Line 27 (optional) ******/
	if (job_ptr->batch_script) {
		if (one_liner)
			xstrcat(out, " ");
		else
			xstrcat(out, "\n   ");
		xstrcat(out, "BatchScript=\n");
		xstrcat(out, job_ptr->batch_script);
	}

	/****** Line 28 (optional) ******/
	if (job_ptr->req_switch) {
		char time_buf[32];
		if (one_liner)
			xstrcat(out, " ");
		else
			xstrcat(out, "\n   ");
		secs2time_str((time_t) job_ptr->wait4switch, time_buf,
			      sizeof(time_buf));
		snprintf(tmp_line, sizeof(tmp_line), "Switches=%u@%s\n",
			 job_ptr->req_switch, time_buf);
		xstrcat(out, tmp_line);
	}

	/****** Line 29 (optional) ******/
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;

}
static char *	_dump_job(struct job_record *job_ptr, time_t update_time)
{
	char *buf = NULL, tmp[16384];
	char *gname, *pname, *quote, *uname;
	uint32_t end_time, suspend_time;
	uint64_t min_mem;
	int i, rej_sent = 0;

	if (!job_ptr)
		return NULL;

	snprintf(tmp, sizeof(tmp), "%u:STATE=%s;",
		job_ptr->job_id, _get_job_state(job_ptr));
	xstrcat(buf, tmp);

	if (update_time > last_job_update)
		return buf;

	if (IS_JOB_PENDING(job_ptr)) {
		char *req_features = _get_job_features(job_ptr);
		if (req_features) {
			snprintf(tmp, sizeof(tmp),
				"RFEATURES=%s;", req_features);
			xstrcat(buf, tmp);
			xfree(req_features);
		}
		if ((job_ptr->details) &&
		    (job_ptr->details->req_nodes) &&
		    (job_ptr->details->req_nodes[0])) {
			char *hosts = bitmap2wiki_node_name(
				job_ptr->details->req_node_bitmap);
			snprintf(tmp, sizeof(tmp),
				"HOSTLIST=%s;", hosts);
			xstrcat(buf, tmp);
			xfree(hosts);
		}
		if ((job_ptr->details) &&
		    (job_ptr->details->exc_nodes) &&
		    (job_ptr->details->exc_nodes[0])) {
			char *hosts = bitmap2wiki_node_name(
				job_ptr->details->exc_node_bitmap);
			snprintf(tmp, sizeof(tmp),
				"EXCLUDE_HOSTLIST=%s;", hosts);
			xstrcat(buf, tmp);
			xfree(hosts);
		}
		if ((job_ptr->details) && (job_ptr->details->begin_time)) {
			snprintf(tmp, sizeof(tmp),
				"STARTDATE=%u;", (uint32_t)
				job_ptr->details->begin_time);
			xstrcat(buf, tmp);
		}
		if (job_ptr->details) {
			snprintf(tmp, sizeof(tmp),
				 "MAXNODES=%u;", _get_job_max_nodes(job_ptr));
			xstrcat(buf, tmp);
		}
	} else if (!IS_JOB_FINISHED(job_ptr)) {
		char *hosts;
		hosts = slurm_job2moab_task_list(job_ptr);
		xstrcat(buf, "TASKLIST=");
		xstrcat(buf, hosts);
		xstrcat(buf, ";");
		xfree(hosts);
	}

	if (reject_msg_cnt) {
		/* Possible job requeue/reject message */
		for (i=0; i<REJECT_MSG_MAX; i++) {
			if (reject_msgs[i].job_id != job_ptr->job_id)
				continue;
			snprintf(tmp, sizeof(tmp),
				"REJMESSAGE=\"%s\";",
				reject_msgs[i].reason);
			xstrcat(buf, tmp);
			reject_msgs[i].job_id = 0;
			reject_msg_cnt--;
			rej_sent = 1;
			break;
		}
	}
	if ((rej_sent == 0) && IS_JOB_FAILED(job_ptr)) {
		snprintf(tmp, sizeof(tmp),
			"REJMESSAGE=\"%s\";",
			job_reason_string(job_ptr->state_reason));
		xstrcat(buf, tmp);
	}

	if (!IS_JOB_FINISHED(job_ptr) && job_ptr->details &&
	    job_ptr->details->work_dir) {
		if ((quote = strchr(job_ptr->details->work_dir, (int) '\"'))) {
			/* Moab does not like strings containing a quote */
			*quote = '\0';
			snprintf(tmp, sizeof(tmp), "IWD=\"%s\";",
				 job_ptr->details->work_dir);
			*quote = '\"';
		} else {
			snprintf(tmp, sizeof(tmp), "IWD=\"%s\";",
				 job_ptr->details->work_dir);
		}
		xstrcat(buf, tmp);
	}

	if (job_ptr->batch_flag == 0)
		xstrcat(buf, "FLAGS=INTERACTIVE;");

	if (job_ptr->gres) {
		snprintf(tmp, sizeof(tmp),"GRES=\"%s\";", job_ptr->gres);
		xstrcat(buf, tmp);
	}

	if (job_ptr->resp_host) {
		snprintf(tmp, sizeof(tmp),"SUBMITHOST=\"%s\";", job_ptr->resp_host);
		xstrcat(buf, tmp);
	}

	if (job_ptr->wckey) {
		if ((quote = strchr(job_ptr->wckey, (int) '\"'))) {
			/* Moab does not like strings containing a quote */
			*quote = '\0';
			snprintf(tmp, sizeof(tmp),
				"WCKEY=\"%s\";", job_ptr->wckey);
			*quote = '\"';
			xstrcat(buf, tmp);
		} else {
			snprintf(tmp, sizeof(tmp),
				"WCKEY=\"%s\";", job_ptr->wckey);
		}
		xstrcat(buf, tmp);
	}

	snprintf(tmp, sizeof(tmp),
		"UPDATETIME=%u;WCLIMIT=%u;TASKS=%u;",
		(uint32_t) job_ptr->time_last_active,
		(uint32_t) _get_job_time_limit(job_ptr),
		_get_job_tasks(job_ptr));
	xstrcat(buf, tmp);

	if (!IS_JOB_FINISHED(job_ptr)) {
		snprintf(tmp, sizeof(tmp),
			"NODES=%u;",
			_get_pn_min_nodes(job_ptr));
		xstrcat(buf, tmp);
	}

	snprintf(tmp, sizeof(tmp),
		"DPROCS=%u;",
		_get_job_cpus_per_task(job_ptr));
	xstrcat(buf, tmp);

	if (job_ptr->part_ptr)
		pname = job_ptr->part_ptr->name;
	else
		pname = "UNKNOWN";	/* should never see this */
	snprintf(tmp, sizeof(tmp),
		"QUEUETIME=%u;STARTTIME=%u;RCLASS=\"%s\";",
		_get_job_submit_time(job_ptr),
		(uint32_t) job_ptr->start_time, pname);
	xstrcat(buf, tmp);

	min_mem = _get_pn_min_mem(job_ptr);
	if (min_mem & MEM_PER_CPU) {
		min_mem &= ~MEM_PER_CPU;
	}
	snprintf(tmp, sizeof(tmp),
		"RMEM=%"PRIu64";RDISK=%u;",
		 min_mem,
		_get_pn_min_disk(job_ptr));
	xstrcat(buf, tmp);

	_get_job_comment(job_ptr, tmp, sizeof(tmp));
	xstrcat(buf, tmp);

	end_time = _get_job_end_time(job_ptr);
	if (end_time) {
		snprintf(tmp, sizeof(tmp),
			"COMPLETETIME=%u;", end_time);
		xstrcat(buf, tmp);
	}

	suspend_time = _get_job_suspend_time(job_ptr);
	if (suspend_time) {
		snprintf(tmp, sizeof(tmp),
			"SUSPENDTIME=%u;", suspend_time);
		xstrcat(buf, tmp);
	}

	if (job_ptr->account) {
		snprintf(tmp, sizeof(tmp),
			"ACCOUNT=\"%s\";", job_ptr->account);
		xstrcat(buf, tmp);
	}

	if (job_ptr->name && (quote = strchr(job_ptr->name, (int) '\"'))) {
		/* Moab does not like job names containing a quote */
		*quote = '\0';
		snprintf(tmp, sizeof(tmp),
			"NAME=\"%s\";", job_ptr->name);
		*quote = '\"';
	} else {
		snprintf(tmp, sizeof(tmp),
			"NAME=\"%s\";", job_ptr->name);
	}
	xstrcat(buf, tmp);

	if (job_ptr->details &&
	    (update_time > job_ptr->details->submit_time))
		return buf;

	uname = uid_to_string((uid_t) job_ptr->user_id);
	gname = gid_to_string(job_ptr->group_id);
	snprintf(tmp, sizeof(tmp),
		"UNAME=%s;GNAME=%s;", uname, gname);
	xstrcat(buf, tmp);
	xfree(uname);
	xfree(gname);

	return buf;
}
Exemple #6
0
/*
 * slurm_sprint_job_info - output information about a specific Slurm
 *	job based upon message as loaded using slurm_load_jobs
 * IN job_ptr - an individual job information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 */
extern char *
slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner )
{
	int i, j, k;
	char time_str[32], *group_name, *user_name;
	char *gres_last = "", tmp1[128], tmp2[128];
	char *tmp6_ptr;
	char tmp_line[1024 * 128];
	char tmp_path[MAXPATHLEN];
	char *ionodes = NULL;
	uint16_t exit_status = 0, term_sig = 0;
	job_resources_t *job_resrcs = job_ptr->job_resrcs;
	char *out = NULL;
	time_t run_time;
	uint32_t min_nodes, max_nodes = 0;
	char *nodelist = "NodeList";
	bitstr_t *cpu_bitmap;
	char *host;
	int sock_inx, sock_reps, last;
	int abs_node_inx, rel_node_inx;
	int64_t nice;
	int bit_inx, bit_reps;
	uint64_t *last_mem_alloc_ptr = NULL;
	uint64_t last_mem_alloc = NO_VAL64;
	char *last_hosts;
	hostlist_t hl, hl_last;
	char select_buf[122];
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	uint32_t threads;
	char *line_end = (one_liner) ? " " : "\n   ";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		nodelist = "MidplaneList";
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &ionodes);
	}

	/****** Line 1 ******/
	xstrfmtcat(out, "JobId=%u ", job_ptr->job_id);

	if (job_ptr->array_job_id) {
		if (job_ptr->array_task_str) {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%s ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_str);
		} else {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%u ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_id);
		}
	}
	xstrfmtcat(out, "JobName=%s", job_ptr->name);
	xstrcat(out, line_end);

	/****** Line 2 ******/
	user_name = uid_to_string((uid_t) job_ptr->user_id);
	group_name = gid_to_string((gid_t) job_ptr->group_id);
	xstrfmtcat(out, "UserId=%s(%u) GroupId=%s(%u) MCS_label=%s",
		   user_name, job_ptr->user_id, group_name, job_ptr->group_id,
		   (job_ptr->mcs_label==NULL) ? "N/A" : job_ptr->mcs_label);
	xfree(user_name);
	xfree(group_name);
	xstrcat(out, line_end);

	/****** Line 3 ******/
	nice = ((int64_t)job_ptr->nice) - NICE_OFFSET;
	xstrfmtcat(out, "Priority=%u Nice=%"PRIi64" Account=%s QOS=%s",
		   job_ptr->priority, nice, job_ptr->account, job_ptr->qos);
	if (slurm_get_track_wckey())
		xstrfmtcat(out, " WCKey=%s", job_ptr->wckey);
	xstrcat(out, line_end);

	/****** Line 4 ******/
	xstrfmtcat(out, "JobState=%s ", job_state_string(job_ptr->job_state));

	if (job_ptr->state_desc) {
		/* Replace white space with underscore for easier parsing */
		for (j=0; job_ptr->state_desc[j]; j++) {
			if (isspace((int)job_ptr->state_desc[j]))
				job_ptr->state_desc[j] = '_';
		}
		xstrfmtcat(out, "Reason=%s ", job_ptr->state_desc);
	} else
		xstrfmtcat(out, "Reason=%s ", job_reason_string(job_ptr->state_reason));

	xstrfmtcat(out, "Dependency=%s", job_ptr->dependency);
	xstrcat(out, line_end);

	/****** Line 5 ******/
	xstrfmtcat(out, "Requeue=%u Restarts=%u BatchFlag=%u Reboot=%u ",
		 job_ptr->requeue, job_ptr->restart_cnt, job_ptr->batch_flag,
		 job_ptr->reboot);
	if (WIFSIGNALED(job_ptr->exit_code))
		term_sig = WTERMSIG(job_ptr->exit_code);
	exit_status = WEXITSTATUS(job_ptr->exit_code);
	xstrfmtcat(out, "ExitCode=%u:%u", exit_status, term_sig);
	xstrcat(out, line_end);

	/****** Line 5a (optional) ******/
	if (job_ptr->show_flags & SHOW_DETAIL) {
		if (WIFSIGNALED(job_ptr->derived_ec))
			term_sig = WTERMSIG(job_ptr->derived_ec);
		else
			term_sig = 0;
		exit_status = WEXITSTATUS(job_ptr->derived_ec);
		xstrfmtcat(out, "DerivedExitCode=%u:%u", exit_status, term_sig);
		xstrcat(out, line_end);
	}

	/****** Line 6 ******/
	if (IS_JOB_PENDING(job_ptr))
		run_time = 0;
	else if (IS_JOB_SUSPENDED(job_ptr))
		run_time = job_ptr->pre_sus_time;
	else {
		time_t end_time;
		if (IS_JOB_RUNNING(job_ptr) || (job_ptr->end_time == 0))
			end_time = time(NULL);
		else
			end_time = job_ptr->end_time;
		if (job_ptr->suspend_time) {
			run_time = (time_t)
				(difftime(end_time, job_ptr->suspend_time)
				 + job_ptr->pre_sus_time);
		} else
			run_time = (time_t)
				difftime(end_time, job_ptr->start_time);
	}
	secs2time_str(run_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "RunTime=%s ", time_str);

	if (job_ptr->time_limit == NO_VAL)
		xstrcat(out, "TimeLimit=Partition_Limit ");
	else {
		mins2time_str(job_ptr->time_limit, time_str, sizeof(time_str));
		xstrfmtcat(out, "TimeLimit=%s ", time_str);
	}

	if (job_ptr->time_min == 0)
		xstrcat(out, "TimeMin=N/A");
	else {
		mins2time_str(job_ptr->time_min, time_str, sizeof(time_str));
		xstrfmtcat(out, "TimeMin=%s", time_str);
	}
	xstrcat(out, line_end);

	/****** Line 7 ******/
	slurm_make_time_str(&job_ptr->submit_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "SubmitTime=%s ", time_str);

	slurm_make_time_str(&job_ptr->eligible_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "EligibleTime=%s", time_str);

	xstrcat(out, line_end);

	/****** Line 8 (optional) ******/
	if (job_ptr->resize_time) {
		slurm_make_time_str(&job_ptr->resize_time, time_str, sizeof(time_str));
		xstrfmtcat(out, "ResizeTime=%s", time_str);
		xstrcat(out, line_end);
	}

	/****** Line 9 ******/
	slurm_make_time_str(&job_ptr->start_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "StartTime=%s ", time_str);

	if ((job_ptr->time_limit == INFINITE) &&
	    (job_ptr->end_time > time(NULL)))
		xstrcat(out, "EndTime=Unknown ");
	else {
		slurm_make_time_str(&job_ptr->end_time, time_str, sizeof(time_str));
		xstrfmtcat(out, "EndTime=%s ", time_str);
	}

	if (job_ptr->deadline) {
		slurm_make_time_str(&job_ptr->deadline, time_str, sizeof(time_str));
		xstrfmtcat(out, "Deadline=%s", time_str);
	} else {
		xstrcat(out, "Deadline=N/A");
	}

	xstrcat(out, line_end);

	/****** Line 10 ******/
	if (job_ptr->preempt_time == 0)
		xstrcat(out, "PreemptTime=None ");
	else {
		slurm_make_time_str(&job_ptr->preempt_time, time_str, sizeof(time_str));
		xstrfmtcat(out, "PreemptTime=%s ", time_str);
	}

	if (job_ptr->suspend_time) {
		slurm_make_time_str(&job_ptr->suspend_time, time_str, sizeof(time_str));
		xstrfmtcat(out, "SuspendTime=%s ", time_str);
	} else
		xstrcat(out, "SuspendTime=None ");

	xstrfmtcat(out, "SecsPreSuspend=%ld", (long int)job_ptr->pre_sus_time);
	xstrcat(out, line_end);

	/****** Line 11 ******/
	xstrfmtcat(out, "Partition=%s AllocNode:Sid=%s:%u",
		   job_ptr->partition, job_ptr->alloc_node, job_ptr->alloc_sid);
	xstrcat(out, line_end);

	/****** Line 12 ******/
	xstrfmtcat(out, "Req%s=%s Exc%s=%s", nodelist, job_ptr->req_nodes,
		   nodelist, job_ptr->exc_nodes);
	xstrcat(out, line_end);

	/****** Line 13 ******/
	xstrfmtcat(out, "%s=%s", nodelist, job_ptr->nodes);
	if (job_ptr->nodes && ionodes) {
		xstrfmtcat(out, "[%s]", ionodes);
		xfree(ionodes);
	}
	if (job_ptr->sched_nodes)
		xstrfmtcat(out, " Sched%s=%s", nodelist, job_ptr->sched_nodes);

	xstrcat(out, line_end);

	/****** Line 14 (optional) ******/
	if (job_ptr->batch_host) {
		xstrfmtcat(out, "BatchHost=%s", job_ptr->batch_host);
		xstrcat(out, line_end);
	}

	/****** Line 14a (optional) ******/
	if (job_ptr->fed_siblings) {
		xstrfmtcat(out, "FedOrigin=%s FedSiblings=%s",
			   job_ptr->fed_origin_str, job_ptr->fed_siblings_str);
		xstrcat(out, line_end);
	}

	/****** Line 15 ******/
	if (cluster_flags & CLUSTER_FLAG_BG) {
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &min_nodes);
		if ((min_nodes == 0) || (min_nodes == NO_VAL)) {
			min_nodes = job_ptr->num_nodes;
			max_nodes = job_ptr->max_nodes;
		} else if (job_ptr->max_nodes)
			max_nodes = min_nodes;
	} else if (IS_JOB_PENDING(job_ptr)) {
		min_nodes = job_ptr->num_nodes;
		max_nodes = job_ptr->max_nodes;
		if (max_nodes && (max_nodes < min_nodes))
			min_nodes = max_nodes;
	} else {
		min_nodes = job_ptr->num_nodes;
		max_nodes = 0;
	}

	_sprint_range(tmp_line, sizeof(tmp_line), min_nodes, max_nodes);
	xstrfmtcat(out, "NumNodes=%s ", tmp_line);
	_sprint_range(tmp_line, sizeof(tmp_line), job_ptr->num_cpus, job_ptr->max_cpus);
	xstrfmtcat(out, "NumCPUs=%s ", tmp_line);

	xstrfmtcat(out, "NumTasks=%u ", job_ptr->num_tasks);
	xstrfmtcat(out, "CPUs/Task=%u ", job_ptr->cpus_per_task);

	if (job_ptr->boards_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "ReqB:S:C:T=*:");
	else
		xstrfmtcat(out, "ReqB:S:C:T=%u:", job_ptr->boards_per_node);

	if (job_ptr->sockets_per_board == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->sockets_per_board);

	if (job_ptr->cores_per_socket == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->cores_per_socket);

	if (job_ptr->threads_per_core == (uint16_t) NO_VAL)
		xstrcat(out, "*");
	else
		xstrfmtcat(out, "%u", job_ptr->threads_per_core);

	xstrcat(out, line_end);

	/****** Line 16 ******/
	/* Tres should already of been converted at this point from simple */
	xstrfmtcat(out, "TRES=%s",
		   job_ptr->tres_alloc_str ? job_ptr->tres_alloc_str
					   : job_ptr->tres_req_str);
	xstrcat(out, line_end);

	/****** Line 17 ******/
	if (job_ptr->sockets_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "Socks/Node=* ");
	else
		xstrfmtcat(out, "Socks/Node=%u ", job_ptr->sockets_per_node);

	if (job_ptr->ntasks_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "NtasksPerN:B:S:C=*:");
	else
		xstrfmtcat(out, "NtasksPerN:B:S:C=%u:", job_ptr->ntasks_per_node);

	if (job_ptr->ntasks_per_board == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_board);

	if ((job_ptr->ntasks_per_socket == (uint16_t) NO_VAL) ||
	    (job_ptr->ntasks_per_socket == (uint16_t) INFINITE))
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_socket);

	if ((job_ptr->ntasks_per_core == (uint16_t) NO_VAL) ||
	    (job_ptr->ntasks_per_core == (uint16_t) INFINITE))
		xstrcat(out, "* ");
	else
		xstrfmtcat(out, "%u ", job_ptr->ntasks_per_core);

	if (job_ptr->core_spec == (uint16_t) NO_VAL)
		xstrcat(out, "CoreSpec=*");
	else if (job_ptr->core_spec & CORE_SPEC_THREAD)
		xstrfmtcat(out, "ThreadSpec=%d",
			   (job_ptr->core_spec & (~CORE_SPEC_THREAD)));
	else
		xstrfmtcat(out, "CoreSpec=%u", job_ptr->core_spec);

	xstrcat(out, line_end);

	if (job_resrcs && cluster_flags & CLUSTER_FLAG_BG) {
		if ((job_resrcs->cpu_array_cnt > 0) &&
		    (job_resrcs->cpu_array_value) &&
		    (job_resrcs->cpu_array_reps)) {
			int length = 0;
			xstrcat(out, "CPUs=");
			for (i = 0; i < job_resrcs->cpu_array_cnt; i++) {
				/* only print 60 characters worth of this record */
				if (length > 60) {
					/* skip to last CPU group entry */
					if (i < job_resrcs->cpu_array_cnt - 1) {
						continue;
					}
					/* add ellipsis before last entry */
					xstrcat(out, "...,");
				}

				length += xstrfmtcat(out, "%d", job_resrcs->cpus[i]);
				if (job_resrcs->cpu_array_reps[i] > 1) {
					length += xstrfmtcat(out, "*%d",
							     job_resrcs->cpu_array_reps[i]);
				}
				if (i < job_resrcs->cpu_array_cnt - 1) {
					xstrcat(out, ",");
					length++;
				}
			}
			xstrcat(out, line_end);
		}
	} else if (job_resrcs && job_resrcs->core_bitmap &&
		   ((last = bit_fls(job_resrcs->core_bitmap)) != -1)) {
		hl = hostlist_create(job_resrcs->nodes);
		if (!hl) {
			error("slurm_sprint_job_info: hostlist_create: %s",
			      job_resrcs->nodes);
			return NULL;
		}
		hl_last = hostlist_create(NULL);
		if (!hl_last) {
			error("slurm_sprint_job_info: hostlist_create: NULL");
			hostlist_destroy(hl);
			return NULL;
		}

		bit_inx = 0;
		i = sock_inx = sock_reps = 0;
		abs_node_inx = job_ptr->node_inx[i];

		gres_last = "";
		/* tmp1[] stores the current cpu(s) allocated */
		tmp2[0] = '\0';	/* stores last cpu(s) allocated */
		for (rel_node_inx=0; rel_node_inx < job_resrcs->nhosts;
		     rel_node_inx++) {

			if (sock_reps >=
			    job_resrcs->sock_core_rep_count[sock_inx]) {
				sock_inx++;
				sock_reps = 0;
			}
			sock_reps++;

			bit_reps = job_resrcs->sockets_per_node[sock_inx] *
				   job_resrcs->cores_per_socket[sock_inx];
			host = hostlist_shift(hl);
			threads = _threads_per_core(host);
			cpu_bitmap = bit_alloc(bit_reps * threads);
			for (j = 0; j < bit_reps; j++) {
				if (bit_test(job_resrcs->core_bitmap, bit_inx)){
					for (k = 0; k < threads; k++)
						bit_set(cpu_bitmap,
							(j * threads) + k);
				}
				bit_inx++;
			}
			bit_fmt(tmp1, sizeof(tmp1), cpu_bitmap);
			FREE_NULL_BITMAP(cpu_bitmap);
			/*
			 * If the allocation values for this host are not the
			 * same as the last host, print the report of the last
			 * group of hosts that had identical allocation values.
			 */
			if (xstrcmp(tmp1, tmp2) ||
			    ((rel_node_inx < job_ptr->gres_detail_cnt) &&
			     xstrcmp(job_ptr->gres_detail_str[rel_node_inx],
				     gres_last)) ||
			    (last_mem_alloc_ptr != job_resrcs->memory_allocated) ||
			    (job_resrcs->memory_allocated &&
			     (last_mem_alloc !=
			      job_resrcs->memory_allocated[rel_node_inx]))) {
				if (hostlist_count(hl_last)) {
					last_hosts =
						hostlist_ranged_string_xmalloc(
						hl_last);
					xstrfmtcat(out,
						   "  Nodes=%s CPU_IDs=%s "
						   "Mem=%"PRIu64" GRES_IDX=%s",
						   last_hosts, tmp2,
						   last_mem_alloc_ptr ?
						   last_mem_alloc : 0,
						    gres_last);
					xfree(last_hosts);
					xstrcat(out, line_end);

					hostlist_destroy(hl_last);
					hl_last = hostlist_create(NULL);
				}
				strcpy(tmp2, tmp1);
				if (rel_node_inx < job_ptr->gres_detail_cnt) {
					gres_last = job_ptr->
						    gres_detail_str[rel_node_inx];
				} else {
					gres_last = "";
				}
				last_mem_alloc_ptr = job_resrcs->memory_allocated;
				if (last_mem_alloc_ptr)
					last_mem_alloc = job_resrcs->
						memory_allocated[rel_node_inx];
				else
					last_mem_alloc = NO_VAL64;
			}
			hostlist_push_host(hl_last, host);
			free(host);

			if (bit_inx > last)
				break;

			if (abs_node_inx > job_ptr->node_inx[i+1]) {
				i += 2;
				abs_node_inx = job_ptr->node_inx[i];
			} else {
				abs_node_inx++;
			}
		}

		if (hostlist_count(hl_last)) {
			last_hosts = hostlist_ranged_string_xmalloc(hl_last);
			xstrfmtcat(out, "  Nodes=%s CPU_IDs=%s Mem=%"PRIu64" GRES_IDX=%s",
				 last_hosts, tmp2,
				 last_mem_alloc_ptr ? last_mem_alloc : 0,
				 gres_last);
			xfree(last_hosts);
			xstrcat(out, line_end);
		}
		hostlist_destroy(hl);
		hostlist_destroy(hl_last);
	}
	/****** Line 18 ******/
	if (job_ptr->pn_min_memory & MEM_PER_CPU) {
		job_ptr->pn_min_memory &= (~MEM_PER_CPU);
		tmp6_ptr = "CPU";
	} else
		tmp6_ptr = "Node";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		convert_num_unit((float)job_ptr->pn_min_cpus, tmp1,
				 sizeof(tmp1), UNIT_NONE, NO_VAL,
				 CONVERT_NUM_UNIT_EXACT);
		xstrfmtcat(out, "MinCPUsNode=%s ", tmp1);
	} else {
		xstrfmtcat(out, "MinCPUsNode=%u ", job_ptr->pn_min_cpus);
	}

	convert_num_unit((float)job_ptr->pn_min_memory, tmp1, sizeof(tmp1),
			 UNIT_MEGA, NO_VAL, CONVERT_NUM_UNIT_EXACT);
	convert_num_unit((float)job_ptr->pn_min_tmp_disk, tmp2, sizeof(tmp2),
			 UNIT_MEGA, NO_VAL, CONVERT_NUM_UNIT_EXACT);
	xstrfmtcat(out, "MinMemory%s=%s MinTmpDiskNode=%s", tmp6_ptr, tmp1, tmp2);
	xstrcat(out, line_end);

	/****** Line ******/
	secs2time_str((time_t)job_ptr->delay_boot, tmp1, sizeof(tmp1));
	xstrfmtcat(out, "Features=%s DelayBoot=%s", job_ptr->features, tmp1);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "Gres=%s Reservation=%s",
		   job_ptr->gres, job_ptr->resv_name);
	xstrcat(out, line_end);

	/****** Line 20 ******/
	xstrfmtcat(out, "OverSubscribe=%s Contiguous=%d Licenses=%s Network=%s",
		   job_share_string(job_ptr->shared), job_ptr->contiguous,
		   job_ptr->licenses, job_ptr->network);
	xstrcat(out, line_end);

	/****** Line 21 ******/
	xstrfmtcat(out, "Command=%s", job_ptr->command);
	xstrcat(out, line_end);

	/****** Line 22 ******/
	xstrfmtcat(out, "WorkDir=%s", job_ptr->work_dir);

	if (cluster_flags & CLUSTER_FLAG_BG) {
		/****** Line 23 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_BG_ID);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "Block_ID=%s", select_buf);
		}

		/****** Line 24 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MIXED_SHORT);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrcat(out, select_buf);
		}

		/****** Line 26 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_LINUX_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "CnloadImage=%s", select_buf);
		}
		/****** Line 27 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MLOADER_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "MloaderImage=%s", select_buf);
		}
		/****** Line 28 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_RAMDISK_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "IoloadImage=%s", select_buf);
		}
	}

	/****** Line (optional) ******/
	if (job_ptr->admin_comment) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "AdminComment=%s ", job_ptr->admin_comment);
	}

	/****** Line (optional) ******/
	if (job_ptr->comment) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "Comment=%s ", job_ptr->comment);
	}

	/****** Line 30 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stderr(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdErr=%s", tmp_path);
	}

	/****** Line 31 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stdin(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdIn=%s", tmp_path);
	}

	/****** Line 32 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stdout(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdOut=%s", tmp_path);
	}

	/****** Line 33 (optional) ******/
	if (job_ptr->batch_script) {
		xstrcat(out, line_end);
		xstrcat(out, "BatchScript=\n");
		xstrcat(out, job_ptr->batch_script);
	}

	/****** Line 34 (optional) ******/
	if (job_ptr->req_switch) {
		char time_buf[32];
		xstrcat(out, line_end);
		secs2time_str((time_t) job_ptr->wait4switch, time_buf,
			      sizeof(time_buf));
		xstrfmtcat(out, "Switches=%u@%s\n", job_ptr->req_switch, time_buf);
	}

	/****** Line 35 (optional) ******/
	if (job_ptr->burst_buffer) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "BurstBuffer=%s", job_ptr->burst_buffer);
	}

	/****** Line (optional) ******/
	if (job_ptr->burst_buffer_state) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "BurstBufferState=%s",
			   job_ptr->burst_buffer_state);
	}

	/****** Line 36 (optional) ******/
	if (cpu_freq_debug(NULL, NULL, tmp1, sizeof(tmp1),
			   job_ptr->cpu_freq_gov, job_ptr->cpu_freq_min,
			   job_ptr->cpu_freq_max, NO_VAL) != 0) {
		xstrcat(out, line_end);
		xstrcat(out, tmp1);
	}

	/****** Line 37 ******/
	xstrcat(out, line_end);
	xstrfmtcat(out, "Power=%s", power_flags_str(job_ptr->power_flags));

	/****** Line 38 (optional) ******/
	if (job_ptr->bitflags) {
		xstrcat(out, line_end);
		if (job_ptr->bitflags & GRES_ENFORCE_BIND)
			xstrcat(out, "GresEnforceBind=Yes");
		if (job_ptr->bitflags & KILL_INV_DEP)
			xstrcat(out, "KillOInInvalidDependent=Yes");
		if (job_ptr->bitflags & NO_KILL_INV_DEP)
			xstrcat(out, "KillOInInvalidDependent=No");
		if (job_ptr->bitflags & SPREAD_JOB)
			xstrcat(out, "SpreadJob=Yes");
	}

	/****** END OF JOB RECORD ******/
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}
Exemple #7
0
static int _attempt_backfill(void)
{
    bool filter_root = false;
    List job_queue;
    job_queue_rec_t *job_queue_rec;
    slurmdb_qos_rec_t *qos_ptr = NULL;
    int i, j, node_space_recs;
    struct job_record *job_ptr;
    struct part_record *part_ptr;
    uint32_t end_time, end_reserve;
    uint32_t time_limit, comp_time_limit, orig_time_limit;
    uint32_t min_nodes, max_nodes, req_nodes;
    bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL;
    time_t now = time(NULL), sched_start, later_start, start_res;
    node_space_map_t *node_space;
    static int sched_timeout = 0;
    int this_sched_timeout = 0, rc = 0;

    sched_start = now;
    if (sched_timeout == 0) {
        sched_timeout = slurm_get_msg_timeout() / 2;
        sched_timeout = MAX(sched_timeout, 1);
        sched_timeout = MIN(sched_timeout, 10);
    }
    this_sched_timeout = sched_timeout;

#ifdef HAVE_CRAY
    /*
     * Run a Basil Inventory immediately before setting up the schedule
     * plan, to avoid race conditions caused by ALPS node state change.
     * Needs to be done with the node-state lock taken.
     */
    if (select_g_reconfigure()) {
        debug4("backfill: not scheduling due to ALPS");
        return SLURM_SUCCESS;
    }
#endif

    if (slurm_get_root_filter())
        filter_root = true;

    job_queue = build_job_queue(true);
    if (list_count(job_queue) <= 1) {
        debug("backfill: no jobs to backfill");
        list_destroy(job_queue);
        return 0;
    }

    node_space = xmalloc(sizeof(node_space_map_t) *
                         (max_backfill_job_cnt + 3));
    node_space[0].begin_time = sched_start;
    node_space[0].end_time = sched_start + backfill_window;
    node_space[0].avail_bitmap = bit_copy(avail_node_bitmap);
    node_space[0].next = 0;
    node_space_recs = 1;
    if (debug_flags & DEBUG_FLAG_BACKFILL)
        _dump_node_space_table(node_space);

    while ((job_queue_rec = (job_queue_rec_t *)
                            list_pop_bottom(job_queue, sort_job_queue2))) {
        job_ptr  = job_queue_rec->job_ptr;
        part_ptr = job_queue_rec->part_ptr;
        xfree(job_queue_rec);
        if (!IS_JOB_PENDING(job_ptr))
            continue;	/* started in other partition */
        job_ptr->part_ptr = part_ptr;

        if (debug_flags & DEBUG_FLAG_BACKFILL)
            info("backfill test for job %u", job_ptr->job_id);

        if ((job_ptr->state_reason == WAIT_ASSOC_JOB_LIMIT) ||
                (job_ptr->state_reason == WAIT_ASSOC_RESOURCE_LIMIT) ||
                (job_ptr->state_reason == WAIT_ASSOC_TIME_LIMIT) ||
                (job_ptr->state_reason == WAIT_QOS_JOB_LIMIT) ||
                (job_ptr->state_reason == WAIT_QOS_RESOURCE_LIMIT) ||
                (job_ptr->state_reason == WAIT_QOS_TIME_LIMIT) ||
                !acct_policy_job_runnable(job_ptr)) {
            debug2("backfill: job %u is not allowed to run now. "
                   "Skipping it. State=%s. Reason=%s. Priority=%u",
                   job_ptr->job_id,
                   job_state_string(job_ptr->job_state),
                   job_reason_string(job_ptr->state_reason),
                   job_ptr->priority);
            continue;
        }

        if (((part_ptr->state_up & PARTITION_SCHED) == 0) ||
                (part_ptr->node_bitmap == NULL))
            continue;
        if ((part_ptr->flags & PART_FLAG_ROOT_ONLY) && filter_root)
            continue;

        if ((!job_independent(job_ptr, 0)) ||
                (license_job_test(job_ptr, time(NULL)) != SLURM_SUCCESS))
            continue;

        /* Determine minimum and maximum node counts */
        min_nodes = MAX(job_ptr->details->min_nodes,
                        part_ptr->min_nodes);
        if (job_ptr->details->max_nodes == 0)
            max_nodes = part_ptr->max_nodes;
        else
            max_nodes = MIN(job_ptr->details->max_nodes,
                            part_ptr->max_nodes);
        max_nodes = MIN(max_nodes, 500000);     /* prevent overflows */
        if (job_ptr->details->max_nodes)
            req_nodes = max_nodes;
        else
            req_nodes = min_nodes;
        if (min_nodes > max_nodes) {
            /* job's min_nodes exceeds partition's max_nodes */
            continue;
        }

        /* Determine job's expected completion time */
        if (job_ptr->time_limit == NO_VAL) {
            if (part_ptr->max_time == INFINITE)
                time_limit = 365 * 24 * 60; /* one year */
            else
                time_limit = part_ptr->max_time;
        } else {
            if (part_ptr->max_time == INFINITE)
                time_limit = job_ptr->time_limit;
            else
                time_limit = MIN(job_ptr->time_limit,
                                 part_ptr->max_time);
        }
        comp_time_limit = time_limit;
        orig_time_limit = job_ptr->time_limit;
        if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE))
            time_limit = job_ptr->time_limit = 1;
        else if (job_ptr->time_min && (job_ptr->time_min < time_limit))
            time_limit = job_ptr->time_limit = job_ptr->time_min;

        /* Determine impact of any resource reservations */
        later_start = now;
TRY_LATER:
        FREE_NULL_BITMAP(avail_bitmap);
        start_res   = later_start;
        later_start = 0;
        j = job_test_resv(job_ptr, &start_res, true, &avail_bitmap);
        if (j != SLURM_SUCCESS) {
            job_ptr->time_limit = orig_time_limit;
            continue;
        }
        if (start_res > now)
            end_time = (time_limit * 60) + start_res;
        else
            end_time = (time_limit * 60) + now;

        /* Identify usable nodes for this job */
        bit_and(avail_bitmap, part_ptr->node_bitmap);
        bit_and(avail_bitmap, up_node_bitmap);
        for (j=0; ; ) {
            if ((node_space[j].end_time > start_res) &&
                    node_space[j].next && (later_start == 0))
                later_start = node_space[j].end_time;
            if (node_space[j].end_time <= start_res)
                ;
            else if (node_space[j].begin_time <= end_time) {
                bit_and(avail_bitmap,
                        node_space[j].avail_bitmap);
            } else
                break;
            if ((j = node_space[j].next) == 0)
                break;
        }

        if (job_ptr->details->exc_node_bitmap) {
            bit_not(job_ptr->details->exc_node_bitmap);
            bit_and(avail_bitmap,
                    job_ptr->details->exc_node_bitmap);
            bit_not(job_ptr->details->exc_node_bitmap);
        }

        /* Test if insufficient nodes remain OR
         *	required nodes missing OR
         *	nodes lack features */
        if ((bit_set_count(avail_bitmap) < min_nodes) ||
                ((job_ptr->details->req_node_bitmap) &&
                 (!bit_super_set(job_ptr->details->req_node_bitmap,
                                 avail_bitmap))) ||
                (job_req_node_filter(job_ptr, avail_bitmap))) {
            if (later_start) {
                job_ptr->start_time = 0;
                goto TRY_LATER;
            }
            job_ptr->time_limit = orig_time_limit;
            continue;
        }

        /* Identify nodes which are definitely off limits */
        FREE_NULL_BITMAP(resv_bitmap);
        resv_bitmap = bit_copy(avail_bitmap);
        bit_not(resv_bitmap);

        if ((time(NULL) - sched_start) >= this_sched_timeout) {
            debug("backfill: loop taking too long, yielding locks");
            if (_yield_locks()) {
                debug("backfill: system state changed, "
                      "breaking out");
                rc = 1;
                break;
            } else {
                this_sched_timeout += sched_timeout;
            }
        }
        /* this is the time consuming operation */
        debug2("backfill: entering _try_sched for job %u.",
               job_ptr->job_id);
        j = _try_sched(job_ptr, &avail_bitmap,
                       min_nodes, max_nodes, req_nodes);
        debug2("backfill: finished _try_sched for job %u.",
               job_ptr->job_id);
        now = time(NULL);
        if (j != SLURM_SUCCESS) {
            job_ptr->time_limit = orig_time_limit;
            job_ptr->start_time = 0;
            continue;	/* not runable */
        }

        if (start_res > job_ptr->start_time) {
            job_ptr->start_time = start_res;
            last_job_update = now;
        }
        if (job_ptr->start_time <= now) {
            int rc = _start_job(job_ptr, resv_bitmap);
            if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE))
                job_ptr->time_limit = orig_time_limit;
            else if ((rc == SLURM_SUCCESS) && job_ptr->time_min) {
                /* Set time limit as high as possible */
                job_ptr->time_limit = comp_time_limit;
                job_ptr->end_time = job_ptr->start_time +
                                    (comp_time_limit * 60);
                _reset_job_time_limit(job_ptr, now,
                                      node_space);
                time_limit = job_ptr->time_limit;
            } else {
                job_ptr->time_limit = orig_time_limit;
            }
            if (rc == ESLURM_ACCOUNTING_POLICY) {
                /* Unknown future start time, just skip job */
                job_ptr->start_time = 0;
                continue;
            } else if (rc != SLURM_SUCCESS) {
                /* Planned to start job, but something bad
                 * happended. */
                job_ptr->start_time = 0;
                break;
            } else {
                /* Started this job, move to next one */
                continue;
            }
        } else
            job_ptr->time_limit = orig_time_limit;

        if (later_start && (job_ptr->start_time > later_start)) {
            /* Try later when some nodes currently reserved for
             * pending jobs are free */
            job_ptr->start_time = 0;
            goto TRY_LATER;
        }

        if (job_ptr->start_time > (sched_start + backfill_window)) {
            /* Starts too far in the future to worry about */
            continue;
        }

        if (node_space_recs >= max_backfill_job_cnt) {
            /* Already have too many jobs to deal with */
            break;
        }

        end_reserve = job_ptr->start_time + (time_limit * 60);
        if (_test_resv_overlap(node_space, avail_bitmap,
                               job_ptr->start_time, end_reserve)) {
            /* This job overlaps with an existing reservation for
             * job to be backfill scheduled, which the sched
             * plugin does not know about. Try again later. */
            later_start = job_ptr->start_time;
            job_ptr->start_time = 0;
            goto TRY_LATER;
        }

        /*
         * Add reservation to scheduling table if appropriate
         */
        qos_ptr = job_ptr->qos_ptr;
        if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE))
            continue;
        bit_not(avail_bitmap);
        _add_reservation(job_ptr->start_time, end_reserve,
                         avail_bitmap, node_space, &node_space_recs);
        if (debug_flags & DEBUG_FLAG_BACKFILL)
            _dump_node_space_table(node_space);
    }
    FREE_NULL_BITMAP(avail_bitmap);
    FREE_NULL_BITMAP(resv_bitmap);

    for (i=0; ; ) {
        FREE_NULL_BITMAP(node_space[i].avail_bitmap);
        if ((i = node_space[i].next) == 0)
            break;
    }
    xfree(node_space);
    list_destroy(job_queue);
    return rc;
}