Пример #1
0
/*
 * _parse_state - convert job state name string to numeric value
 * IN str - state name
 * OUT states - enum job_states value corresponding to str
 * RET 0 or error code
 */
static int
_parse_state( char* str, uint16_t* states )
{
	int i;
	char *state_names;

	if ((i = job_state_num(str)) >= 0) {
		*states = (uint16_t) i;
		return SLURM_SUCCESS;
	}

	error ("Invalid job state specified: %s", str);
	state_names = xstrdup(job_state_string(0));
	for (i=1; i<JOB_END; i++) {
		xstrcat(state_names, ",");
		xstrcat(state_names, job_state_string(i));
	}
	xstrcat(state_names, ",");
	xstrcat(state_names, job_state_string(JOB_COMPLETING));
	xstrcat(state_names, ",");
	xstrcat(state_names, job_state_string(JOB_CONFIGURING));
	error ("Valid job states include: %s\n", state_names);
	xfree (state_names);
	return SLURM_ERROR;
}
Пример #2
0
static int _sort_job_by_state(void *void1, void *void2)
{
	int diff;
	job_info_t *job1;
	job_info_t *job2;

	_get_job_info_from_void(&job1, &job2, void1, void2);

	diff = strcmp(job_state_string(job1->job_state),
			 job_state_string(job2->job_state));

	if (reverse_order)
		diff = -diff;
	return diff;
}
Пример #3
0
static void _process_terminated(List job_list, char *f[], int lc,
				int show_full, int len)
{
	filetxt_job_rec_t *job = NULL;
	filetxt_job_rec_t *temp = NULL;

	_parse_line(f, (void **)&temp, len);

	if (temp == NULL) {
		error("Unknown proccess terminated");
		return;
	}

	job = _find_job_record(job_list, temp->header, JOB_TERMINATED);
	if (!job) {	/* fake it for now */
		job = _create_filetxt_job_rec(temp->header);
		job->jobname = xstrdup("(unknown)");
		debug("Note: JOB_TERMINATED record for job "
		      "%u preceded "
		      "other job records at line %d\n",
		      temp->header.jobnum, lc);
	} else if (job->job_terminated_seen) {
		if (temp->status == JOB_NODE_FAIL) {
			/* multiple node failures - extra TERMINATED records */
			debug("Note: Duplicate JOB_TERMINATED "
			      "record (nf) for job %u at "
			      "line %d\n",
			      temp->header.jobnum, lc);
			/* JOB_TERMINATED/NF records may be preceded
			 * by a JOB_TERMINATED/CA record; NF is much
			 * more interesting.
			 */
			job->status = temp->status;
			goto finished;
		}

		fprintf(stderr,
			"Conflicting JOB_TERMINATED record (%s) for "
			"job %u at line %d -- ignoring it\n",
			job_state_string(temp->status),
			job->header.jobnum, lc);
		goto finished;
	}
	job->job_terminated_seen = 1;
	job->elapsed = temp->elapsed;
	job->end = temp->header.timestamp;
	job->status = temp->status;
	job->requid = temp->requid;
	job->exitcode = temp->exitcode;
	if (list_count(job->steps) > 1)
		job->track_steps = 1;
	job->show_full = show_full;

finished:
	_destroy_filetxt_job_rec(temp);
}
Пример #4
0
Файл: print.c Проект: Cray/slurm
int _print_job_job_state(job_info_t * job, int width, bool right, char* suffix)
{
	if (job == NULL)	/* Print the Header instead */
		_print_str("STATE", width, right, true);
	else
		_print_str(job_state_string(job->job_state), width, right,
			   true);
	if (suffix)
		printf("%s", suffix);
	return SLURM_SUCCESS;
}
Пример #5
0
/* Log all SICP job records */
static void _log_sicp_recs(void)
{
    ListIterator sicp_iterator;
    sicp_job_t *sicp_ptr;

    sicp_iterator = list_iterator_create(sicp_job_list);
    while ((sicp_ptr = (sicp_job_t *) list_next(sicp_iterator))) {
        info("SICP: Job_ID:%u State:%s", sicp_ptr->job_id,
             job_state_string(sicp_ptr->job_state));
    }
    list_iterator_destroy(sicp_iterator);
}
Пример #6
0
Файл: opt.c Проект: HDOD/slurm
static void _opt_list(void)
{
	int i;

	info("account        : %s", opt.account);
	info("batch          : %s", tf_(opt.batch));
	info("ctld           : %s", tf_(opt.ctld));
	info("full           : %s", tf_(opt.full));
	info("interactive    : %s", tf_(opt.interactive));
	info("job_name       : %s", opt.job_name);
	info("nodelist       : %s", opt.nodelist);
	info("partition      : %s", opt.partition);
	info("qos            : %s", opt.qos);
	info("reservation    : %s", opt.reservation);
	if (opt.signal != (uint16_t) NO_VAL)
		info("signal         : %u", opt.signal);
	info("state          : %s", job_state_string(opt.state));
	info("user_id        : %u", opt.user_id);
	info("user_name      : %s", opt.user_name);
	info("verbose        : %d", opt.verbose);
	info("wckey          : %s", opt.wckey);

	for (i = 0; i < opt.job_cnt; i++) {
		if (opt.step_id[i] == SLURM_BATCH_SCRIPT) {
			if (opt.array_id[i] == NO_VAL) {
				info("job_id[%d]      : %u",
				     i, opt.job_id[i]);
			} else if (opt.array_id[i] == INFINITE) {
				info("job_id[%d]      : %u_*",
				     i, opt.job_id[i]);
			} else {
				info("job_id[%d]      : %u_%u",
				     i, opt.job_id[i], opt.array_id[i]);
			}
		} else {
			if (opt.array_id[i] == NO_VAL) {
				info("job_step_id[%d] : %u.%u",
				     i, opt.job_id[i], opt.step_id[i]);
			} else if (opt.array_id[i] == INFINITE) {
				info("job_step_id[%d] : %u_*.%u",
				     i, opt.job_id[i], opt.step_id[i]);
			} else {
				info("job_step_id[%d] : %u_%u.%u",
				     i, opt.job_id[i], opt.array_id[i],
				     opt.step_id[i]);
			}
		}
	}
}
Пример #7
0
static void _opt_list(void)
{
	int i;

	info("account        : %s", opt.account);
	info("batch          : %s", tf_(opt.batch));
	info("ctld           : %s", tf_(opt.ctld));
	info("interactive    : %s", tf_(opt.interactive));
	info("job_name       : %s", opt.job_name);
	info("nodelist       : %s", opt.nodelist);
	info("partition      : %s", opt.partition);
	info("qos            : %s", opt.qos);
	info("reservation    : %s", opt.reservation);
	info("signal         : %u", opt.signal);
	info("state          : %s", job_state_string(opt.state));
	info("user_id        : %u", opt.user_id);
	info("user_name      : %s", opt.user_name);
	info("verbose        : %d", opt.verbose);
	info("wckey          : %s", opt.wckey);

	for (i=0; i<opt.job_cnt; i++) {
		info("job_steps      : %u.%u ", opt.job_id[i], opt.step_id[i]);
	}
}
Пример #8
0
extern int slurm_jobcomp_log_record(struct job_record *job_ptr)
{
	int nwritten, B_SIZE = 1024;
	char usr_str[32], grp_str[32], start_str[32], end_str[32];
	char submit_str[32], *cluster = NULL, *qos, *state_string;
	time_t elapsed_time, submit_time, eligible_time;
	enum job_states job_state;
	uint32_t time_limit;
	uint16_t ntasks_per_node;
	int i;
	char *buffer, tmp_str[256], *script_str, *script;
	struct job_node *jnode;

	if (list_count(jobslist) > MAX_JOBS) {
		error("%s: Limit of %d enqueued jobs in memory waiting to be "
		      "indexed reached. Job %lu discarded", plugin_type,
		      MAX_JOBS, (unsigned long)job_ptr->job_id);
		return SLURM_ERROR;
	}

	_get_user_name(job_ptr->user_id, usr_str, sizeof(usr_str));
	_get_group_name(job_ptr->group_id, grp_str, sizeof(grp_str));

	if ((job_ptr->time_limit == NO_VAL) && job_ptr->part_ptr)
		time_limit = job_ptr->part_ptr->max_time;
	else
		time_limit = job_ptr->time_limit;

	if (job_ptr->job_state & JOB_RESIZING) {
		time_t now = time(NULL);
		state_string = job_state_string(job_ptr->job_state);
		if (job_ptr->resize_time) {
			_make_time_str(&job_ptr->resize_time, start_str,
				       sizeof(start_str));
		} else {
			_make_time_str(&job_ptr->start_time, start_str,
				       sizeof(start_str));
		}
		_make_time_str(&now, end_str, sizeof(end_str));
	} else {
		/* Job state will typically have JOB_COMPLETING or JOB_RESIZING
		 * flag set when called. We remove the flags to get the eventual
		 * completion state: JOB_FAILED, JOB_TIMEOUT, etc. */
		job_state = job_ptr->job_state & JOB_STATE_BASE;
		state_string = job_state_string(job_state);
		if (job_ptr->resize_time) {
			_make_time_str(&job_ptr->resize_time, start_str,
				       sizeof(start_str));
		} else if (job_ptr->start_time > job_ptr->end_time) {
			/* Job cancelled while pending and
			 * expected start time is in the future. */
			snprintf(start_str, sizeof(start_str), "Unknown");
		} else {
			_make_time_str(&job_ptr->start_time, start_str,
				       sizeof(start_str));
		}
		_make_time_str(&job_ptr->end_time, end_str, sizeof(end_str));
	}

	elapsed_time = job_ptr->end_time - job_ptr->start_time;

	buffer = xmalloc(B_SIZE);

	nwritten = snprintf(buffer, B_SIZE, JOBCOMP_DATA_FORMAT,
			    (unsigned long) job_ptr->job_id, usr_str,
			    (unsigned long) job_ptr->user_id, grp_str,
			    (unsigned long) job_ptr->group_id, start_str,
			    end_str, (long) elapsed_time,
			    job_ptr->partition, job_ptr->alloc_node,
			    job_ptr->nodes, (unsigned long) job_ptr->total_cpus,
			    (unsigned long) job_ptr->total_nodes,
			    (unsigned long) job_ptr->derived_ec,
			    (unsigned long) job_ptr->exit_code, state_string);

	if (nwritten >= B_SIZE) {
		B_SIZE += nwritten + 1;
		buffer = xrealloc(buffer, B_SIZE);

		nwritten = snprintf(buffer, B_SIZE, JOBCOMP_DATA_FORMAT,
				    (unsigned long) job_ptr->job_id, usr_str,
				    (unsigned long) job_ptr->user_id, grp_str,
				    (unsigned long) job_ptr->group_id,
				    start_str, end_str, (long) elapsed_time,
				    job_ptr->partition, job_ptr->alloc_node,
				    job_ptr->nodes,
				    (unsigned long) job_ptr->total_cpus,
				    (unsigned long) job_ptr->total_nodes,
				    (unsigned long) job_ptr->derived_ec,
				    (unsigned long) job_ptr->exit_code,
				    state_string);

		if (nwritten >= B_SIZE) {
			error("%s: Job completion data truncated and lost",
			      plugin_type);
			return SLURM_ERROR;
		}
	}

	snprintf(tmp_str, sizeof(tmp_str), ",\"cpu_hours\":%.6f",
		 ((float) elapsed_time * (float) job_ptr->total_cpus) /
		  (float) 3600);
	xstrcat(buffer, tmp_str);

	if (job_ptr->array_task_id != NO_VAL) {
		xstrfmtcat(buffer, ",\"array_job_id\":%lu",
			   (unsigned long) job_ptr->array_job_id);
		xstrfmtcat(buffer, ",\"array_task_id\":%lu",
			   (unsigned long) job_ptr->array_task_id);
	}

	if (job_ptr->details && (job_ptr->details->submit_time != NO_VAL)) {
		submit_time = job_ptr->details->submit_time;
		_make_time_str(&submit_time, submit_str, sizeof(submit_str));
		xstrfmtcat(buffer, ",\"@submit\":\"%s\"", submit_str);
	}

	if (job_ptr->details && (job_ptr->details->begin_time != NO_VAL)) {
		eligible_time = job_ptr->start_time -
				job_ptr->details->begin_time;
		xstrfmtcat(buffer, ",\"eligible_time\":%lu", eligible_time);
	}

	if (job_ptr->details
	    && (job_ptr->details->work_dir && job_ptr->details->work_dir[0])) {
		xstrfmtcat(buffer, ",\"work_dir\":\"%s\"",
			   job_ptr->details->work_dir);
	}

	if (job_ptr->details
	    && (job_ptr->details->std_err && job_ptr->details->std_err[0])) {
		xstrfmtcat(buffer, ",\"std_err\":\"%s\"",
			   job_ptr->details->std_err);
	}

	if (job_ptr->details
	    && (job_ptr->details->std_in && job_ptr->details->std_in[0])) {
		xstrfmtcat(buffer, ",\"std_in\":\"%s\"",
			   job_ptr->details->std_in);
	}

	if (job_ptr->details
	    && (job_ptr->details->std_out && job_ptr->details->std_out[0])) {
		xstrfmtcat(buffer, ",\"std_out\":\"%s\"",
			   job_ptr->details->std_out);
	}

	if (job_ptr->assoc_ptr != NULL) {
		cluster = ((slurmdb_assoc_rec_t *) job_ptr->assoc_ptr)->cluster;
		xstrfmtcat(buffer, ",\"cluster\":\"%s\"", cluster);
	}

	if (job_ptr->qos_ptr != NULL) {
		slurmdb_qos_rec_t *assoc =
			(slurmdb_qos_rec_t *) job_ptr->qos_ptr;
		qos = assoc->name;
		xstrfmtcat(buffer, ",\"qos\":\"%s\"", qos);
	}

	if (job_ptr->details && (job_ptr->details->num_tasks != NO_VAL)) {
		xstrfmtcat(buffer, ",\"ntasks\":%hu",
			   job_ptr->details->num_tasks);
	}

	if (job_ptr->details && (job_ptr->details->ntasks_per_node != NO_VAL)) {
		ntasks_per_node = job_ptr->details->ntasks_per_node;
		xstrfmtcat(buffer, ",\"ntasks_per_node\":%hu", ntasks_per_node);
	}

	if (job_ptr->details && (job_ptr->details->cpus_per_task != NO_VAL)) {
		xstrfmtcat(buffer, ",\"cpus_per_task\":%hu",
			   job_ptr->details->cpus_per_task);
	}

	if (job_ptr->details
	    && (job_ptr->details->orig_dependency
		&& job_ptr->details->orig_dependency[0])) {
		xstrfmtcat(buffer, ",\"orig_dependency\":\"%s\"",
			   job_ptr->details->orig_dependency);
	}

	if (job_ptr->details
	    && (job_ptr->details->exc_nodes
		&& job_ptr->details->exc_nodes[0])) {
		xstrfmtcat(buffer, ",\"excluded_nodes\":\"%s\"",
			   job_ptr->details->exc_nodes);
	}

	if (time_limit != INFINITE) {
		xstrfmtcat(buffer, ",\"time_limit\":%lu",
			   (unsigned long) time_limit * 60);
	}

	if (job_ptr->resv_name && job_ptr->resv_name[0]) {
		xstrfmtcat(buffer, ",\"reservation_name\":\"%s\"",
			   job_ptr->resv_name);
	}

	if (job_ptr->gres_req && job_ptr->gres_req[0]) {
		xstrfmtcat(buffer, ",\"gres_req\":\"%s\"", job_ptr->gres_req);
	}

	if (job_ptr->gres_alloc && job_ptr->gres_alloc[0]) {
		xstrfmtcat(buffer, ",\"gres_alloc\":\"%s\"",
			   job_ptr->gres_alloc);
	}

	if (job_ptr->account && job_ptr->account[0]) {
		xstrfmtcat(buffer, ",\"account\":\"%s\"", job_ptr->account);
	}

	script = get_job_script(job_ptr);
	if (script && script[0]) {
		script_str = _json_escape(script);
		xstrfmtcat(buffer, ",\"script\":\"%s\"", script_str);
		xfree(script_str);
	}
	xfree(script);

	if (job_ptr->assoc_ptr) {
		assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK,
					   NO_LOCK, NO_LOCK, NO_LOCK };
		slurmdb_assoc_rec_t *assoc_ptr = job_ptr->assoc_ptr;
		char *parent_accounts = NULL;
		char **acc_aux = NULL;
		int nparents = 0;

		assoc_mgr_lock(&locks);

		/* Start at the first parent and go up.  When studying
		 * this code it was slightly faster to do 2 loops on
		 * the association linked list and only 1 xmalloc but
		 * we opted for cleaner looking code and going with a
		 * realloc. */
		while (assoc_ptr) {
			if (assoc_ptr->acct) {
				acc_aux = xrealloc(acc_aux,
						   sizeof(char *) *
						   (nparents + 1));
				acc_aux[nparents++] = assoc_ptr->acct;
			}
			assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
		}

		for (i = nparents - 1; i >= 0; i--)
			xstrfmtcat(parent_accounts, "/%s", acc_aux[i]);
		xfree(acc_aux);

		xstrfmtcat(buffer, ",\"parent_accounts\":\"%s\"",
			   parent_accounts);

		xfree(parent_accounts);

		assoc_mgr_unlock(&locks);
	}

	xstrcat(buffer, "}");
	jnode = xmalloc(sizeof(struct job_node));
	jnode->serialized_job = xstrdup(buffer);
	list_enqueue(jobslist, jnode);

	return SLURM_SUCCESS;
}
Пример #9
0
extern List pgsql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond)
{

	char *query = NULL;
	char *extra = NULL;
	char *tmp = NULL;
	char *selected_part = NULL;
	slurmdb_selected_step_t *selected_step = NULL;
	ListIterator itr = NULL;
	int set = 0;
	PGresult *result = NULL;
	int i;
	jobcomp_job_rec_t *job = NULL;
	char time_str[32];
	time_t temp_time;
	List job_list = NULL;

	if (job_cond->step_list && list_count(job_cond->step_list)) {
		set = 0;
		xstrcat(extra, " where (");
		itr = list_iterator_create(job_cond->step_list);
		while((selected_step = list_next(itr))) {
			if (set)
				xstrcat(extra, " || ");
			tmp = xstrdup_printf("jobid=%d",
					      selected_step->jobid);
			xstrcat(extra, tmp);
			set = 1;
			xfree(tmp);
		}
		list_iterator_destroy(itr);
		xstrcat(extra, ")");
	}

	if (job_cond->partition_list && list_count(job_cond->partition_list)) {
		set = 0;
		if (extra)
			xstrcat(extra, " && (");
		else
			xstrcat(extra, " where (");

		itr = list_iterator_create(job_cond->partition_list);
		while((selected_part = list_next(itr))) {
			if (set)
				xstrcat(extra, " || ");
			tmp = xstrdup_printf("partition='%s'",
					      selected_part);
			xstrcat(extra, tmp);
			set = 1;
			xfree(tmp);
		}
		list_iterator_destroy(itr);
		xstrcat(extra, ")");
	}

	i = 0;
	while(jobcomp_table_fields[i].name) {
		if (i)
			xstrcat(tmp, ", ");
		xstrcat(tmp, jobcomp_table_fields[i].name);
		i++;
	}

	query = xstrdup_printf("select %s from %s", tmp, jobcomp_table);
	xfree(tmp);

	if (extra) {
		xstrcat(query, extra);
		xfree(extra);
	}

	//info("query = %s", query);
	if (!(result =
	     pgsql_db_query_ret(jobcomp_pgsql_db, query))) {
		xfree(query);
		return NULL;
	}
	xfree(query);

	job_list = list_create(jobcomp_destroy_job);
	for (i = 0; i < PQntuples(result); i++) {
		job = xmalloc(sizeof(jobcomp_job_rec_t));
		if (PQgetvalue(result, i, JOBCOMP_REQ_JOBID))
			job->jobid =
				atoi(PQgetvalue(result, i, JOBCOMP_REQ_JOBID));
		job->partition =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_PARTITION));
		temp_time = atoi(PQgetvalue(result, i, JOBCOMP_REQ_STARTTIME));
		slurm_make_time_str(&temp_time,
				    time_str,
				    sizeof(time_str));
		job->start_time = xstrdup(time_str);

		temp_time = atoi(PQgetvalue(result, i, JOBCOMP_REQ_ENDTIME));
		slurm_make_time_str(&temp_time,
				    time_str,
				    sizeof(time_str));
		job->end_time = xstrdup(time_str);

		if (PQgetvalue(result, i, JOBCOMP_REQ_UID))
			job->uid =
				atoi(PQgetvalue(result, i, JOBCOMP_REQ_UID));
		job->uid_name =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_USER_NAME));
		if (PQgetvalue(result, i, JOBCOMP_REQ_GID))
			job->gid =
				atoi(PQgetvalue(result, i, JOBCOMP_REQ_GID));
		job->gid_name =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_GROUP_NAME));
		job->jobname =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_NAME));
		job->nodelist =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_NODELIST));
		if (PQgetvalue(result, i, JOBCOMP_REQ_NODECNT))
			job->node_cnt =
				atoi(PQgetvalue(result, i, JOBCOMP_REQ_NODECNT));
		if (PQgetvalue(result, i, JOBCOMP_REQ_STATE)) {
			int j = atoi(PQgetvalue(result, i, JOBCOMP_REQ_STATE));
			job->state = xstrdup(job_state_string(j));
		}
		job->timelimit =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_TIMELIMIT));
		if (PQgetvalue(result, i, JOBCOMP_REQ_MAXPROCS))
			job->max_procs =
				atoi(PQgetvalue(result, i,
						JOBCOMP_REQ_MAXPROCS));
		job->blockid =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_BLOCKID));
		job->connection =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_CONNECTION));
		job->reboot =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_REBOOT));
		job->rotate =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_ROTATE));
		job->geo =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_GEOMETRY));
		job->bg_start_point =
			xstrdup(PQgetvalue(result, i, JOBCOMP_REQ_START));
		list_append(job_list, job);
	}

	PQclear(result);
	return job_list;
}
Пример #10
0
void parse_command_line(int argc, char **argv)
{
	extern int optind;
	int c, i, optionIndex = 0;
	char *end = NULL, *start = NULL, *acct_type = NULL;
	slurmdb_selected_step_t *selected_step = NULL;
	ListIterator itr = NULL;
	struct stat stat_buf;
	char *dot = NULL;
	bool brief_output = FALSE, long_output = FALSE;
	bool all_users = 0;
	bool all_clusters = 0;
	slurmdb_job_cond_t *job_cond = params.job_cond;
	log_options_t opts = LOG_OPTS_STDERR_ONLY ;
	int verbosity;		/* count of -v options */
	bool set;

	static struct option long_options[] = {
                {"allusers",       no_argument,       0,                      'a'},
                {"accounts",       required_argument, 0,                      'A'},
                {"allocations",    no_argument,       &params.opt_allocs,     OPT_LONG_ALLOCS},
                {"brief",          no_argument,       0,                      'b'},
                {"completion",     no_argument,       &params.opt_completion, 'c'},
                {"duplicates",     no_argument,       &params.opt_dup,        OPT_LONG_DUP},
                {"helpformat",     no_argument,       0,                      'e'},
                {"help-fields",    no_argument,       0,                      'e'},
                {"endtime",        required_argument, 0,                      'E'},
                {"file",           required_argument, 0,                      'f'},
                {"gid",            required_argument, 0,                      'g'},
                {"group",          required_argument, 0,                      'g'},
                {"help",           no_argument,       0,                      'h'},
                {"helpformat",     no_argument,       &params.opt_help,       OPT_LONG_HELP},
                {"name",           required_argument, 0,                      OPT_LONG_NAME},
                {"nnodes",         required_argument, 0,                      'i'},
                {"ncpus",          required_argument, 0,                      'I'},
                {"jobs",           required_argument, 0,                      'j'},
                {"timelimit-min",  required_argument, 0,                      'k'},
                {"timelimit-max",  required_argument, 0,                      'K'},
                {"long",           no_argument,       0,                      'l'},
                {"allclusters",    no_argument,       0,                      'L'},
                {"cluster",        required_argument, 0,                      'M'},
                {"clusters",       required_argument, 0,                      'M'},
                {"nodelist",       required_argument, 0,                      'N'},
                {"noheader",       no_argument,       0,                      'n'},
                {"fields",         required_argument, 0,                      'o'},
                {"format",         required_argument, 0,                      'o'},
                {"parsable",       no_argument,       0,                      'p'},
                {"parsable2",      no_argument,       0,                      'P'},
                {"qos",            required_argument, 0,                      'q'},
                {"partition",      required_argument, 0,                      'r'},
                {"state",          required_argument, 0,                      's'},
                {"starttime",      required_argument, 0,                      'S'},
                {"truncate",       no_argument,       0,                      'T'},
                {"uid",            required_argument, 0,                      'u'},
                {"usage",          no_argument,       &params.opt_help,       OPT_LONG_USAGE},
                {"user",           required_argument, 0,                      'u'},
                {"verbose",        no_argument,       0,                      'v'},
                {"version",        no_argument,       0,                      'V'},
                {"wckeys",         required_argument, 0,                      'W'},
                {"associations",   required_argument, 0,                      'x'},
                {0,                0,		      0,                      0}};

	params.opt_uid = getuid();
	params.opt_gid = getgid();

	verbosity         = 0;
	log_init("sacct", opts, SYSLOG_FACILITY_DAEMON, NULL);
	opterr = 1;		/* Let getopt report problems to the user */

	while (1) {		/* now cycle through the command line */
		c = getopt_long(argc, argv,
				"aA:bcC:dDeE:f:g:hi:I:j:k:K:lLM:nN:o:OpPq:r:s:S:Ttu:vVW:x:X",
				long_options, &optionIndex);
		if (c == -1)
			break;
		switch (c) {
		case 'a':
			all_users = 1;
			break;
		case 'A':
			if(!job_cond->acct_list)
				job_cond->acct_list =
					list_create(slurm_destroy_char);
			slurm_addto_char_list(job_cond->acct_list, optarg);
			break;
		case 'b':
			brief_output = true;
			break;
		case 'c':
			params.opt_completion = 1;
			break;
		case 'C':
			/* 'C' is deprecated since 'M' is cluster on
			   everything else.
			*/
		case 'M':
			if(!strcasecmp(optarg, "-1")) {
				all_clusters = 1;
				break;
			}
			all_clusters=0;
			if(!job_cond->cluster_list)
				job_cond->cluster_list =
					list_create(slurm_destroy_char);
			slurm_addto_char_list(job_cond->cluster_list, optarg);
			break;
		case 'D':
			params.opt_dup = 1;
			break;
		case 'e':
			params.opt_help = 2;
			break;
		case 'E':
			job_cond->usage_end = parse_time(optarg, 1);
			if (errno == ESLURM_INVALID_TIME_VALUE)
				exit(1);
			break;
		case 'f':
			xfree(params.opt_filein);
			params.opt_filein = xstrdup(optarg);
			break;
		case 'g':
			if(!job_cond->groupid_list)
				job_cond->groupid_list =
					list_create(slurm_destroy_char);
			_addto_id_char_list(job_cond->groupid_list, optarg, 1);
			break;
		case 'h':
			params.opt_help = 1;
			break;
		case 'i':
			set = get_resource_arg_range(
				optarg,
				"requested node range",
				(int *)&job_cond->nodes_min,
				(int *)&job_cond->nodes_max,
				true);

			if (set == false) {
				error("invalid node range -i '%s'",
				      optarg);
				exit(1);
			}
			break;
		case 'I':
			set = get_resource_arg_range(
				optarg,
				"requested cpu range",
				(int *)&job_cond->cpus_min,
				(int *)&job_cond->cpus_max,
				true);

			if (set == false) {
				error("invalid cpu range -i '%s'",
				      optarg);
				exit(1);
			}
			break;
		case 'j':
			if ((strspn(optarg, "0123456789, ") < strlen(optarg))
			    && (strspn(optarg, ".batch0123456789, ")
				< strlen(optarg))) {
				fprintf(stderr, "Invalid jobs list: %s\n",
					optarg);
				exit(1);
			}

			if(!job_cond->step_list)
				job_cond->step_list = list_create(
					slurmdb_destroy_selected_step);
			_addto_step_list(job_cond->step_list, optarg);
			break;
		case 'k':
			job_cond->timelimit_min = time_str2mins(optarg);
			if (((int32_t)job_cond->timelimit_min <= 0)
			    && (job_cond->timelimit_min != INFINITE))
				fatal("Invalid time limit specification");
			break;
		case 'K':
			job_cond->timelimit_max = time_str2mins(optarg);
			if (((int32_t)job_cond->timelimit_max <= 0)
			    && (job_cond->timelimit_max != INFINITE))
				fatal("Invalid time limit specification");
			break;
		case 'L':
			all_clusters = 1;
			break;
		case 'l':
			long_output = true;
			break;
		case 'n':
			print_fields_have_header = 0;
			break;
		case 'N':
			if(job_cond->used_nodes) {
				error("Aleady asked for nodes '%s'",
				      job_cond->used_nodes);
				break;
			}
			job_cond->used_nodes = xstrdup(optarg);
			break;
		case OPT_LONG_NAME:
			if(!job_cond->jobname_list)
				job_cond->jobname_list =
					list_create(slurm_destroy_char);
			slurm_addto_char_list(job_cond->jobname_list, optarg);
			break;
		case 'o':
			xstrfmtcat(params.opt_field_list, "%s,", optarg);
			break;
		case 'p':
			print_fields_parsable_print =
				PRINT_FIELDS_PARSABLE_ENDING;
			break;
		case 'P':
			print_fields_parsable_print =
				PRINT_FIELDS_PARSABLE_NO_ENDING;
			break;
		case 'q':
			if (!g_qos_list) {
				slurmdb_qos_cond_t qos_cond;
				memset(&qos_cond, 0,
				       sizeof(slurmdb_qos_cond_t));
				qos_cond.with_deleted = 1;
				g_qos_list = slurmdb_qos_get(
					acct_db_conn, &qos_cond);
			}

			if(!job_cond->qos_list)
				job_cond->qos_list =
					list_create(slurm_destroy_char);

			if(!slurmdb_addto_qos_char_list(job_cond->qos_list,
							g_qos_list, optarg, 0))
				fatal("problem processing qos list");
			break;
		case 'r':
			if(!job_cond->partition_list)
				job_cond->partition_list =
					list_create(slurm_destroy_char);

			slurm_addto_char_list(job_cond->partition_list,
					      optarg);
			break;
		case 's':
			if(!job_cond->state_list)
				job_cond->state_list =
					list_create(slurm_destroy_char);

			_addto_state_char_list(job_cond->state_list, optarg);
			break;
		case 'S':
			job_cond->usage_start = parse_time(optarg, 1);
			if (errno == ESLURM_INVALID_TIME_VALUE)
				exit(1);
			break;
		case 'T':
			job_cond->without_usage_truncation = 0;
			break;
		case 'U':
			params.opt_help = 3;
			break;
		case 'u':
			if(!strcmp(optarg, "-1")) {
				all_users = 1;
				break;
			}
			all_users = 0;
			if(!job_cond->userid_list)
				job_cond->userid_list =
					list_create(slurm_destroy_char);
			_addto_id_char_list(job_cond->userid_list, optarg, 0);
			break;
		case 'v':
			/* Handle -vvv thusly...
			 */
			verbosity++;
			break;
		case 'W':
			if(!job_cond->wckey_list)
				job_cond->wckey_list =
					list_create(slurm_destroy_char);
			slurm_addto_char_list(job_cond->wckey_list, optarg);
			break;
		case 'V':
			print_slurm_version();
			exit(0);
		case 'x':
			if(!job_cond->associd_list)
				job_cond->associd_list =
					list_create(slurm_destroy_char);
			slurm_addto_char_list(job_cond->associd_list, optarg);
			break;
		case 't':
		case 'X':
			params.opt_allocs = 1;
			break;
		case ':':
		case '?':	/* getopt() has explained it */
			exit(1);
		}
	}

	if (verbosity) {
		opts.stderr_level += verbosity;
		opts.prefix_level = 1;
		log_alter(opts, 0, NULL);
	}


	/* Now set params.opt_dup, unless they've already done so */
	if (params.opt_dup < 0)	/* not already set explicitly */
		params.opt_dup = 0;

	job_cond->duplicates = params.opt_dup;
	job_cond->without_steps = params.opt_allocs;

	if(!job_cond->usage_start && !job_cond->step_list) {
		struct tm start_tm;
		job_cond->usage_start = time(NULL);

		if (!localtime_r(&job_cond->usage_start, &start_tm)) {
			error("Couldn't get localtime from %ld",
			      (long)job_cond->usage_start);
			return;
		}
		start_tm.tm_sec = 0;
		start_tm.tm_min = 0;
		start_tm.tm_hour = 0;
		start_tm.tm_isdst = -1;
		job_cond->usage_start = mktime(&start_tm);
	}

	if(verbosity > 0) {
		char *start_char =NULL, *end_char = NULL;

		start_char = xstrdup(ctime(&job_cond->usage_start));
		/* remove the new line */
		start_char[strlen(start_char)-1] = '\0';
		if(job_cond->usage_end) {
			end_char = xstrdup(ctime(&job_cond->usage_end));
			/* remove the new line */
			end_char[strlen(end_char)-1] = '\0';
		} else
			end_char = xstrdup("Now");
		info("Jobs eligible from %s - %s", start_char, end_char);
		xfree(start_char);
		xfree(end_char);
	}

	debug("Options selected:\n"
	      "\topt_completion=%d\n"
	      "\topt_dup=%d\n"
	      "\topt_field_list=%s\n"
	      "\topt_help=%d\n"
	      "\topt_allocs=%d",
	      params.opt_completion,
	      params.opt_dup,
	      params.opt_field_list,
	      params.opt_help,
	      params.opt_allocs);

	if(params.opt_completion) {
		g_slurm_jobcomp_init(params.opt_filein);

		acct_type = slurm_get_jobcomp_type();
		if ((strcmp(acct_type, "jobcomp/none") == 0)
		    &&  (stat(params.opt_filein, &stat_buf) != 0)) {
			fprintf(stderr, "SLURM job completion is disabled\n");
			exit(1);
		}
		xfree(acct_type);
	} else {
		slurm_acct_storage_init(params.opt_filein);

		acct_type = slurm_get_accounting_storage_type();
		if ((strcmp(acct_type, "accounting_storage/none") == 0)
		    &&  (stat(params.opt_filein, &stat_buf) != 0)) {
			fprintf(stderr,
				"SLURM accounting storage is disabled\n");
			exit(1);
		}
		xfree(acct_type);
		acct_db_conn = slurmdb_connection_get();
		if(errno != SLURM_SUCCESS) {
			error("Problem talking to the database: %m");
			exit(1);
		}
	}

	/* specific clusters requested? */
	if(all_clusters) {
		if(job_cond->cluster_list
		   && list_count(job_cond->cluster_list)) {
			list_destroy(job_cond->cluster_list);
			job_cond->cluster_list = NULL;
		}
		debug2("Clusters requested:\tall");
	} else if (job_cond->cluster_list
		   && list_count(job_cond->cluster_list)) {
		debug2( "Clusters requested:");
		itr = list_iterator_create(job_cond->cluster_list);
		while((start = list_next(itr)))
			debug2("\t: %s", start);
		list_iterator_destroy(itr);
	} else if(!job_cond->cluster_list
		  || !list_count(job_cond->cluster_list)) {
		if(!job_cond->cluster_list)
			job_cond->cluster_list =
				list_create(slurm_destroy_char);
		if((start = slurm_get_cluster_name())) {
			list_append(job_cond->cluster_list, start);
			debug2("Clusters requested:\t%s", start);
		}
	}

	/* if any jobs or nodes are specified set to look for all users if none
	   are set */
	if(!job_cond->userid_list || !list_count(job_cond->userid_list))
		if((job_cond->step_list && list_count(job_cond->step_list))
		   || job_cond->used_nodes)
			all_users=1;

	/* set all_users for user root if not requesting any */
	if(!job_cond->userid_list && !params.opt_uid)
		all_users = 1;

	if(all_users) {
		if(job_cond->userid_list && list_count(job_cond->userid_list)) {
			list_destroy(job_cond->userid_list);
			job_cond->userid_list = NULL;
		}
		debug2("Userids requested:\tall");
	} else if (job_cond->userid_list && list_count(job_cond->userid_list)) {
		debug2("Userids requested:");
		itr = list_iterator_create(job_cond->userid_list);
		while((start = list_next(itr)))
			debug2("\t: %s", start);
		list_iterator_destroy(itr);
	} else if(!job_cond->userid_list
		  || !list_count(job_cond->userid_list)) {
		if(!job_cond->userid_list)
			job_cond->userid_list = list_create(slurm_destroy_char);
		start = xstrdup_printf("%u", params.opt_uid);
		list_append(job_cond->userid_list, start);
		debug2("Userid requested\t: %s", start);
	}

	if (job_cond->groupid_list && list_count(job_cond->groupid_list)) {
		debug2("Groupids requested:");
		itr = list_iterator_create(job_cond->groupid_list);
		while((start = list_next(itr)))
			debug2("\t: %s", start);
		list_iterator_destroy(itr);
	}

	/* specific partitions requested? */
	if (job_cond->partition_list && list_count(job_cond->partition_list)) {
		debug2("Partitions requested:");
		itr = list_iterator_create(job_cond->partition_list);
		while((start = list_next(itr)))
			debug2("\t: %s", start);
		list_iterator_destroy(itr);
	}

	/* specific qos' requested? */
	if (job_cond->qos_list && list_count(job_cond->qos_list)) {
		start = get_qos_complete_str(g_qos_list, job_cond->qos_list);
		debug2("QOS requested\t: %s\n", start);
		xfree(start);
	}

	/* specific jobs requested? */
	if (job_cond->step_list && list_count(job_cond->step_list)) {
		debug2("Jobs requested:");
		itr = list_iterator_create(job_cond->step_list);
		while((selected_step = list_next(itr))) {
			if(selected_step->stepid != NO_VAL)
				debug2("\t: %d.%d",
					selected_step->jobid,
					selected_step->stepid);
			else
				debug2("\t: %d",
					selected_step->jobid);
		}
		list_iterator_destroy(itr);
	}

	/* specific states (completion state) requested? */
	if (job_cond->state_list && list_count(job_cond->state_list)) {
		debug2("States requested:");
		itr = list_iterator_create(job_cond->state_list);
		while((start = list_next(itr))) {
			debug2("\t: %s",
				job_state_string(atoi(start)));
		}
		list_iterator_destroy(itr);
	}

	if (job_cond->wckey_list && list_count(job_cond->wckey_list)) {
		debug2("Wckeys requested:");
		itr = list_iterator_create(job_cond->wckey_list);
		while((start = list_next(itr)))
			debug2("\t: %s\n", start);
		list_iterator_destroy(itr);
	}

	if (job_cond->timelimit_min) {
		char time_str[128], tmp1[32], tmp2[32];
		mins2time_str(job_cond->timelimit_min, tmp1, sizeof(tmp1));
		sprintf(time_str, "%s", tmp1);
		if(job_cond->timelimit_max) {
			int len = strlen(tmp1);
			mins2time_str(job_cond->timelimit_max,
				      tmp2, sizeof(tmp2));
			sprintf(time_str+len, " - %s", tmp2);
		}
		debug2("Timelimit requested\t: %s", time_str);
	}

	/* specific jobnames requested? */
	if (job_cond->jobname_list && list_count(job_cond->jobname_list)) {
		debug2("Jobnames requested:");
		itr = list_iterator_create(job_cond->jobname_list);
		while((start = list_next(itr))) {
			debug2("\t: %s", start);
		}
		list_iterator_destroy(itr);
	}

	/* select the output fields */
	if(brief_output) {
		if(params.opt_completion)
			dot = BRIEF_COMP_FIELDS;
		else
			dot = BRIEF_FIELDS;

		xstrfmtcat(params.opt_field_list, "%s,", dot);
	}

	if(long_output) {
		if(params.opt_completion)
			dot = LONG_COMP_FIELDS;
		else
			dot = LONG_FIELDS;

		xstrfmtcat(params.opt_field_list, "%s,", dot);
	}

	if (params.opt_field_list==NULL) {
		if(params.opt_completion)
			dot = DEFAULT_COMP_FIELDS;
		else
			dot = DEFAULT_FIELDS;

		xstrfmtcat(params.opt_field_list, "%s,", dot);
	}

	start = params.opt_field_list;
	while ((end = strstr(start, ","))) {
		char *tmp_char = NULL;
		int command_len = 0;
		int newlen = 0;

		*end = 0;
		while (isspace(*start))
			start++;	/* discard whitespace */
		if(!(int)*start)
			continue;

		if((tmp_char = strstr(start, "\%"))) {
			newlen = atoi(tmp_char+1);
			tmp_char[0] = '\0';
		}

		command_len = strlen(start);

		for (i = 0; fields[i].name; i++) {
			if (!strncasecmp(fields[i].name, start, command_len))
				goto foundfield;
		}
		error("Invalid field requested: \"%s\"", start);
		exit(1);
	foundfield:
		if(newlen)
			fields[i].len = newlen;
		list_append(print_fields_list, &fields[i]);
		start = end + 1;
	}
Пример #11
0
/*
 * slurm_sprint_job_info - output information about a specific Slurm
 *	job based upon message as loaded using slurm_load_jobs
 * IN job_ptr - an individual job information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 */
extern char *
slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner )
{
	int i, j;
	char time_str[32], *group_name, *user_name;
	char tmp1[128], tmp2[128], tmp3[128], tmp4[128], tmp5[128], *tmp6_ptr;
	char tmp_line[512];
	char *ionodes = NULL;
	uint16_t exit_status = 0, term_sig = 0;
	job_resources_t *job_resrcs = job_ptr->job_resrcs;
	char *out = NULL;
	time_t run_time;
	uint32_t min_nodes, max_nodes = 0;
	char *nodelist = "NodeList";
	bitstr_t *core_bitmap;
	char *host;
	int sock_inx, sock_reps, last;
	int abs_node_inx, rel_node_inx;
	int bit_inx, bit_reps;
	uint32_t *last_mem_alloc_ptr = NULL;
	uint32_t last_mem_alloc = NO_VAL;
	char *last_hosts;
	hostlist_t hl, hl_last;
	char select_buf[122];
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	if (cluster_flags & CLUSTER_FLAG_BG) {
		nodelist = "MidplaneList";
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &ionodes);
	}

	/****** Line 1 ******/
	snprintf(tmp_line, sizeof(tmp_line), "JobId=%u ", job_ptr->job_id);
	out = xstrdup(tmp_line);
	if (job_ptr->array_job_id) {
		snprintf(tmp_line, sizeof(tmp_line), 
			 "ArrayJobId=%u ArrayTaskId=%u ",
			 job_ptr->array_job_id, job_ptr->array_task_id);
		xstrcat(out, tmp_line);
	}
	snprintf(tmp_line, sizeof(tmp_line), "Name=%s", job_ptr->name);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 2 ******/
	user_name = uid_to_string((uid_t) job_ptr->user_id);
	group_name = gid_to_string((gid_t) job_ptr->group_id);
	snprintf(tmp_line, sizeof(tmp_line),
		 "UserId=%s(%u) GroupId=%s(%u)",
		 user_name, job_ptr->user_id, group_name, job_ptr->group_id);
	xfree(user_name);
	xfree(group_name);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 3 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "Priority=%u Account=%s QOS=%s",
		 job_ptr->priority, job_ptr->account, job_ptr->qos);
	xstrcat(out, tmp_line);
	if (slurm_get_track_wckey()) {
		snprintf(tmp_line, sizeof(tmp_line),
			 " WCKey=%s", job_ptr->wckey);
		xstrcat(out, tmp_line);
	}
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 4 ******/
	if (job_ptr->state_desc) {
		/* Replace white space with underscore for easier parsing */
		for (j=0; job_ptr->state_desc[j]; j++) {
			if (isspace((int)job_ptr->state_desc[j]))
				job_ptr->state_desc[j] = '_';
		}
		tmp6_ptr = job_ptr->state_desc;
	} else
		tmp6_ptr = job_reason_string(job_ptr->state_reason);
	snprintf(tmp_line, sizeof(tmp_line),
		 "JobState=%s Reason=%s Dependency=%s",
		 job_state_string(job_ptr->job_state), tmp6_ptr,
		 job_ptr->dependency);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 5 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "Requeue=%u Restarts=%u BatchFlag=%u ",
		 job_ptr->requeue, job_ptr->restart_cnt, job_ptr->batch_flag);
	xstrcat(out, tmp_line);
	if (WIFSIGNALED(job_ptr->exit_code))
		term_sig = WTERMSIG(job_ptr->exit_code);
	exit_status = WEXITSTATUS(job_ptr->exit_code);
	snprintf(tmp_line, sizeof(tmp_line),
		 "ExitCode=%u:%u", exit_status, term_sig);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 5a (optional) ******/
	if (!(job_ptr->show_flags & SHOW_DETAIL))
		goto line6;
	if (WIFSIGNALED(job_ptr->derived_ec))
		term_sig = WTERMSIG(job_ptr->derived_ec);
	else
		term_sig = 0;
	exit_status = WEXITSTATUS(job_ptr->derived_ec);
	snprintf(tmp_line, sizeof(tmp_line),
		 "DerivedExitCode=%u:%u", exit_status, term_sig);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 6 ******/
line6:
	snprintf(tmp_line, sizeof(tmp_line), "RunTime=");
	xstrcat(out, tmp_line);
	if (IS_JOB_PENDING(job_ptr))
		run_time = 0;
	else if (IS_JOB_SUSPENDED(job_ptr))
		run_time = job_ptr->pre_sus_time;
	else {
		time_t end_time;
		if (IS_JOB_RUNNING(job_ptr) || (job_ptr->end_time == 0))
			end_time = time(NULL);
		else
			end_time = job_ptr->end_time;
		if (job_ptr->suspend_time) {
			run_time = (time_t)
				(difftime(end_time, job_ptr->suspend_time)
				 + job_ptr->pre_sus_time);
		} else
			run_time = (time_t)
				difftime(end_time, job_ptr->start_time);
	}
	secs2time_str(run_time, tmp1, sizeof(tmp1));
	sprintf(tmp_line, "%s ", tmp1);
	xstrcat(out, tmp_line);

	snprintf(tmp_line, sizeof(tmp_line), "TimeLimit=");
	xstrcat(out, tmp_line);
	if (job_ptr->time_limit == NO_VAL)
		sprintf(tmp_line, "Partition_Limit");
	else {
		mins2time_str(job_ptr->time_limit, tmp_line,
			      sizeof(tmp_line));
	}
	xstrcat(out, tmp_line);
	snprintf(tmp_line, sizeof(tmp_line), " TimeMin=");
	xstrcat(out, tmp_line);
	if (job_ptr->time_min == 0)
		sprintf(tmp_line, "N/A");
	else {
		mins2time_str(job_ptr->time_min, tmp_line,
			      sizeof(tmp_line));
	}
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 7 ******/
	slurm_make_time_str((time_t *)&job_ptr->submit_time, time_str,
			    sizeof(time_str));
	snprintf(tmp_line, sizeof(tmp_line), "SubmitTime=%s ", time_str);
	xstrcat(out, tmp_line);

	slurm_make_time_str((time_t *)&job_ptr->eligible_time, time_str,
			    sizeof(time_str));
	snprintf(tmp_line, sizeof(tmp_line), "EligibleTime=%s", time_str);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 8 (optional) ******/
	if (job_ptr->resize_time) {
		slurm_make_time_str((time_t *)&job_ptr->resize_time, time_str,
				    sizeof(time_str));
		snprintf(tmp_line, sizeof(tmp_line), "ResizeTime=%s", time_str);
		xstrcat(out, tmp_line);
		if (one_liner)
			xstrcat(out, " ");
		else
			xstrcat(out, "\n   ");
	}

	/****** Line 9 ******/
	slurm_make_time_str((time_t *)&job_ptr->start_time, time_str,
			    sizeof(time_str));
	snprintf(tmp_line, sizeof(tmp_line), "StartTime=%s ", time_str);
	xstrcat(out, tmp_line);

	snprintf(tmp_line, sizeof(tmp_line), "EndTime=");
	xstrcat(out, tmp_line);
	if ((job_ptr->time_limit == INFINITE) &&
	    (job_ptr->end_time > time(NULL)))
		sprintf(tmp_line, "Unknown");
	else {
		slurm_make_time_str ((time_t *)&job_ptr->end_time, time_str,
				     sizeof(time_str));
		sprintf(tmp_line, "%s", time_str);
	}
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 10 ******/
	if (job_ptr->preempt_time == 0)
		sprintf(tmp_line, "PreemptTime=None ");
	else {
		slurm_make_time_str((time_t *)&job_ptr->preempt_time,
				    time_str, sizeof(time_str));
		snprintf(tmp_line, sizeof(tmp_line), "PreemptTime=%s ",
			 time_str);
	}
	xstrcat(out, tmp_line);
	if (job_ptr->suspend_time) {
		slurm_make_time_str ((time_t *)&job_ptr->suspend_time,
				     time_str, sizeof(time_str));
	} else {
		strncpy(time_str, "None", sizeof(time_str));
	}
	snprintf(tmp_line, sizeof(tmp_line),
		 "SuspendTime=%s SecsPreSuspend=%ld",
		 time_str, (long int)job_ptr->pre_sus_time);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 11 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "Partition=%s AllocNode:Sid=%s:%u",
		 job_ptr->partition, job_ptr->alloc_node, job_ptr->alloc_sid);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 12 ******/
	snprintf(tmp_line, sizeof(tmp_line), "Req%s=%s Exc%s=%s",
		 nodelist, job_ptr->req_nodes, nodelist, job_ptr->exc_nodes);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 13 ******/
	xstrfmtcat(out, "%s=", nodelist);
	xstrcat(out, job_ptr->nodes);
	if (job_ptr->nodes && ionodes) {
		snprintf(tmp_line, sizeof(tmp_line), "[%s]", ionodes);
		xstrcat(out, tmp_line);
		xfree(ionodes);
	}
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 14 (optional) ******/
	if (job_ptr->batch_host) {
		snprintf(tmp_line, sizeof(tmp_line), "BatchHost=%s",
			 job_ptr->batch_host);
		xstrcat(out, tmp_line);
		if (one_liner)
			xstrcat(out, " ");
		else
			xstrcat(out, "\n   ");
	}

	/****** Line 15 ******/
	if (cluster_flags & CLUSTER_FLAG_BG) {
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &min_nodes);
		if ((min_nodes == 0) || (min_nodes == NO_VAL)) {
			min_nodes = job_ptr->num_nodes;
			max_nodes = job_ptr->max_nodes;
		} else if (job_ptr->max_nodes)
			max_nodes = min_nodes;
	} else {
		min_nodes = job_ptr->num_nodes;
		max_nodes = job_ptr->max_nodes;
	}

	_sprint_range(tmp1, sizeof(tmp1), job_ptr->num_cpus, job_ptr->max_cpus);
	_sprint_range(tmp2, sizeof(tmp2), min_nodes, max_nodes);
	if (job_ptr->sockets_per_node == (uint16_t) NO_VAL)
		strcpy(tmp3, "*");
	else
		snprintf(tmp3, sizeof(tmp3), "%u", job_ptr->sockets_per_node);
	if (job_ptr->cores_per_socket == (uint16_t) NO_VAL)
		strcpy(tmp4, "*");
	else
		snprintf(tmp4, sizeof(tmp4), "%u", job_ptr->cores_per_socket);
	if (job_ptr->threads_per_core == (uint16_t) NO_VAL)
		strcpy(tmp5, "*");
	else
		snprintf(tmp5, sizeof(tmp5), "%u", job_ptr->threads_per_core);
	snprintf(tmp_line, sizeof(tmp_line),
		 "NumNodes=%s NumCPUs=%s CPUs/Task=%u ReqS:C:T=%s:%s:%s",
		 tmp2, tmp1, job_ptr->cpus_per_task, tmp3, tmp4, tmp5);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	if (!job_resrcs)
		goto line15;

	if (cluster_flags & CLUSTER_FLAG_BG) {
		if ((job_resrcs->cpu_array_cnt > 0) &&
		    (job_resrcs->cpu_array_value) &&
		    (job_resrcs->cpu_array_reps)) {
			int length = 0;
			xstrcat(out, "CPUs=");
			length += 10;
			for (i = 0; i < job_resrcs->cpu_array_cnt; i++) {
				if (length > 70) {
					/* skip to last CPU group entry */
					if (i < job_resrcs->cpu_array_cnt - 1) {
						continue;
					}
					/* add ellipsis before last entry */
					xstrcat(out, "...,");
					length += 4;
				}

				snprintf(tmp_line, sizeof(tmp_line), "%d",
					 job_resrcs->cpus[i]);
				xstrcat(out, tmp_line);
				length += strlen(tmp_line);
				if (job_resrcs->cpu_array_reps[i] > 1) {
					snprintf(tmp_line, sizeof(tmp_line),
						 "*%d",
						 job_resrcs->cpu_array_reps[i]);
					xstrcat(out, tmp_line);
					length += strlen(tmp_line);
				}
				if (i < job_resrcs->cpu_array_cnt - 1) {
					xstrcat(out, ",");
					length++;
				}
			}
			if (one_liner)
				xstrcat(out, " ");
			else
				xstrcat(out, "\n   ");
		}
	} else {
		if (!job_resrcs->core_bitmap)
			goto line15;

		last  = bit_fls(job_resrcs->core_bitmap);
		if (last == -1)
			goto line15;

		hl = hostlist_create(job_ptr->nodes);
		if (!hl) {
			error("slurm_sprint_job_info: hostlist_create: %s",
			      job_ptr->nodes);
			return NULL;
		}
		hl_last = hostlist_create(NULL);
		if (!hl_last) {
			error("slurm_sprint_job_info: hostlist_create: NULL");
			hostlist_destroy(hl);
			return NULL;
		}

		bit_inx = 0;
		i = sock_inx = sock_reps = 0;
		abs_node_inx = job_ptr->node_inx[i];

/*	tmp1[] stores the current cpu(s) allocated	*/
		tmp2[0] = '\0';	/* stores last cpu(s) allocated */
		for (rel_node_inx=0; rel_node_inx < job_resrcs->nhosts;
		     rel_node_inx++) {

			if (sock_reps >=
			    job_resrcs->sock_core_rep_count[sock_inx]) {
				sock_inx++;
				sock_reps = 0;
			}
			sock_reps++;

			bit_reps = job_resrcs->sockets_per_node[sock_inx] *
				job_resrcs->cores_per_socket[sock_inx];

			core_bitmap = bit_alloc(bit_reps);
			for (j=0; j < bit_reps; j++) {
				if (bit_test(job_resrcs->core_bitmap, bit_inx))
					bit_set(core_bitmap, j);
				bit_inx++;
			}

			bit_fmt(tmp1, sizeof(tmp1), core_bitmap);
			FREE_NULL_BITMAP(core_bitmap);
			host = hostlist_shift(hl);
/*
 *		If the allocation values for this host are not the same as the
 *		last host, print the report of the last group of hosts that had
 *		identical allocation values.
 */
			if (strcmp(tmp1, tmp2) ||
			    (last_mem_alloc_ptr != job_resrcs->memory_allocated) ||
			    (job_resrcs->memory_allocated &&
			     (last_mem_alloc !=
			      job_resrcs->memory_allocated[rel_node_inx]))) {
				if (hostlist_count(hl_last)) {
					last_hosts = 
						hostlist_ranged_string_xmalloc(
						hl_last);
					snprintf(tmp_line, sizeof(tmp_line),
						 "  Nodes=%s CPU_IDs=%s Mem=%u",
						 last_hosts, tmp2,
						 last_mem_alloc_ptr ?
						 last_mem_alloc : 0);
					xfree(last_hosts);
					xstrcat(out, tmp_line);
					if (one_liner)
						xstrcat(out, " ");
					else
						xstrcat(out, "\n   ");

					hostlist_destroy(hl_last);
					hl_last = hostlist_create(NULL);
				}
				strcpy(tmp2, tmp1);
				last_mem_alloc_ptr = job_resrcs->memory_allocated;
				if (last_mem_alloc_ptr)
					last_mem_alloc = job_resrcs->
						memory_allocated[rel_node_inx];
				else
					last_mem_alloc = NO_VAL;
			}
			hostlist_push_host(hl_last, host);
			free(host);

			if (bit_inx > last)
				break;

			if (abs_node_inx > job_ptr->node_inx[i+1]) {
				i += 2;
				abs_node_inx = job_ptr->node_inx[i];
			} else {
				abs_node_inx++;
			}
		}

		if (hostlist_count(hl_last)) {
			last_hosts = hostlist_ranged_string_xmalloc(hl_last);
			snprintf(tmp_line, sizeof(tmp_line),
				 "  Nodes=%s CPU_IDs=%s Mem=%u",
				 last_hosts, tmp2,
				 last_mem_alloc_ptr ? last_mem_alloc : 0);
			xfree(last_hosts);
			xstrcat(out, tmp_line);
			if (one_liner)
				xstrcat(out, " ");
			else
				xstrcat(out, "\n   ");
		}
		hostlist_destroy(hl);
		hostlist_destroy(hl_last);
	}
	/****** Line 15 ******/
line15:
	if (job_ptr->pn_min_memory & MEM_PER_CPU) {
		job_ptr->pn_min_memory &= (~MEM_PER_CPU);
		tmp6_ptr = "CPU";
	} else
		tmp6_ptr = "Node";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		convert_num_unit((float)job_ptr->pn_min_cpus,
				 tmp1, sizeof(tmp1), UNIT_NONE);
		snprintf(tmp_line, sizeof(tmp_line), "MinCPUsNode=%s",	tmp1);
	} else {
		snprintf(tmp_line, sizeof(tmp_line), "MinCPUsNode=%u",
			 job_ptr->pn_min_cpus);
	}

	xstrcat(out, tmp_line);
	convert_num_unit((float)job_ptr->pn_min_memory, tmp1, sizeof(tmp1),
			 UNIT_MEGA);
	convert_num_unit((float)job_ptr->pn_min_tmp_disk, tmp2, sizeof(tmp2),
			 UNIT_MEGA);
	snprintf(tmp_line, sizeof(tmp_line),
		 " MinMemory%s=%s MinTmpDiskNode=%s",
		 tmp6_ptr, tmp1, tmp2);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 16 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "Features=%s Gres=%s Reservation=%s",
		 job_ptr->features, job_ptr->gres, job_ptr->resv_name);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 17 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "Shared=%s Contiguous=%d Licenses=%s Network=%s",
		 (job_ptr->shared == 0 ? "0" :
		  job_ptr->shared == 1 ? "1" : "OK"),
		 job_ptr->contiguous, job_ptr->licenses, job_ptr->network);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 18 ******/
	snprintf(tmp_line, sizeof(tmp_line), "Command=%s",
		 job_ptr->command);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 19 ******/
	snprintf(tmp_line, sizeof(tmp_line), "WorkDir=%s",
		 job_ptr->work_dir);
	xstrcat(out, tmp_line);

	if (cluster_flags & CLUSTER_FLAG_BG) {
		/****** Line 20 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_BG_ID);
		if (select_buf[0] != '\0') {
			if (one_liner)
				xstrcat(out, " ");
			else
				xstrcat(out, "\n   ");
			snprintf(tmp_line, sizeof(tmp_line),
				 "Block_ID=%s", select_buf);
			xstrcat(out, tmp_line);
		}

		/****** Line 21 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MIXED_SHORT);
		if (select_buf[0] != '\0') {
			if (one_liner)
				xstrcat(out, " ");
			else
				xstrcat(out, "\n   ");
			xstrcat(out, select_buf);
		}

		if (cluster_flags & CLUSTER_FLAG_BGL) {
			/****** Line 22 (optional) ******/
			select_g_select_jobinfo_sprint(
				job_ptr->select_jobinfo,
				select_buf, sizeof(select_buf),
				SELECT_PRINT_BLRTS_IMAGE);
			if (select_buf[0] != '\0') {
				if (one_liner)
					xstrcat(out, " ");
				else
					xstrcat(out, "\n   ");
				snprintf(tmp_line, sizeof(tmp_line),
					 "BlrtsImage=%s", select_buf);
				xstrcat(out, tmp_line);
			}
		}
		/****** Line 23 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_LINUX_IMAGE);
		if (select_buf[0] != '\0') {
			if (one_liner)
				xstrcat(out, " ");
			else
				xstrcat(out, "\n   ");
			if (cluster_flags & CLUSTER_FLAG_BGL)
				snprintf(tmp_line, sizeof(tmp_line),
					 "LinuxImage=%s", select_buf);
			else
				snprintf(tmp_line, sizeof(tmp_line),
					 "CnloadImage=%s", select_buf);

			xstrcat(out, tmp_line);
		}
		/****** Line 24 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MLOADER_IMAGE);
		if (select_buf[0] != '\0') {
			if (one_liner)
				xstrcat(out, " ");
			else
				xstrcat(out, "\n   ");
			snprintf(tmp_line, sizeof(tmp_line),
				 "MloaderImage=%s", select_buf);
			xstrcat(out, tmp_line);
		}
		/****** Line 25 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_RAMDISK_IMAGE);
		if (select_buf[0] != '\0') {
			if (one_liner)
				xstrcat(out, " ");
			else
				xstrcat(out, "\n   ");
			if (cluster_flags & CLUSTER_FLAG_BGL)
				snprintf(tmp_line, sizeof(tmp_line),
					 "RamDiskImage=%s", select_buf);
			else
				snprintf(tmp_line, sizeof(tmp_line),
					 "IoloadImage=%s", select_buf);
			xstrcat(out, tmp_line);
		}
	}

	/****** Line 26 (optional) ******/
	if (job_ptr->comment) {
		if (one_liner)
			xstrcat(out, " ");
		else
			xstrcat(out, "\n   ");
		snprintf(tmp_line, sizeof(tmp_line), "Comment=%s ",
			 job_ptr->comment);
		xstrcat(out, tmp_line);
	}

	/****** Line 27 (optional) ******/
	if (job_ptr->batch_script) {
		if (one_liner)
			xstrcat(out, " ");
		else
			xstrcat(out, "\n   ");
		xstrcat(out, "BatchScript=\n");
		xstrcat(out, job_ptr->batch_script);
	}

	/****** Line 28 (optional) ******/
	if (job_ptr->req_switch) {
		char time_buf[32];
		if (one_liner)
			xstrcat(out, " ");
		else
			xstrcat(out, "\n   ");
		secs2time_str((time_t) job_ptr->wait4switch, time_buf,
			      sizeof(time_buf));
		snprintf(tmp_line, sizeof(tmp_line), "Switches=%u@%s\n",
			 job_ptr->req_switch, time_buf);
		xstrcat(out, tmp_line);
	}

	/****** Line 29 (optional) ******/
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;

}
Пример #12
0
static int _attempt_backfill(void)
{
    bool filter_root = false;
    List job_queue;
    job_queue_rec_t *job_queue_rec;
    slurmdb_qos_rec_t *qos_ptr = NULL;
    int i, j, node_space_recs;
    struct job_record *job_ptr;
    struct part_record *part_ptr;
    uint32_t end_time, end_reserve;
    uint32_t time_limit, comp_time_limit, orig_time_limit;
    uint32_t min_nodes, max_nodes, req_nodes;
    bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL;
    time_t now = time(NULL), sched_start, later_start, start_res;
    node_space_map_t *node_space;
    static int sched_timeout = 0;
    int this_sched_timeout = 0, rc = 0;

    sched_start = now;
    if (sched_timeout == 0) {
        sched_timeout = slurm_get_msg_timeout() / 2;
        sched_timeout = MAX(sched_timeout, 1);
        sched_timeout = MIN(sched_timeout, 10);
    }
    this_sched_timeout = sched_timeout;

#ifdef HAVE_CRAY
    /*
     * Run a Basil Inventory immediately before setting up the schedule
     * plan, to avoid race conditions caused by ALPS node state change.
     * Needs to be done with the node-state lock taken.
     */
    if (select_g_reconfigure()) {
        debug4("backfill: not scheduling due to ALPS");
        return SLURM_SUCCESS;
    }
#endif

    if (slurm_get_root_filter())
        filter_root = true;

    job_queue = build_job_queue(true);
    if (list_count(job_queue) <= 1) {
        debug("backfill: no jobs to backfill");
        list_destroy(job_queue);
        return 0;
    }

    node_space = xmalloc(sizeof(node_space_map_t) *
                         (max_backfill_job_cnt + 3));
    node_space[0].begin_time = sched_start;
    node_space[0].end_time = sched_start + backfill_window;
    node_space[0].avail_bitmap = bit_copy(avail_node_bitmap);
    node_space[0].next = 0;
    node_space_recs = 1;
    if (debug_flags & DEBUG_FLAG_BACKFILL)
        _dump_node_space_table(node_space);

    while ((job_queue_rec = (job_queue_rec_t *)
                            list_pop_bottom(job_queue, sort_job_queue2))) {
        job_ptr  = job_queue_rec->job_ptr;
        part_ptr = job_queue_rec->part_ptr;
        xfree(job_queue_rec);
        if (!IS_JOB_PENDING(job_ptr))
            continue;	/* started in other partition */
        job_ptr->part_ptr = part_ptr;

        if (debug_flags & DEBUG_FLAG_BACKFILL)
            info("backfill test for job %u", job_ptr->job_id);

        if ((job_ptr->state_reason == WAIT_ASSOC_JOB_LIMIT) ||
                (job_ptr->state_reason == WAIT_ASSOC_RESOURCE_LIMIT) ||
                (job_ptr->state_reason == WAIT_ASSOC_TIME_LIMIT) ||
                (job_ptr->state_reason == WAIT_QOS_JOB_LIMIT) ||
                (job_ptr->state_reason == WAIT_QOS_RESOURCE_LIMIT) ||
                (job_ptr->state_reason == WAIT_QOS_TIME_LIMIT) ||
                !acct_policy_job_runnable(job_ptr)) {
            debug2("backfill: job %u is not allowed to run now. "
                   "Skipping it. State=%s. Reason=%s. Priority=%u",
                   job_ptr->job_id,
                   job_state_string(job_ptr->job_state),
                   job_reason_string(job_ptr->state_reason),
                   job_ptr->priority);
            continue;
        }

        if (((part_ptr->state_up & PARTITION_SCHED) == 0) ||
                (part_ptr->node_bitmap == NULL))
            continue;
        if ((part_ptr->flags & PART_FLAG_ROOT_ONLY) && filter_root)
            continue;

        if ((!job_independent(job_ptr, 0)) ||
                (license_job_test(job_ptr, time(NULL)) != SLURM_SUCCESS))
            continue;

        /* Determine minimum and maximum node counts */
        min_nodes = MAX(job_ptr->details->min_nodes,
                        part_ptr->min_nodes);
        if (job_ptr->details->max_nodes == 0)
            max_nodes = part_ptr->max_nodes;
        else
            max_nodes = MIN(job_ptr->details->max_nodes,
                            part_ptr->max_nodes);
        max_nodes = MIN(max_nodes, 500000);     /* prevent overflows */
        if (job_ptr->details->max_nodes)
            req_nodes = max_nodes;
        else
            req_nodes = min_nodes;
        if (min_nodes > max_nodes) {
            /* job's min_nodes exceeds partition's max_nodes */
            continue;
        }

        /* Determine job's expected completion time */
        if (job_ptr->time_limit == NO_VAL) {
            if (part_ptr->max_time == INFINITE)
                time_limit = 365 * 24 * 60; /* one year */
            else
                time_limit = part_ptr->max_time;
        } else {
            if (part_ptr->max_time == INFINITE)
                time_limit = job_ptr->time_limit;
            else
                time_limit = MIN(job_ptr->time_limit,
                                 part_ptr->max_time);
        }
        comp_time_limit = time_limit;
        orig_time_limit = job_ptr->time_limit;
        if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE))
            time_limit = job_ptr->time_limit = 1;
        else if (job_ptr->time_min && (job_ptr->time_min < time_limit))
            time_limit = job_ptr->time_limit = job_ptr->time_min;

        /* Determine impact of any resource reservations */
        later_start = now;
TRY_LATER:
        FREE_NULL_BITMAP(avail_bitmap);
        start_res   = later_start;
        later_start = 0;
        j = job_test_resv(job_ptr, &start_res, true, &avail_bitmap);
        if (j != SLURM_SUCCESS) {
            job_ptr->time_limit = orig_time_limit;
            continue;
        }
        if (start_res > now)
            end_time = (time_limit * 60) + start_res;
        else
            end_time = (time_limit * 60) + now;

        /* Identify usable nodes for this job */
        bit_and(avail_bitmap, part_ptr->node_bitmap);
        bit_and(avail_bitmap, up_node_bitmap);
        for (j=0; ; ) {
            if ((node_space[j].end_time > start_res) &&
                    node_space[j].next && (later_start == 0))
                later_start = node_space[j].end_time;
            if (node_space[j].end_time <= start_res)
                ;
            else if (node_space[j].begin_time <= end_time) {
                bit_and(avail_bitmap,
                        node_space[j].avail_bitmap);
            } else
                break;
            if ((j = node_space[j].next) == 0)
                break;
        }

        if (job_ptr->details->exc_node_bitmap) {
            bit_not(job_ptr->details->exc_node_bitmap);
            bit_and(avail_bitmap,
                    job_ptr->details->exc_node_bitmap);
            bit_not(job_ptr->details->exc_node_bitmap);
        }

        /* Test if insufficient nodes remain OR
         *	required nodes missing OR
         *	nodes lack features */
        if ((bit_set_count(avail_bitmap) < min_nodes) ||
                ((job_ptr->details->req_node_bitmap) &&
                 (!bit_super_set(job_ptr->details->req_node_bitmap,
                                 avail_bitmap))) ||
                (job_req_node_filter(job_ptr, avail_bitmap))) {
            if (later_start) {
                job_ptr->start_time = 0;
                goto TRY_LATER;
            }
            job_ptr->time_limit = orig_time_limit;
            continue;
        }

        /* Identify nodes which are definitely off limits */
        FREE_NULL_BITMAP(resv_bitmap);
        resv_bitmap = bit_copy(avail_bitmap);
        bit_not(resv_bitmap);

        if ((time(NULL) - sched_start) >= this_sched_timeout) {
            debug("backfill: loop taking too long, yielding locks");
            if (_yield_locks()) {
                debug("backfill: system state changed, "
                      "breaking out");
                rc = 1;
                break;
            } else {
                this_sched_timeout += sched_timeout;
            }
        }
        /* this is the time consuming operation */
        debug2("backfill: entering _try_sched for job %u.",
               job_ptr->job_id);
        j = _try_sched(job_ptr, &avail_bitmap,
                       min_nodes, max_nodes, req_nodes);
        debug2("backfill: finished _try_sched for job %u.",
               job_ptr->job_id);
        now = time(NULL);
        if (j != SLURM_SUCCESS) {
            job_ptr->time_limit = orig_time_limit;
            job_ptr->start_time = 0;
            continue;	/* not runable */
        }

        if (start_res > job_ptr->start_time) {
            job_ptr->start_time = start_res;
            last_job_update = now;
        }
        if (job_ptr->start_time <= now) {
            int rc = _start_job(job_ptr, resv_bitmap);
            if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE))
                job_ptr->time_limit = orig_time_limit;
            else if ((rc == SLURM_SUCCESS) && job_ptr->time_min) {
                /* Set time limit as high as possible */
                job_ptr->time_limit = comp_time_limit;
                job_ptr->end_time = job_ptr->start_time +
                                    (comp_time_limit * 60);
                _reset_job_time_limit(job_ptr, now,
                                      node_space);
                time_limit = job_ptr->time_limit;
            } else {
                job_ptr->time_limit = orig_time_limit;
            }
            if (rc == ESLURM_ACCOUNTING_POLICY) {
                /* Unknown future start time, just skip job */
                job_ptr->start_time = 0;
                continue;
            } else if (rc != SLURM_SUCCESS) {
                /* Planned to start job, but something bad
                 * happended. */
                job_ptr->start_time = 0;
                break;
            } else {
                /* Started this job, move to next one */
                continue;
            }
        } else
            job_ptr->time_limit = orig_time_limit;

        if (later_start && (job_ptr->start_time > later_start)) {
            /* Try later when some nodes currently reserved for
             * pending jobs are free */
            job_ptr->start_time = 0;
            goto TRY_LATER;
        }

        if (job_ptr->start_time > (sched_start + backfill_window)) {
            /* Starts too far in the future to worry about */
            continue;
        }

        if (node_space_recs >= max_backfill_job_cnt) {
            /* Already have too many jobs to deal with */
            break;
        }

        end_reserve = job_ptr->start_time + (time_limit * 60);
        if (_test_resv_overlap(node_space, avail_bitmap,
                               job_ptr->start_time, end_reserve)) {
            /* This job overlaps with an existing reservation for
             * job to be backfill scheduled, which the sched
             * plugin does not know about. Try again later. */
            later_start = job_ptr->start_time;
            job_ptr->start_time = 0;
            goto TRY_LATER;
        }

        /*
         * Add reservation to scheduling table if appropriate
         */
        qos_ptr = job_ptr->qos_ptr;
        if (qos_ptr && (qos_ptr->flags & QOS_FLAG_NO_RESERVE))
            continue;
        bit_not(avail_bitmap);
        _add_reservation(job_ptr->start_time, end_reserve,
                         avail_bitmap, node_space, &node_space_recs);
        if (debug_flags & DEBUG_FLAG_BACKFILL)
            _dump_node_space_table(node_space);
    }
    FREE_NULL_BITMAP(avail_bitmap);
    FREE_NULL_BITMAP(resv_bitmap);

    for (i=0; ; ) {
        FREE_NULL_BITMAP(node_space[i].avail_bitmap);
        if ((i = node_space[i].next) == 0)
            break;
    }
    xfree(node_space);
    list_destroy(job_queue);
    return rc;
}
Пример #13
0
static struct jobcomp_info * _jobcomp_info_create (struct job_record *job)
{
    enum job_states state;
    struct jobcomp_info * j = xmalloc (sizeof (*j));

    j->jobid = job->job_id;
    j->uid = job->user_id;
    j->gid = job->group_id;
    j->name = xstrdup (job->name);
    j->array_job_id = job->array_job_id;
    j->array_task_id = job->array_task_id;

    if (IS_JOB_RESIZING(job)) {
        state = JOB_RESIZING;
        j->jobstate = xstrdup (job_state_string (state));
        if (job->resize_time)
            j->start = job->resize_time;
        else
            j->start = job->start_time;
        j->end = time(NULL);
    } else {
        /* Job state will typically have JOB_COMPLETING or JOB_RESIZING
         * flag set when called. We remove the flags to get the eventual
         * completion state: JOB_FAILED, JOB_TIMEOUT, etc. */
        state = job->job_state & JOB_STATE_BASE;
        j->jobstate = xstrdup (job_state_string (state));
        if (job->resize_time)
            j->start = job->resize_time;
        else if (job->start_time > job->end_time) {
            /* Job cancelled while pending and
             * expected start time is in the future. */
            j->start = 0;
        } else
            j->start = job->start_time;
        j->end = job->end_time;
    }

    j->partition = xstrdup (job->partition);
    if ((job->time_limit == NO_VAL) && job->part_ptr)
        j->limit = job->part_ptr->max_time;
    else
        j->limit = job->time_limit;
    j->submit = job->details ? job->details->submit_time:job->start_time;
    j->batch_flag = job->batch_flag;
    j->nodes = xstrdup (job->nodes);
    j->nprocs = job->total_cpus;
    j->nnodes = job->node_cnt;
    j->account = job->account ? xstrdup (job->account) : NULL;
    if (job->details && job->details->work_dir)
        j->work_dir = xstrdup(job->details->work_dir);
    else
        j->work_dir = xstrdup("unknown");
    if (job->details) {
        if (job->details->std_in)
            j->std_in = xstrdup(job->details->std_in);
        if (job->details->std_out)
            j->std_out = xstrdup(job->details->std_out);
        if (job->details->std_err)
            j->std_err = xstrdup(job->details->std_err);
    }

#ifdef HAVE_BG
    j->connect_type = select_g_select_jobinfo_xstrdup(job->select_jobinfo,
                      SELECT_PRINT_CONNECTION);
    j->geometry = select_g_select_jobinfo_xstrdup(job->select_jobinfo,
                  SELECT_PRINT_GEOMETRY);
    j->blockid = select_g_select_jobinfo_xstrdup(job->select_jobinfo,
                 SELECT_PRINT_BG_ID);
#endif
    return (j);
}
Пример #14
0
/*
 * slurm_sprint_job_step_info - output information about a specific Slurm
 *	job step based upon message as loaded using slurm_get_job_steps
 * IN job_ptr - an individual job step information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 */
char *
slurm_sprint_job_step_info ( job_step_info_t * job_step_ptr,
			    int one_liner )
{
	char tmp_node_cnt[40];
	char time_str[32];
	char limit_str[32];
	char tmp_line[128];
	char *out = NULL;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	/****** Line 1 ******/
	slurm_make_time_str ((time_t *)&job_step_ptr->start_time, time_str,
		sizeof(time_str));
	if (job_step_ptr->time_limit == INFINITE)
		sprintf(limit_str, "UNLIMITED");
	else
		secs2time_str ((time_t)job_step_ptr->time_limit * 60,
				limit_str, sizeof(limit_str));
	snprintf(tmp_line, sizeof(tmp_line),
		 "StepId=%u.%u UserId=%u StartTime=%s TimeLimit=%s",
		 job_step_ptr->job_id, job_step_ptr->step_id,
		 job_step_ptr->user_id, time_str, limit_str);
	out = xstrdup(tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 2 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "State=%s ",
		 job_state_string(job_step_ptr->state));
	xstrcat(out, tmp_line);
	if (cluster_flags & CLUSTER_FLAG_BG) {
		char *io_nodes;
		select_g_select_jobinfo_get(job_step_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &io_nodes);
		if (io_nodes) {
			snprintf(tmp_line, sizeof(tmp_line),
				 "Partition=%s MidplaneList=%s[%s] Gres=%s",
				 job_step_ptr->partition,
				 job_step_ptr->nodes, io_nodes,
				 job_step_ptr->gres);
			xfree(io_nodes);
		} else
			snprintf(tmp_line, sizeof(tmp_line),
				 "Partition=%s MidplaneList=%s Gres=%s",
				 job_step_ptr->partition,
				 job_step_ptr->nodes,
				 job_step_ptr->gres);
	} else {
		snprintf(tmp_line, sizeof(tmp_line),
			"Partition=%s NodeList=%s Gres=%s",
			job_step_ptr->partition, job_step_ptr->nodes,
			job_step_ptr->gres);
	}
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 3 ******/
	if (cluster_flags & CLUSTER_FLAG_BGQ) {
		uint32_t nodes = 0;
		select_g_select_jobinfo_get(job_step_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &nodes);
		convert_num_unit((float)nodes, tmp_node_cnt,
				 sizeof(tmp_node_cnt), UNIT_NONE);
	} else {
		convert_num_unit((float)_nodes_in_list(job_step_ptr->nodes),
				 tmp_node_cnt, sizeof(tmp_node_cnt),
				 UNIT_NONE);
	}

	snprintf(tmp_line, sizeof(tmp_line),
		"Nodes=%s Tasks=%u Name=%s Network=%s",
		 tmp_node_cnt, job_step_ptr->num_tasks, job_step_ptr->name,
		job_step_ptr->network);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 4 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		"ResvPorts=%s Checkpoint=%u CheckpointDir=%s",
		 job_step_ptr->resv_ports,
		 job_step_ptr->ckpt_interval, job_step_ptr->ckpt_dir);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 5 ******/
	if (job_step_ptr->cpu_freq == NO_VAL) {
		snprintf(tmp_line, sizeof(tmp_line), 
			 "CPUFreqReq=Default\n\n");
	} else if (job_step_ptr->cpu_freq & CPU_FREQ_RANGE_FLAG) {
		switch (job_step_ptr->cpu_freq) 
		{
		case CPU_FREQ_LOW :
			snprintf(tmp_line, sizeof(tmp_line),
				 "CPUFreqReq=Low\n\n");
			break;
		case CPU_FREQ_MEDIUM :
			snprintf(tmp_line, sizeof(tmp_line),
				 "CPUFreqReq=Medium\n\n");
			break;
		case CPU_FREQ_HIGH :
			snprintf(tmp_line, sizeof(tmp_line),
				 "CPUFreqReq=High\n\n");
			break;
		default :
			snprintf(tmp_line, sizeof(tmp_line),
				 "CPUFreqReq=Unknown\n\n");
		}
	} else {
		snprintf(tmp_line, sizeof(tmp_line),
			 "CPUFreqReq=%u\n\n", job_step_ptr->cpu_freq);
	}
	xstrcat(out, tmp_line);

	return out;
}
Пример #15
0
/* print the parameters specified */
static void
_print_options(void)
{
	ListIterator iterator;
	int i;
	char *part;
	uint32_t *user;
	enum job_states *state_id;
	squeue_job_step_t *job_step_id;
	uint32_t *job_id;
	char hostlist[8192];

	if (params.nodes) {
		hostset_ranged_string(params.nodes, sizeof(hostlist)-1,
				      hostlist);
	} else
		hostlist[0] = '\0';

	printf( "-----------------------------\n" );
	printf( "all         = %s\n", params.all_flag ? "true" : "false");
	printf( "format      = %s\n", params.format );
	printf( "iterate     = %d\n", params.iterate );
	printf( "job_flag    = %d\n", params.job_flag );
	printf( "jobs        = %s\n", params.jobs );
	printf( "max_cpus    = %d\n", params.max_cpus ) ;
	printf( "nodes       = %s\n", hostlist ) ;
	printf( "partitions  = %s\n", params.partitions ) ;
	printf( "reservation = %s\n", params.reservation ) ;
	printf( "sort        = %s\n", params.sort ) ;
	printf( "start_flag  = %d\n", params.start_flag );
	printf( "states      = %s\n", params.states ) ;
	printf( "step_flag   = %d\n", params.step_flag );
	printf( "steps       = %s\n", params.steps );
	printf( "users       = %s\n", params.users );
	printf( "verbose     = %d\n", params.verbose );

	if ((params.verbose > 1) && params.job_list) {
		i = 0;
		iterator = list_iterator_create( params.job_list );
		while ( (job_id = list_next( iterator )) ) {
			printf( "job_list[%d] = %u\n", i++, *job_id);
		}
		list_iterator_destroy( iterator );
	}

	if ((params.verbose > 1) && params.part_list) {
		i = 0;
		iterator = list_iterator_create( params.part_list );
		while ( (part = list_next( iterator )) ) {
			printf( "part_list[%d] = %s\n", i++, part);
		}
		list_iterator_destroy( iterator );
	}

	if ((params.verbose > 1) && params.state_list) {
		i = 0;
		iterator = list_iterator_create( params.state_list );
		while ( (state_id = list_next( iterator )) ) {
			printf( "state_list[%d] = %s\n",
				i++, job_state_string( *state_id ));
		}
		list_iterator_destroy( iterator );
	}

	if ((params.verbose > 1) && params.step_list) {
		i = 0;
		iterator = list_iterator_create( params.step_list );
		while ( (job_step_id = list_next( iterator )) ) {
			printf( "step_list[%d] = %u.%u\n", i++,
				job_step_id->job_id, job_step_id->step_id );
		}
		list_iterator_destroy( iterator );
	}

	if ((params.verbose > 1) && params.user_list) {
		i = 0;
		iterator = list_iterator_create( params.user_list );
		while ( (user = list_next( iterator )) ) {
			printf( "user_list[%d] = %u\n", i++, *user);
		}
		list_iterator_destroy( iterator );
	}

	printf( "-----------------------------\n\n\n" );
} ;
Пример #16
0
/*
 * slurm_sprint_job_info - output information about a specific Slurm
 *	job based upon message as loaded using slurm_load_jobs
 * IN job_ptr - an individual job information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 */
extern char *
slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner )
{
	int i, j, k;
	char time_str[32], *group_name, *user_name;
	char *gres_last = "", tmp1[128], tmp2[128];
	char *tmp6_ptr;
	char tmp_line[1024 * 128];
	char tmp_path[MAXPATHLEN];
	char *ionodes = NULL;
	uint16_t exit_status = 0, term_sig = 0;
	job_resources_t *job_resrcs = job_ptr->job_resrcs;
	char *out = NULL;
	time_t run_time;
	uint32_t min_nodes, max_nodes = 0;
	char *nodelist = "NodeList";
	bitstr_t *cpu_bitmap;
	char *host;
	int sock_inx, sock_reps, last;
	int abs_node_inx, rel_node_inx;
	int64_t nice;
	int bit_inx, bit_reps;
	uint64_t *last_mem_alloc_ptr = NULL;
	uint64_t last_mem_alloc = NO_VAL64;
	char *last_hosts;
	hostlist_t hl, hl_last;
	char select_buf[122];
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	uint32_t threads;
	char *line_end = (one_liner) ? " " : "\n   ";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		nodelist = "MidplaneList";
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &ionodes);
	}

	/****** Line 1 ******/
	xstrfmtcat(out, "JobId=%u ", job_ptr->job_id);

	if (job_ptr->array_job_id) {
		if (job_ptr->array_task_str) {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%s ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_str);
		} else {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%u ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_id);
		}
	}
	xstrfmtcat(out, "JobName=%s", job_ptr->name);
	xstrcat(out, line_end);

	/****** Line 2 ******/
	user_name = uid_to_string((uid_t) job_ptr->user_id);
	group_name = gid_to_string((gid_t) job_ptr->group_id);
	xstrfmtcat(out, "UserId=%s(%u) GroupId=%s(%u) MCS_label=%s",
		   user_name, job_ptr->user_id, group_name, job_ptr->group_id,
		   (job_ptr->mcs_label==NULL) ? "N/A" : job_ptr->mcs_label);
	xfree(user_name);
	xfree(group_name);
	xstrcat(out, line_end);

	/****** Line 3 ******/
	nice = ((int64_t)job_ptr->nice) - NICE_OFFSET;
	xstrfmtcat(out, "Priority=%u Nice=%"PRIi64" Account=%s QOS=%s",
		   job_ptr->priority, nice, job_ptr->account, job_ptr->qos);
	if (slurm_get_track_wckey())
		xstrfmtcat(out, " WCKey=%s", job_ptr->wckey);
	xstrcat(out, line_end);

	/****** Line 4 ******/
	xstrfmtcat(out, "JobState=%s ", job_state_string(job_ptr->job_state));

	if (job_ptr->state_desc) {
		/* Replace white space with underscore for easier parsing */
		for (j=0; job_ptr->state_desc[j]; j++) {
			if (isspace((int)job_ptr->state_desc[j]))
				job_ptr->state_desc[j] = '_';
		}
		xstrfmtcat(out, "Reason=%s ", job_ptr->state_desc);
	} else
		xstrfmtcat(out, "Reason=%s ", job_reason_string(job_ptr->state_reason));

	xstrfmtcat(out, "Dependency=%s", job_ptr->dependency);
	xstrcat(out, line_end);

	/****** Line 5 ******/
	xstrfmtcat(out, "Requeue=%u Restarts=%u BatchFlag=%u Reboot=%u ",
		 job_ptr->requeue, job_ptr->restart_cnt, job_ptr->batch_flag,
		 job_ptr->reboot);
	if (WIFSIGNALED(job_ptr->exit_code))
		term_sig = WTERMSIG(job_ptr->exit_code);
	exit_status = WEXITSTATUS(job_ptr->exit_code);
	xstrfmtcat(out, "ExitCode=%u:%u", exit_status, term_sig);
	xstrcat(out, line_end);

	/****** Line 5a (optional) ******/
	if (job_ptr->show_flags & SHOW_DETAIL) {
		if (WIFSIGNALED(job_ptr->derived_ec))
			term_sig = WTERMSIG(job_ptr->derived_ec);
		else
			term_sig = 0;
		exit_status = WEXITSTATUS(job_ptr->derived_ec);
		xstrfmtcat(out, "DerivedExitCode=%u:%u", exit_status, term_sig);
		xstrcat(out, line_end);
	}

	/****** Line 6 ******/
	if (IS_JOB_PENDING(job_ptr))
		run_time = 0;
	else if (IS_JOB_SUSPENDED(job_ptr))
		run_time = job_ptr->pre_sus_time;
	else {
		time_t end_time;
		if (IS_JOB_RUNNING(job_ptr) || (job_ptr->end_time == 0))
			end_time = time(NULL);
		else
			end_time = job_ptr->end_time;
		if (job_ptr->suspend_time) {
			run_time = (time_t)
				(difftime(end_time, job_ptr->suspend_time)
				 + job_ptr->pre_sus_time);
		} else
			run_time = (time_t)
				difftime(end_time, job_ptr->start_time);
	}
	secs2time_str(run_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "RunTime=%s ", time_str);

	if (job_ptr->time_limit == NO_VAL)
		xstrcat(out, "TimeLimit=Partition_Limit ");
	else {
		mins2time_str(job_ptr->time_limit, time_str, sizeof(time_str));
		xstrfmtcat(out, "TimeLimit=%s ", time_str);
	}

	if (job_ptr->time_min == 0)
		xstrcat(out, "TimeMin=N/A");
	else {
		mins2time_str(job_ptr->time_min, time_str, sizeof(time_str));
		xstrfmtcat(out, "TimeMin=%s", time_str);
	}
	xstrcat(out, line_end);

	/****** Line 7 ******/
	slurm_make_time_str(&job_ptr->submit_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "SubmitTime=%s ", time_str);

	slurm_make_time_str(&job_ptr->eligible_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "EligibleTime=%s", time_str);

	xstrcat(out, line_end);

	/****** Line 8 (optional) ******/
	if (job_ptr->resize_time) {
		slurm_make_time_str(&job_ptr->resize_time, time_str, sizeof(time_str));
		xstrfmtcat(out, "ResizeTime=%s", time_str);
		xstrcat(out, line_end);
	}

	/****** Line 9 ******/
	slurm_make_time_str(&job_ptr->start_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "StartTime=%s ", time_str);

	if ((job_ptr->time_limit == INFINITE) &&
	    (job_ptr->end_time > time(NULL)))
		xstrcat(out, "EndTime=Unknown ");
	else {
		slurm_make_time_str(&job_ptr->end_time, time_str, sizeof(time_str));
		xstrfmtcat(out, "EndTime=%s ", time_str);
	}

	if (job_ptr->deadline) {
		slurm_make_time_str(&job_ptr->deadline, time_str, sizeof(time_str));
		xstrfmtcat(out, "Deadline=%s", time_str);
	} else {
		xstrcat(out, "Deadline=N/A");
	}

	xstrcat(out, line_end);

	/****** Line 10 ******/
	if (job_ptr->preempt_time == 0)
		xstrcat(out, "PreemptTime=None ");
	else {
		slurm_make_time_str(&job_ptr->preempt_time, time_str, sizeof(time_str));
		xstrfmtcat(out, "PreemptTime=%s ", time_str);
	}

	if (job_ptr->suspend_time) {
		slurm_make_time_str(&job_ptr->suspend_time, time_str, sizeof(time_str));
		xstrfmtcat(out, "SuspendTime=%s ", time_str);
	} else
		xstrcat(out, "SuspendTime=None ");

	xstrfmtcat(out, "SecsPreSuspend=%ld", (long int)job_ptr->pre_sus_time);
	xstrcat(out, line_end);

	/****** Line 11 ******/
	xstrfmtcat(out, "Partition=%s AllocNode:Sid=%s:%u",
		   job_ptr->partition, job_ptr->alloc_node, job_ptr->alloc_sid);
	xstrcat(out, line_end);

	/****** Line 12 ******/
	xstrfmtcat(out, "Req%s=%s Exc%s=%s", nodelist, job_ptr->req_nodes,
		   nodelist, job_ptr->exc_nodes);
	xstrcat(out, line_end);

	/****** Line 13 ******/
	xstrfmtcat(out, "%s=%s", nodelist, job_ptr->nodes);
	if (job_ptr->nodes && ionodes) {
		xstrfmtcat(out, "[%s]", ionodes);
		xfree(ionodes);
	}
	if (job_ptr->sched_nodes)
		xstrfmtcat(out, " Sched%s=%s", nodelist, job_ptr->sched_nodes);

	xstrcat(out, line_end);

	/****** Line 14 (optional) ******/
	if (job_ptr->batch_host) {
		xstrfmtcat(out, "BatchHost=%s", job_ptr->batch_host);
		xstrcat(out, line_end);
	}

	/****** Line 14a (optional) ******/
	if (job_ptr->fed_siblings) {
		xstrfmtcat(out, "FedOrigin=%s FedSiblings=%s",
			   job_ptr->fed_origin_str, job_ptr->fed_siblings_str);
		xstrcat(out, line_end);
	}

	/****** Line 15 ******/
	if (cluster_flags & CLUSTER_FLAG_BG) {
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &min_nodes);
		if ((min_nodes == 0) || (min_nodes == NO_VAL)) {
			min_nodes = job_ptr->num_nodes;
			max_nodes = job_ptr->max_nodes;
		} else if (job_ptr->max_nodes)
			max_nodes = min_nodes;
	} else if (IS_JOB_PENDING(job_ptr)) {
		min_nodes = job_ptr->num_nodes;
		max_nodes = job_ptr->max_nodes;
		if (max_nodes && (max_nodes < min_nodes))
			min_nodes = max_nodes;
	} else {
		min_nodes = job_ptr->num_nodes;
		max_nodes = 0;
	}

	_sprint_range(tmp_line, sizeof(tmp_line), min_nodes, max_nodes);
	xstrfmtcat(out, "NumNodes=%s ", tmp_line);
	_sprint_range(tmp_line, sizeof(tmp_line), job_ptr->num_cpus, job_ptr->max_cpus);
	xstrfmtcat(out, "NumCPUs=%s ", tmp_line);

	xstrfmtcat(out, "NumTasks=%u ", job_ptr->num_tasks);
	xstrfmtcat(out, "CPUs/Task=%u ", job_ptr->cpus_per_task);

	if (job_ptr->boards_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "ReqB:S:C:T=*:");
	else
		xstrfmtcat(out, "ReqB:S:C:T=%u:", job_ptr->boards_per_node);

	if (job_ptr->sockets_per_board == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->sockets_per_board);

	if (job_ptr->cores_per_socket == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->cores_per_socket);

	if (job_ptr->threads_per_core == (uint16_t) NO_VAL)
		xstrcat(out, "*");
	else
		xstrfmtcat(out, "%u", job_ptr->threads_per_core);

	xstrcat(out, line_end);

	/****** Line 16 ******/
	/* Tres should already of been converted at this point from simple */
	xstrfmtcat(out, "TRES=%s",
		   job_ptr->tres_alloc_str ? job_ptr->tres_alloc_str
					   : job_ptr->tres_req_str);
	xstrcat(out, line_end);

	/****** Line 17 ******/
	if (job_ptr->sockets_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "Socks/Node=* ");
	else
		xstrfmtcat(out, "Socks/Node=%u ", job_ptr->sockets_per_node);

	if (job_ptr->ntasks_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "NtasksPerN:B:S:C=*:");
	else
		xstrfmtcat(out, "NtasksPerN:B:S:C=%u:", job_ptr->ntasks_per_node);

	if (job_ptr->ntasks_per_board == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_board);

	if ((job_ptr->ntasks_per_socket == (uint16_t) NO_VAL) ||
	    (job_ptr->ntasks_per_socket == (uint16_t) INFINITE))
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_socket);

	if ((job_ptr->ntasks_per_core == (uint16_t) NO_VAL) ||
	    (job_ptr->ntasks_per_core == (uint16_t) INFINITE))
		xstrcat(out, "* ");
	else
		xstrfmtcat(out, "%u ", job_ptr->ntasks_per_core);

	if (job_ptr->core_spec == (uint16_t) NO_VAL)
		xstrcat(out, "CoreSpec=*");
	else if (job_ptr->core_spec & CORE_SPEC_THREAD)
		xstrfmtcat(out, "ThreadSpec=%d",
			   (job_ptr->core_spec & (~CORE_SPEC_THREAD)));
	else
		xstrfmtcat(out, "CoreSpec=%u", job_ptr->core_spec);

	xstrcat(out, line_end);

	if (job_resrcs && cluster_flags & CLUSTER_FLAG_BG) {
		if ((job_resrcs->cpu_array_cnt > 0) &&
		    (job_resrcs->cpu_array_value) &&
		    (job_resrcs->cpu_array_reps)) {
			int length = 0;
			xstrcat(out, "CPUs=");
			for (i = 0; i < job_resrcs->cpu_array_cnt; i++) {
				/* only print 60 characters worth of this record */
				if (length > 60) {
					/* skip to last CPU group entry */
					if (i < job_resrcs->cpu_array_cnt - 1) {
						continue;
					}
					/* add ellipsis before last entry */
					xstrcat(out, "...,");
				}

				length += xstrfmtcat(out, "%d", job_resrcs->cpus[i]);
				if (job_resrcs->cpu_array_reps[i] > 1) {
					length += xstrfmtcat(out, "*%d",
							     job_resrcs->cpu_array_reps[i]);
				}
				if (i < job_resrcs->cpu_array_cnt - 1) {
					xstrcat(out, ",");
					length++;
				}
			}
			xstrcat(out, line_end);
		}
	} else if (job_resrcs && job_resrcs->core_bitmap &&
		   ((last = bit_fls(job_resrcs->core_bitmap)) != -1)) {
		hl = hostlist_create(job_resrcs->nodes);
		if (!hl) {
			error("slurm_sprint_job_info: hostlist_create: %s",
			      job_resrcs->nodes);
			return NULL;
		}
		hl_last = hostlist_create(NULL);
		if (!hl_last) {
			error("slurm_sprint_job_info: hostlist_create: NULL");
			hostlist_destroy(hl);
			return NULL;
		}

		bit_inx = 0;
		i = sock_inx = sock_reps = 0;
		abs_node_inx = job_ptr->node_inx[i];

		gres_last = "";
		/* tmp1[] stores the current cpu(s) allocated */
		tmp2[0] = '\0';	/* stores last cpu(s) allocated */
		for (rel_node_inx=0; rel_node_inx < job_resrcs->nhosts;
		     rel_node_inx++) {

			if (sock_reps >=
			    job_resrcs->sock_core_rep_count[sock_inx]) {
				sock_inx++;
				sock_reps = 0;
			}
			sock_reps++;

			bit_reps = job_resrcs->sockets_per_node[sock_inx] *
				   job_resrcs->cores_per_socket[sock_inx];
			host = hostlist_shift(hl);
			threads = _threads_per_core(host);
			cpu_bitmap = bit_alloc(bit_reps * threads);
			for (j = 0; j < bit_reps; j++) {
				if (bit_test(job_resrcs->core_bitmap, bit_inx)){
					for (k = 0; k < threads; k++)
						bit_set(cpu_bitmap,
							(j * threads) + k);
				}
				bit_inx++;
			}
			bit_fmt(tmp1, sizeof(tmp1), cpu_bitmap);
			FREE_NULL_BITMAP(cpu_bitmap);
			/*
			 * If the allocation values for this host are not the
			 * same as the last host, print the report of the last
			 * group of hosts that had identical allocation values.
			 */
			if (xstrcmp(tmp1, tmp2) ||
			    ((rel_node_inx < job_ptr->gres_detail_cnt) &&
			     xstrcmp(job_ptr->gres_detail_str[rel_node_inx],
				     gres_last)) ||
			    (last_mem_alloc_ptr != job_resrcs->memory_allocated) ||
			    (job_resrcs->memory_allocated &&
			     (last_mem_alloc !=
			      job_resrcs->memory_allocated[rel_node_inx]))) {
				if (hostlist_count(hl_last)) {
					last_hosts =
						hostlist_ranged_string_xmalloc(
						hl_last);
					xstrfmtcat(out,
						   "  Nodes=%s CPU_IDs=%s "
						   "Mem=%"PRIu64" GRES_IDX=%s",
						   last_hosts, tmp2,
						   last_mem_alloc_ptr ?
						   last_mem_alloc : 0,
						    gres_last);
					xfree(last_hosts);
					xstrcat(out, line_end);

					hostlist_destroy(hl_last);
					hl_last = hostlist_create(NULL);
				}
				strcpy(tmp2, tmp1);
				if (rel_node_inx < job_ptr->gres_detail_cnt) {
					gres_last = job_ptr->
						    gres_detail_str[rel_node_inx];
				} else {
					gres_last = "";
				}
				last_mem_alloc_ptr = job_resrcs->memory_allocated;
				if (last_mem_alloc_ptr)
					last_mem_alloc = job_resrcs->
						memory_allocated[rel_node_inx];
				else
					last_mem_alloc = NO_VAL64;
			}
			hostlist_push_host(hl_last, host);
			free(host);

			if (bit_inx > last)
				break;

			if (abs_node_inx > job_ptr->node_inx[i+1]) {
				i += 2;
				abs_node_inx = job_ptr->node_inx[i];
			} else {
				abs_node_inx++;
			}
		}

		if (hostlist_count(hl_last)) {
			last_hosts = hostlist_ranged_string_xmalloc(hl_last);
			xstrfmtcat(out, "  Nodes=%s CPU_IDs=%s Mem=%"PRIu64" GRES_IDX=%s",
				 last_hosts, tmp2,
				 last_mem_alloc_ptr ? last_mem_alloc : 0,
				 gres_last);
			xfree(last_hosts);
			xstrcat(out, line_end);
		}
		hostlist_destroy(hl);
		hostlist_destroy(hl_last);
	}
	/****** Line 18 ******/
	if (job_ptr->pn_min_memory & MEM_PER_CPU) {
		job_ptr->pn_min_memory &= (~MEM_PER_CPU);
		tmp6_ptr = "CPU";
	} else
		tmp6_ptr = "Node";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		convert_num_unit((float)job_ptr->pn_min_cpus, tmp1,
				 sizeof(tmp1), UNIT_NONE, NO_VAL,
				 CONVERT_NUM_UNIT_EXACT);
		xstrfmtcat(out, "MinCPUsNode=%s ", tmp1);
	} else {
		xstrfmtcat(out, "MinCPUsNode=%u ", job_ptr->pn_min_cpus);
	}

	convert_num_unit((float)job_ptr->pn_min_memory, tmp1, sizeof(tmp1),
			 UNIT_MEGA, NO_VAL, CONVERT_NUM_UNIT_EXACT);
	convert_num_unit((float)job_ptr->pn_min_tmp_disk, tmp2, sizeof(tmp2),
			 UNIT_MEGA, NO_VAL, CONVERT_NUM_UNIT_EXACT);
	xstrfmtcat(out, "MinMemory%s=%s MinTmpDiskNode=%s", tmp6_ptr, tmp1, tmp2);
	xstrcat(out, line_end);

	/****** Line ******/
	secs2time_str((time_t)job_ptr->delay_boot, tmp1, sizeof(tmp1));
	xstrfmtcat(out, "Features=%s DelayBoot=%s", job_ptr->features, tmp1);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "Gres=%s Reservation=%s",
		   job_ptr->gres, job_ptr->resv_name);
	xstrcat(out, line_end);

	/****** Line 20 ******/
	xstrfmtcat(out, "OverSubscribe=%s Contiguous=%d Licenses=%s Network=%s",
		   job_share_string(job_ptr->shared), job_ptr->contiguous,
		   job_ptr->licenses, job_ptr->network);
	xstrcat(out, line_end);

	/****** Line 21 ******/
	xstrfmtcat(out, "Command=%s", job_ptr->command);
	xstrcat(out, line_end);

	/****** Line 22 ******/
	xstrfmtcat(out, "WorkDir=%s", job_ptr->work_dir);

	if (cluster_flags & CLUSTER_FLAG_BG) {
		/****** Line 23 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_BG_ID);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "Block_ID=%s", select_buf);
		}

		/****** Line 24 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MIXED_SHORT);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrcat(out, select_buf);
		}

		/****** Line 26 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_LINUX_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "CnloadImage=%s", select_buf);
		}
		/****** Line 27 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MLOADER_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "MloaderImage=%s", select_buf);
		}
		/****** Line 28 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_RAMDISK_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "IoloadImage=%s", select_buf);
		}
	}

	/****** Line (optional) ******/
	if (job_ptr->admin_comment) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "AdminComment=%s ", job_ptr->admin_comment);
	}

	/****** Line (optional) ******/
	if (job_ptr->comment) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "Comment=%s ", job_ptr->comment);
	}

	/****** Line 30 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stderr(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdErr=%s", tmp_path);
	}

	/****** Line 31 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stdin(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdIn=%s", tmp_path);
	}

	/****** Line 32 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stdout(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdOut=%s", tmp_path);
	}

	/****** Line 33 (optional) ******/
	if (job_ptr->batch_script) {
		xstrcat(out, line_end);
		xstrcat(out, "BatchScript=\n");
		xstrcat(out, job_ptr->batch_script);
	}

	/****** Line 34 (optional) ******/
	if (job_ptr->req_switch) {
		char time_buf[32];
		xstrcat(out, line_end);
		secs2time_str((time_t) job_ptr->wait4switch, time_buf,
			      sizeof(time_buf));
		xstrfmtcat(out, "Switches=%u@%s\n", job_ptr->req_switch, time_buf);
	}

	/****** Line 35 (optional) ******/
	if (job_ptr->burst_buffer) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "BurstBuffer=%s", job_ptr->burst_buffer);
	}

	/****** Line (optional) ******/
	if (job_ptr->burst_buffer_state) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "BurstBufferState=%s",
			   job_ptr->burst_buffer_state);
	}

	/****** Line 36 (optional) ******/
	if (cpu_freq_debug(NULL, NULL, tmp1, sizeof(tmp1),
			   job_ptr->cpu_freq_gov, job_ptr->cpu_freq_min,
			   job_ptr->cpu_freq_max, NO_VAL) != 0) {
		xstrcat(out, line_end);
		xstrcat(out, tmp1);
	}

	/****** Line 37 ******/
	xstrcat(out, line_end);
	xstrfmtcat(out, "Power=%s", power_flags_str(job_ptr->power_flags));

	/****** Line 38 (optional) ******/
	if (job_ptr->bitflags) {
		xstrcat(out, line_end);
		if (job_ptr->bitflags & GRES_ENFORCE_BIND)
			xstrcat(out, "GresEnforceBind=Yes");
		if (job_ptr->bitflags & KILL_INV_DEP)
			xstrcat(out, "KillOInInvalidDependent=Yes");
		if (job_ptr->bitflags & NO_KILL_INV_DEP)
			xstrcat(out, "KillOInInvalidDependent=No");
		if (job_ptr->bitflags & SPREAD_JOB)
			xstrcat(out, "SpreadJob=Yes");
	}

	/****** END OF JOB RECORD ******/
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}
Пример #17
0
/*
 * slurm_sprint_job_step_info - output information about a specific Slurm
 *	job step based upon message as loaded using slurm_get_job_steps
 * IN job_ptr - an individual job step information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 */
char *
slurm_sprint_job_step_info ( job_step_info_t * job_step_ptr,
			    int one_liner )
{
	char tmp_node_cnt[40];
	char time_str[32];
	char limit_str[32];
	char tmp_line[128];
	char *out = NULL;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();

	/****** Line 1 ******/
	slurm_make_time_str ((time_t *)&job_step_ptr->start_time, time_str,
		sizeof(time_str));
	if (job_step_ptr->time_limit == INFINITE)
		sprintf(limit_str, "UNLIMITED");
	else
		secs2time_str ((time_t)job_step_ptr->time_limit * 60,
				limit_str, sizeof(limit_str));
	if (job_step_ptr->array_job_id) {
		if (job_step_ptr->step_id == INFINITE) {	/* Pending */
			snprintf(tmp_line, sizeof(tmp_line),
				 "StepId=%u_%u.TBD ",
				 job_step_ptr->array_job_id,
				 job_step_ptr->array_task_id);
		} else {
			snprintf(tmp_line, sizeof(tmp_line), "StepId=%u_%u.%u ",
				 job_step_ptr->array_job_id,
				 job_step_ptr->array_task_id,
				 job_step_ptr->step_id);
		}
		out = xstrdup(tmp_line);
	} else {
		if (job_step_ptr->step_id == INFINITE) {	/* Pending */
			snprintf(tmp_line, sizeof(tmp_line), "StepId=%u.TBD ",
				 job_step_ptr->job_id);
		} else {
			snprintf(tmp_line, sizeof(tmp_line), "StepId=%u.%u ",
				 job_step_ptr->job_id, job_step_ptr->step_id);
		}
		out = xstrdup(tmp_line);
	}
	snprintf(tmp_line, sizeof(tmp_line),
		 "UserId=%u StartTime=%s TimeLimit=%s",
		 job_step_ptr->user_id, time_str, limit_str);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 2 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "State=%s ",
		 job_state_string(job_step_ptr->state));
	xstrcat(out, tmp_line);
	if (cluster_flags & CLUSTER_FLAG_BG) {
		char *io_nodes = NULL;
		select_g_select_jobinfo_get(job_step_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &io_nodes);
		if (io_nodes) {
			snprintf(tmp_line, sizeof(tmp_line),
				 "Partition=%s MidplaneList=%s[%s] Gres=%s",
				 job_step_ptr->partition,
				 job_step_ptr->nodes, io_nodes,
				 job_step_ptr->gres);
			xfree(io_nodes);
		} else
			snprintf(tmp_line, sizeof(tmp_line),
				 "Partition=%s MidplaneList=%s Gres=%s",
				 job_step_ptr->partition,
				 job_step_ptr->nodes,
				 job_step_ptr->gres);
	} else {
		snprintf(tmp_line, sizeof(tmp_line),
			"Partition=%s NodeList=%s Gres=%s",
			job_step_ptr->partition, job_step_ptr->nodes,
			job_step_ptr->gres);
	}
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 3 ******/
	if (cluster_flags & CLUSTER_FLAG_BGQ) {
		uint32_t nodes = 0;
		select_g_select_jobinfo_get(job_step_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &nodes);
		convert_num_unit((float)nodes, tmp_node_cnt,
				 sizeof(tmp_node_cnt), UNIT_NONE,
				 CONVERT_NUM_UNIT_EXACT);
	} else {
		convert_num_unit((float)_nodes_in_list(job_step_ptr->nodes),
				 tmp_node_cnt, sizeof(tmp_node_cnt),
				 UNIT_NONE, CONVERT_NUM_UNIT_EXACT);
	}

	snprintf(tmp_line, sizeof(tmp_line),
		"Nodes=%s CPUs=%u Tasks=%u Name=%s Network=%s",
		 tmp_node_cnt, job_step_ptr->num_cpus, job_step_ptr->num_tasks,
		 job_step_ptr->name, job_step_ptr->network);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 4 ******/
	snprintf(tmp_line, sizeof(tmp_line), "TRES=%s",
		 job_step_ptr->tres_alloc_str);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 5 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		"ResvPorts=%s Checkpoint=%u CheckpointDir=%s",
		 job_step_ptr->resv_ports,
		 job_step_ptr->ckpt_interval, job_step_ptr->ckpt_dir);
	xstrcat(out, tmp_line);
	if (one_liner)
		xstrcat(out, " ");
	else
		xstrcat(out, "\n   ");

	/****** Line 6 ******/
	if (cpu_freq_debug(NULL, NULL, tmp_line, sizeof(tmp_line),
			   job_step_ptr->cpu_freq_gov,
			   job_step_ptr->cpu_freq_min,
			   job_step_ptr->cpu_freq_max, NO_VAL) != 0) {
		xstrcat(out, tmp_line);
	} else {
		xstrcat(out, "CPUFreqReq=Default");
	}
	xstrfmtcat(out, " Dist=%s",
		   slurm_step_layout_type_name(job_step_ptr->task_dist));
	xstrcat(out, "\n\n");

	return out;
}
Пример #18
0
extern int slurm_jobcomp_log_record ( struct job_record *job_ptr )
{
	int rc = SLURM_SUCCESS;
	char job_rec[1024];
	char usr_str[32], grp_str[32], start_str[32], end_str[32], lim_str[32];
	char select_buf[128], *state_string, *work_dir;
	size_t offset = 0, tot_size, wrote;
	enum job_states job_state;
	uint32_t time_limit;

	if ((log_name == NULL) || (job_comp_fd < 0)) {
		error("JobCompLoc log file %s not open", log_name);
		return SLURM_ERROR;
	}

	slurm_mutex_lock( &file_lock );
	_get_user_name(job_ptr->user_id, usr_str, sizeof(usr_str));
	_get_group_name(job_ptr->group_id, grp_str, sizeof(grp_str));

	if ((job_ptr->time_limit == NO_VAL) && job_ptr->part_ptr)
		time_limit = job_ptr->part_ptr->max_time;
	else
		time_limit = job_ptr->time_limit;
	if (time_limit == INFINITE)
		strcpy(lim_str, "UNLIMITED");
	else {
		snprintf(lim_str, sizeof(lim_str), "%lu",
			 (unsigned long) time_limit);
	}

	if (job_ptr->job_state & JOB_RESIZING) {
		time_t now = time(NULL);
		state_string = job_state_string(job_ptr->job_state);
		if (job_ptr->resize_time) {
			_make_time_str(&job_ptr->resize_time, start_str,
				       sizeof(start_str));
		} else {
			_make_time_str(&job_ptr->start_time, start_str,
				       sizeof(start_str));
		}
		_make_time_str(&now, end_str, sizeof(end_str));
	} else {
		/* Job state will typically have JOB_COMPLETING or JOB_RESIZING
		 * flag set when called. We remove the flags to get the eventual
		 * completion state: JOB_FAILED, JOB_TIMEOUT, etc. */
		job_state = job_ptr->job_state & JOB_STATE_BASE;
		state_string = job_state_string(job_state);
		if (job_ptr->resize_time) {
			_make_time_str(&job_ptr->resize_time, start_str,
				       sizeof(start_str));
		} else if (job_ptr->start_time > job_ptr->end_time) {
			/* Job cancelled while pending and
			 * expected start time is in the future. */
			snprintf(start_str, sizeof(start_str), "Unknown");
		} else {
			_make_time_str(&job_ptr->start_time, start_str,
				       sizeof(start_str));
		}
		_make_time_str(&job_ptr->end_time, end_str, sizeof(end_str));
	}

	if (job_ptr->details && job_ptr->details->work_dir)
		work_dir = job_ptr->details->work_dir;
	else
		work_dir = "unknown";

	select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
		select_buf, sizeof(select_buf), SELECT_PRINT_MIXED);

	snprintf(job_rec, sizeof(job_rec), JOB_FORMAT,
		 (unsigned long) job_ptr->job_id, usr_str,
		 (unsigned long) job_ptr->user_id, grp_str,
		 (unsigned long) job_ptr->group_id, job_ptr->name,
		 state_string, job_ptr->partition, lim_str, start_str,
		 end_str, job_ptr->nodes, job_ptr->node_cnt,
		 job_ptr->total_cpus, work_dir,
		 select_buf);
	tot_size = strlen(job_rec);

	while ( offset < tot_size ) {
		wrote = write(job_comp_fd, job_rec + offset,
			tot_size - offset);
		if (wrote == -1) {
			if (errno == EAGAIN)
				continue;
			else {
				plugin_errno = errno;
				rc = SLURM_ERROR;
				break;
			}
		}
		offset += wrote;
	}
	slurm_mutex_unlock( &file_lock );
	return rc;
}
Пример #19
0
void print_fields(type_t type, void *object)
{
	if (!object) {
		fatal ("Job or step record is NULL");
		return;
	}

	slurmdb_job_rec_t *job = (slurmdb_job_rec_t *)object;
	slurmdb_step_rec_t *step = (slurmdb_step_rec_t *)object;
	jobcomp_job_rec_t *job_comp = (jobcomp_job_rec_t *)object;
	print_field_t *field = NULL;
	int curr_inx = 1;
	struct passwd *pw = NULL;
	struct	group *gr = NULL;
	char outbuf[FORMAT_STRING_SIZE];
	bool got_stats = false;
	int cpu_tres_rec_count = 0;
	int step_cpu_tres_rec_count = 0;

	switch(type) {
	case JOB:
		step = NULL;
		if (!job->track_steps)
			step = (slurmdb_step_rec_t *)job->first_step_ptr;
		/* set this to avoid printing out info for things that
		   don't mean anything.  Like an allocation that never
		   ran anything.
		*/
		if (!step)
			job->track_steps = 1;
		else
			step_cpu_tres_rec_count =
				slurmdb_find_tres_count_in_string(
					step->tres_alloc_str, TRES_CPU);

		if (job->stats.cpu_min != NO_VAL)
			got_stats = true;

		job_comp = NULL;
		cpu_tres_rec_count = slurmdb_find_tres_count_in_string(
			(job->tres_alloc_str && job->tres_alloc_str[0]) ?
			job->tres_alloc_str : job->tres_req_str,
			TRES_CPU);
		break;
	case JOBSTEP:
		job = step->job_ptr;

		if (step->stats.cpu_min != NO_VAL)
			got_stats = true;

		if ((step_cpu_tres_rec_count =
		     slurmdb_find_tres_count_in_string(
			     step->tres_alloc_str, TRES_CPU)) == INFINITE64)
			step_cpu_tres_rec_count =
				slurmdb_find_tres_count_in_string(
					(job->tres_alloc_str &&
					 job->tres_alloc_str[0]) ?
					job->tres_alloc_str : job->tres_req_str,
					TRES_CPU);

		job_comp = NULL;
		break;
	case JOBCOMP:
		job = NULL;
		step = NULL;
		break;
	default:
		break;
	}

	if ((uint64_t)cpu_tres_rec_count == INFINITE64)
		cpu_tres_rec_count = 0;

	if ((uint64_t)step_cpu_tres_rec_count == INFINITE64)
		step_cpu_tres_rec_count = 0;

	list_iterator_reset(print_fields_itr);
	while((field = list_next(print_fields_itr))) {
		char *tmp_char = NULL, id[FORMAT_STRING_SIZE];
		int tmp_int = NO_VAL, tmp_int2 = NO_VAL;
		double tmp_dub = (double)NO_VAL; /* don't use NO_VAL64
						    unless we can
						    confirm the values
						    coming in are
						    NO_VAL64 */
		uint32_t tmp_uint32 = NO_VAL;
		uint64_t tmp_uint64 = NO_VAL64;

		memset(&outbuf, 0, sizeof(outbuf));
		switch(field->type) {
		case PRINT_ALLOC_CPUS:
			switch(type) {
			case JOB:
				tmp_int = cpu_tres_rec_count;

				// we want to use the step info
				if (!step)
					break;
			case JOBSTEP:
				tmp_int = step_cpu_tres_rec_count;
				break;
			case JOBCOMP:
			default:
				tmp_int = job_comp->proc_cnt;
				break;
			}
			field->print_routine(field,
					     tmp_int,
					     (curr_inx == field_count));
			break;
		case PRINT_ALLOC_GRES:
			switch(type) {
			case JOB:
				tmp_char = job->alloc_gres;
				break;
			case JOBSTEP:
				tmp_char = step->job_ptr->alloc_gres;
				break;
			case JOBCOMP:
			default:
				tmp_char = NULL;
				break;
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			break;
		case PRINT_ALLOC_NODES:
			switch(type) {
			case JOB:
				tmp_int = job->alloc_nodes;
				tmp_char = job->tres_alloc_str;
				break;
			case JOBSTEP:
				tmp_int = step->nnodes;
				tmp_char = step->tres_alloc_str;
				break;
			case JOBCOMP:
				tmp_int = job_comp->node_cnt;
				break;
			default:
				break;
			}

			if (!tmp_int && tmp_char) {
				if ((tmp_uint64 =
				     slurmdb_find_tres_count_in_string(
					     tmp_char, TRES_NODE))
				    != INFINITE64)
					tmp_int = tmp_uint64;
			}
			convert_num_unit((double)tmp_int, outbuf,
					 sizeof(outbuf), UNIT_NONE, NO_VAL,
					 params.convert_flags);
			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_ACCOUNT:
			switch(type) {
			case JOB:
				tmp_char = job->account;
				break;
			case JOBSTEP:
				tmp_char = step->job_ptr->account;
				break;
			case JOBCOMP:
			default:
				tmp_char = "n/a";
				break;
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			break;
		case PRINT_ACT_CPUFREQ:
			if (got_stats) {
				switch (type) {
				case JOB:
					if (!job->track_steps)
						tmp_dub =
							step->stats.act_cpufreq;
					break;
				case JOBSTEP:
					tmp_dub = step->stats.act_cpufreq;
					break;
				default:
					break;
				}
			}
			if (!fuzzy_equal(tmp_dub, NO_VAL))
				convert_num_unit2((double)tmp_dub, outbuf,
						  sizeof(outbuf), UNIT_KILO,
						  params.units, 1000,
						  params.convert_flags &
						  (~CONVERT_NUM_UNIT_EXACT));

			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_ASSOCID:
			switch(type) {
			case JOB:
				tmp_int = job->associd;
				break;
			case JOBSTEP:
				tmp_int = step->job_ptr->associd;
				break;
			case JOBCOMP:
			default:
				tmp_int = NO_VAL;
				break;
			}
			field->print_routine(field,
					     tmp_int,
					     (curr_inx == field_count));
			break;
		case PRINT_AVECPU:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_dub = job->stats.cpu_ave;
					break;
				case JOBSTEP:
					tmp_dub = step->stats.cpu_ave;
					break;
				case JOBCOMP:
				default:
					break;
				}
			}

			if (!fuzzy_equal(tmp_dub, NO_VAL))
				tmp_char = _elapsed_time((long)tmp_dub, 0);

			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			xfree(tmp_char);
			break;
		case PRINT_AVEDISKREAD:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_dub = job->
							stats.disk_read_ave;
					break;
				case JOBSTEP:
					tmp_dub = step->stats.disk_read_ave;
					break;
				case JOBCOMP:
				default:
					break;
				}
			}
			_print_small_double(outbuf, sizeof(outbuf),
					    tmp_dub, UNIT_MEGA);

			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_AVEDISKWRITE:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_dub = job->
							stats.disk_write_ave;
					break;
				case JOBSTEP:
					tmp_dub = step->stats.disk_write_ave;
					break;
				case JOBCOMP:
				default:
					break;
				}
			}
			_print_small_double(outbuf, sizeof(outbuf),
					    tmp_dub, UNIT_MEGA);

			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_AVEPAGES:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_dub = job->stats.pages_ave;
					break;
				case JOBSTEP:
					tmp_dub = step->stats.pages_ave;
					break;
				case JOBCOMP:
				default:
					break;
				}
			}
			if (!fuzzy_equal(tmp_dub, NO_VAL))
				convert_num_unit((double)tmp_dub, outbuf,
						 sizeof(outbuf), UNIT_KILO,
						 params.units,
						 params.convert_flags);

			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_AVERSS:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_dub = job->stats.rss_ave;
					break;
				case JOBSTEP:
					tmp_dub = step->stats.rss_ave;
					break;
				case JOBCOMP:
				default:
					break;
				}
			}
			if (!fuzzy_equal(tmp_dub, NO_VAL))
				convert_num_unit((double)tmp_dub, outbuf,
						 sizeof(outbuf), UNIT_KILO,
						 params.units,
						 params.convert_flags);

			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_AVEVSIZE:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_dub = job->stats.vsize_ave;
					break;
				case JOBSTEP:
					tmp_dub = step->stats.vsize_ave;
					break;
				case JOBCOMP:
				default:
					break;
				}
			}

			if (!fuzzy_equal(tmp_dub, NO_VAL))
				convert_num_unit((double)tmp_dub, outbuf,
						 sizeof(outbuf), UNIT_KILO,
						 params.units,
						 params.convert_flags);

			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_BLOCKID:
			switch(type) {
			case JOB:
				tmp_char = job->blockid;
				break;
			case JOBSTEP:
				break;
			case JOBCOMP:
				tmp_char = job_comp->blockid;
				break;
			default:
				break;
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			break;
		case PRINT_CLUSTER:
			switch(type) {
			case JOB:
				tmp_char = job->cluster;
				break;
			case JOBSTEP:
				tmp_char = step->job_ptr->cluster;
				break;
			case JOBCOMP:
			default:
				break;
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			break;
		case PRINT_COMMENT:
			switch(type) {
			case JOB:
				tmp_char = job->derived_es;
				break;
			case JOBSTEP:
			case JOBCOMP:
			default:
				tmp_char = NULL;
				break;
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			break;
		case PRINT_CONSUMED_ENERGY:
			if (got_stats) {
				switch (type) {
				case JOB:
					if (!job->track_steps)
						tmp_dub = step->
							stats.consumed_energy;
					break;
				case JOBSTEP:
					tmp_dub = step->stats.consumed_energy;
					break;
				default:
					break;
				}
			}
			if (!fuzzy_equal(tmp_dub, NO_VAL))
				convert_num_unit2((double)tmp_dub, outbuf,
						  sizeof(outbuf), UNIT_NONE,
						  params.units, 1000,
						  params.convert_flags &
						  (~CONVERT_NUM_UNIT_EXACT));

			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_CONSUMED_ENERGY_RAW:
			if (got_stats) {
				switch (type) {
				case JOB:
					if (!job->track_steps)
						tmp_dub = step->
							stats.consumed_energy;
					break;
				case JOBSTEP:
					tmp_dub = step->stats.consumed_energy;
					break;
				default:
					break;
				}
			}

			field->print_routine(field,
					     tmp_dub,
					     (curr_inx == field_count));
			break;
		case PRINT_CPU_TIME:
			switch(type) {
			case JOB:
				tmp_uint64 = (uint64_t)job->elapsed
					* (uint64_t)cpu_tres_rec_count;
				break;
			case JOBSTEP:
				tmp_uint64 = (uint64_t)step->elapsed
					* (uint64_t)step_cpu_tres_rec_count;
				break;
			case JOBCOMP:
				break;
			default:
				break;
			}
			field->print_routine(field,
					     tmp_uint64,
					     (curr_inx == field_count));
			break;
		case PRINT_CPU_TIME_RAW:
			switch(type) {
			case JOB:
				tmp_uint64 = (uint64_t)job->elapsed
					* (uint64_t)cpu_tres_rec_count;
				break;
			case JOBSTEP:
				tmp_uint64 = (uint64_t)step->elapsed
					* (uint64_t)step_cpu_tres_rec_count;
				break;
			case JOBCOMP:
				break;
			default:
				break;
			}
			field->print_routine(field,
					     tmp_uint64,
					     (curr_inx == field_count));
			break;
		case PRINT_DERIVED_EC:
			tmp_int2 = 0;
			switch(type) {
			case JOB:
				tmp_int = job->derived_ec;
				if (tmp_int == NO_VAL)
					tmp_int = 0;
				if (WIFSIGNALED(tmp_int))
					tmp_int2 = WTERMSIG(tmp_int);

				snprintf(outbuf, sizeof(outbuf), "%d:%d",
					 WEXITSTATUS(tmp_int), tmp_int2);
				break;
			case JOBSTEP:
			case JOBCOMP:
			default:
				outbuf[0] = '\0';
				break;
			}

			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_ELAPSED:
			switch(type) {
			case JOB:
				tmp_int = job->elapsed;
				break;
			case JOBSTEP:
				tmp_int = step->elapsed;
				break;
			case JOBCOMP:
				tmp_int = job_comp->elapsed_time;
				break;
			default:
				tmp_int = NO_VAL;
				break;
			}
			field->print_routine(field,
					     (uint64_t)tmp_int,
					     (curr_inx == field_count));
			break;
		case PRINT_ELIGIBLE:
			switch(type) {
			case JOB:
				tmp_int = job->eligible;
				break;
			case JOBSTEP:
				tmp_int = step->start;
				break;
			case JOBCOMP:
				break;
			default:
				break;
			}
			field->print_routine(field,
					     tmp_int,
					     (curr_inx == field_count));
			break;
		case PRINT_END:
			switch(type) {
			case JOB:
				tmp_int = job->end;
				break;
			case JOBSTEP:
				tmp_int = step->end;
				break;
			case JOBCOMP:
				tmp_int = parse_time(job_comp->end_time, 1);
				break;
			default:
				tmp_int = NO_VAL;
				break;
			}
			field->print_routine(field,
					     tmp_int,
					     (curr_inx == field_count));
			break;
		case PRINT_EXITCODE:
			tmp_int = 0;
			tmp_int2 = 0;
			switch(type) {
			case JOB:
				tmp_int = job->exitcode;
				break;
			case JOBSTEP:
				tmp_int = step->exitcode;
				break;
			case JOBCOMP:
			default:
				break;
			}
			if (tmp_int != NO_VAL) {
				if (WIFSIGNALED(tmp_int))
					tmp_int2 = WTERMSIG(tmp_int);
				tmp_int = WEXITSTATUS(tmp_int);
				if (tmp_int >= 128)
					tmp_int -= 128;
			}
			snprintf(outbuf, sizeof(outbuf), "%d:%d",
				 tmp_int, tmp_int2);

			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_GID:
			switch(type) {
			case JOB:
				tmp_int = job->gid;
				break;
			case JOBSTEP:
				tmp_int = NO_VAL;
				break;
			case JOBCOMP:
				tmp_int = job_comp->gid;
				break;
			default:
				tmp_int = NO_VAL;
				break;
			}
			field->print_routine(field,
					     tmp_int,
					     (curr_inx == field_count));
			break;
		case PRINT_GROUP:
			switch(type) {
			case JOB:
				tmp_int = job->gid;
				break;
			case JOBSTEP:
				tmp_int = NO_VAL;
				break;
			case JOBCOMP:
				tmp_int = job_comp->gid;
				break;
			default:
				tmp_int = NO_VAL;
				break;
			}
			tmp_char = NULL;
			if ((gr=getgrgid(tmp_int)))
				tmp_char=gr->gr_name;

			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			break;
		case PRINT_JOBID:
			if (type == JOBSTEP)
				job = step->job_ptr;

			if (job) {
				if (job->array_task_str) {
					_xlate_task_str(job);
					snprintf(id, FORMAT_STRING_SIZE,
						 "%u_[%s]",
						 job->array_job_id,
						 job->array_task_str);
				} else if (job->array_task_id != NO_VAL)
					snprintf(id, FORMAT_STRING_SIZE,
						 "%u_%u",
						 job->array_job_id,
						 job->array_task_id);
				else
					snprintf(id, FORMAT_STRING_SIZE,
						 "%u",
						 job->jobid);
			}

			switch (type) {
			case JOB:
				tmp_char = xstrdup(id);
				break;
			case JOBSTEP:
				if (step->stepid == SLURM_BATCH_SCRIPT) {
					tmp_char = xstrdup_printf(
						"%s.batch", id);
				} else if (step->stepid == SLURM_EXTERN_CONT) {
					tmp_char = xstrdup_printf(
						"%s.extern", id);
				} else {
					tmp_char = xstrdup_printf(
						"%s.%u",
						id, step->stepid);
				}
				break;
			case JOBCOMP:
				tmp_char = xstrdup_printf("%u",
							  job_comp->jobid);
				break;
			default:
				break;
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			xfree(tmp_char);
			break;
		case PRINT_JOBIDRAW:
			switch (type) {
			case JOB:
				tmp_char = xstrdup_printf("%u", job->jobid);
				break;
			case JOBSTEP:
				if (step->stepid == SLURM_BATCH_SCRIPT) {
					tmp_char = xstrdup_printf(
						"%u.batch",
						step->job_ptr->jobid);
				} else if (step->stepid == SLURM_EXTERN_CONT) {
					tmp_char = xstrdup_printf(
						"%u.extern",
						step->job_ptr->jobid);
				} else {
					tmp_char = xstrdup_printf(
						"%u.%u",
						step->job_ptr->jobid,
						step->stepid);
				}
				break;
			case JOBCOMP:
				tmp_char = xstrdup_printf("%u",
							  job_comp->jobid);
				break;
			default:
				break;
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			xfree(tmp_char);
			break;
		case PRINT_JOBNAME:
			switch(type) {
			case JOB:
				tmp_char = job->jobname;
				break;
			case JOBSTEP:
				tmp_char = step->stepname;
				break;
			case JOBCOMP:
				tmp_char = job_comp->jobname;
				break;
			default:
				tmp_char = NULL;
				break;
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			break;
		case PRINT_LAYOUT:
			switch(type) {
			case JOB:
				/* below really should be step.  It is
				   not a typo */
				if (!job->track_steps)
					tmp_char = slurm_step_layout_type_name(
						step->task_dist);
				break;
			case JOBSTEP:
				tmp_char = slurm_step_layout_type_name(
					step->task_dist);
				break;
			case JOBCOMP:
				break;
			default:
				tmp_char = NULL;
				break;
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			break;
		case PRINT_MAXDISKREAD:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_dub = job->
							stats.disk_read_max;
					break;
				case JOBSTEP:
					tmp_dub = step->stats.disk_read_max;
					break;
				case JOBCOMP:
				default:
					break;
				}
			}
			_print_small_double(outbuf, sizeof(outbuf),
					    tmp_dub, UNIT_MEGA);

			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_MAXDISKREADNODE:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_char = find_hostname(
							job->stats.
							disk_read_max_nodeid,
							job->nodes);
					break;
				case JOBSTEP:
					tmp_char = find_hostname(
						step->stats.
						disk_read_max_nodeid,
						step->nodes);
					break;
				case JOBCOMP:
				default:
					tmp_char = NULL;
					break;
				}
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			xfree(tmp_char);
			break;
		case PRINT_MAXDISKREADTASK:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_uint32 =
							job->stats.
							disk_read_max_taskid;
					break;
				case JOBSTEP:
					tmp_uint32 = step->stats.
						disk_read_max_taskid;
					break;
				case JOBCOMP:
				default:
					tmp_uint32 = NO_VAL;
					break;
				}
			}
			if (tmp_uint32 == (uint32_t)NO_VAL)
				tmp_uint32 = NO_VAL;
			field->print_routine(field,
					     tmp_uint32,
					     (curr_inx == field_count));
			break;
		case PRINT_MAXDISKWRITE:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_dub = job->stats.
							disk_write_max;
					break;
				case JOBSTEP:
					tmp_dub = step->stats.disk_write_max;
					break;
				case JOBCOMP:
				default:
					break;
				}
			}
			_print_small_double(outbuf, sizeof(outbuf),
					    tmp_dub, UNIT_MEGA);

			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_MAXDISKWRITENODE:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_char = find_hostname(
							job->stats.
							disk_write_max_nodeid,
							job->nodes);
					break;
				case JOBSTEP:
					tmp_char = find_hostname(
						step->stats.
						disk_write_max_nodeid,
						step->nodes);
					break;
				case JOBCOMP:
				default:
					tmp_char = NULL;
					break;
				}
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			xfree(tmp_char);
			break;
		case PRINT_MAXDISKWRITETASK:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_uint32 =
							job->stats.
							disk_write_max_taskid;
					break;
				case JOBSTEP:
					tmp_uint32 = step->stats.
						disk_write_max_taskid;
					break;
				case JOBCOMP:
				default:
					tmp_uint32 = NO_VAL;
					break;
				}
				if (tmp_uint32 == (uint32_t)NO_VAL)
					tmp_uint32 = NO_VAL;
			}
			field->print_routine(field,
					     tmp_uint32,
					     (curr_inx == field_count));
			break;
		case PRINT_MAXPAGES:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_uint64 =
							job->stats.pages_max;
					break;
				case JOBSTEP:
					tmp_uint64 = step->stats.pages_max;
					break;
				case JOBCOMP:
				default:
					break;
				}
				if (tmp_uint64 != (uint64_t)NO_VAL64)
					convert_num_unit(
						(double)tmp_uint64,
						outbuf, sizeof(outbuf),
						UNIT_KILO, params.units,
						params.convert_flags);
			}

			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_MAXPAGESNODE:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_char = find_hostname(
							job->stats.
							pages_max_nodeid,
							job->nodes);
					break;
				case JOBSTEP:
					tmp_char = find_hostname(
						step->stats.pages_max_nodeid,
						step->nodes);
					break;
				case JOBCOMP:
				default:
					tmp_char = NULL;
					break;
				}
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			xfree(tmp_char);
			break;
		case PRINT_MAXPAGESTASK:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_uint32 =
							job->stats.
							pages_max_taskid;
					break;
				case JOBSTEP:
					tmp_uint32 = step->stats.
						pages_max_taskid;
					break;
				case JOBCOMP:
				default:
					tmp_uint32 = NO_VAL;
					break;
				}
				if (tmp_uint32 == (uint32_t)NO_VAL)
					tmp_uint32 = NO_VAL;
			}

			field->print_routine(field,
					     tmp_uint32,
					     (curr_inx == field_count));
			break;
		case PRINT_MAXRSS:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_uint64 = job->stats.rss_max;
					break;
				case JOBSTEP:
					tmp_uint64 = step->stats.rss_max;
					break;
				case JOBCOMP:
				default:
					break;
				}
				if (tmp_uint64 != (uint64_t)NO_VAL64)
					convert_num_unit(
						(double)tmp_uint64,
						outbuf, sizeof(outbuf),
						UNIT_KILO, params.units,
						params.convert_flags);
			}

			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_MAXRSSNODE:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_char = find_hostname(
							job->stats.
							rss_max_nodeid,
							job->nodes);
					break;
				case JOBSTEP:
					tmp_char = find_hostname(
						step->stats.rss_max_nodeid,
						step->nodes);
					break;
				case JOBCOMP:
				default:
					tmp_char = NULL;
					break;
				}
			}

			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			xfree(tmp_char);
			break;
		case PRINT_MAXRSSTASK:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_uint32 = job->stats.
							rss_max_taskid;
					break;
				case JOBSTEP:
					tmp_uint32 = step->stats.rss_max_taskid;
					break;
				case JOBCOMP:
				default:
					tmp_uint32 = NO_VAL;
					break;
				}
				if (tmp_uint32 == (uint32_t)NO_VAL)
					tmp_uint32 = NO_VAL;
			}

			field->print_routine(field,
					     tmp_uint32,
					     (curr_inx == field_count));
			break;
		case PRINT_MAXVSIZE:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_uint64 = job->stats.
							vsize_max;
					break;
				case JOBSTEP:
					tmp_uint64 = step->stats.vsize_max;
					break;
				case JOBCOMP:
				default:
					tmp_uint64 = (uint64_t)NO_VAL64;
					break;
				}

				if (tmp_uint64 != (uint64_t)NO_VAL64)
					convert_num_unit(
						(double)tmp_uint64,
						outbuf, sizeof(outbuf),
						UNIT_KILO, params.units,
						params.convert_flags);
			}

			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_MAXVSIZENODE:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_char = find_hostname(
							job->stats.
							vsize_max_nodeid,
							job->nodes);
					break;
				case JOBSTEP:
					tmp_char = find_hostname(
						step->stats.vsize_max_nodeid,
						step->nodes);
					break;
				case JOBCOMP:
				default:
					tmp_char = NULL;
					break;
				}
			}

			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			xfree(tmp_char);
			break;
		case PRINT_MAXVSIZETASK:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_uint32 =
							job->stats.
							vsize_max_taskid;
					break;
				case JOBSTEP:
					tmp_uint32 = step->stats.
						vsize_max_taskid;
					break;
				case JOBCOMP:
				default:
					tmp_uint32 = NO_VAL;
					break;
				}
				if (tmp_uint32 == (uint32_t)NO_VAL)
					tmp_uint32 = NO_VAL;
			}

			field->print_routine(field,
					     tmp_uint32,
					     (curr_inx == field_count));
			break;
		case PRINT_MINCPU:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_dub = job->stats.cpu_min;
					break;
				case JOBSTEP:
					tmp_dub = step->stats.cpu_min;
					break;
				case JOBCOMP:
				default:
					break;
				}
				if (!fuzzy_equal(tmp_dub, NO_VAL))
					tmp_char = _elapsed_time(
						(long)tmp_dub, 0);
			}

			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			xfree(tmp_char);
			break;
		case PRINT_MINCPUNODE:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_char = find_hostname(
							job->stats.
							cpu_min_nodeid,
							job->nodes);
					break;
				case JOBSTEP:
					tmp_char = find_hostname(
						step->stats.cpu_min_nodeid,
						step->nodes);
					break;
				case JOBCOMP:
				default:
					tmp_char = NULL;
					break;
				}
			}

			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			xfree(tmp_char);
			break;
		case PRINT_MINCPUTASK:
			if (got_stats) {
				switch(type) {
				case JOB:
					if (!job->track_steps)
						tmp_uint32 = job->stats.
							cpu_min_taskid;
					break;
				case JOBSTEP:
					tmp_uint32 = step->stats.cpu_min_taskid;
					break;
				case JOBCOMP:
				default:
					tmp_uint32 = NO_VAL;
					break;
				}
				if (tmp_uint32 == (uint32_t)NO_VAL)
					tmp_uint32 = NO_VAL;
			}

			field->print_routine(field,
					     tmp_uint32,
					     (curr_inx == field_count));
			break;
		case PRINT_NODELIST:
			switch(type) {
			case JOB:
				tmp_char = job->nodes;
				break;
			case JOBSTEP:
				tmp_char = step->nodes;
				break;
			case JOBCOMP:
				tmp_char = job_comp->nodelist;
				break;
			default:
				break;
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			break;
		case PRINT_NNODES:
			switch(type) {
			case JOB:
				tmp_int = job->alloc_nodes;
				tmp_char = (job->tres_alloc_str &&
					    job->tres_alloc_str[0])
					? job->tres_alloc_str :
					job->tres_req_str;
				break;
			case JOBSTEP:
				tmp_int = step->nnodes;
				tmp_char = step->tres_alloc_str;
				break;
			case JOBCOMP:
				tmp_int = job_comp->node_cnt;
				break;
			default:
				break;
			}

			if (!tmp_int && tmp_char) {
				if ((tmp_uint64 =
				     slurmdb_find_tres_count_in_string(
					     tmp_char, TRES_NODE))
				    != INFINITE64)
					tmp_int = tmp_uint64;
			}
			convert_num_unit((double)tmp_int, outbuf,
					 sizeof(outbuf), UNIT_NONE,
					 params.units, params.convert_flags);
			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_NTASKS:
			switch(type) {
			case JOB:
				if (!job->track_steps && !step)
					tmp_int = cpu_tres_rec_count;
				// we want to use the step info
				if (!step)
					break;
			case JOBSTEP:
				tmp_int = step->ntasks;
				break;
			case JOBCOMP:
			default:

				break;
			}
			field->print_routine(field,
					     tmp_int,
					     (curr_inx == field_count));
			break;
		case PRINT_PRIO:
			switch(type) {
			case JOB:
				tmp_int = job->priority;
				break;
			case JOBSTEP:

				break;
			case JOBCOMP:

				break;
			default:

				break;
			}
			field->print_routine(field,
					     tmp_int,
					     (curr_inx == field_count));
			break;
		case PRINT_PARTITION:
			switch(type) {
			case JOB:
				tmp_char = job->partition;
				break;
			case JOBSTEP:

				break;
			case JOBCOMP:
				tmp_char = job_comp->partition;
				break;
			default:

				break;
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			break;
		case PRINT_QOS:
			switch(type) {
			case JOB:
				tmp_int = job->qosid;
				break;
			case JOBSTEP:

				break;
			case JOBCOMP:

				break;
			default:

				break;
			}
			if (!g_qos_list) {
				slurmdb_qos_cond_t qos_cond;
				memset(&qos_cond, 0,
				       sizeof(slurmdb_qos_cond_t));
				qos_cond.with_deleted = 1;
				g_qos_list = slurmdb_qos_get(
					acct_db_conn, &qos_cond);
			}

			tmp_char = _find_qos_name_from_list(g_qos_list,
							    tmp_int);
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			break;
		case PRINT_QOSRAW:
			switch(type) {
			case JOB:
				tmp_int = job->qosid;
				break;
			case JOBSTEP:

				break;
			case JOBCOMP:

				break;
			default:

				break;
			}
			field->print_routine(field,
					     tmp_int,
					     (curr_inx == field_count));
			break;
		case PRINT_REQ_CPUFREQ_MIN:
			switch (type) {
			case JOB:
				if (!job->track_steps && !step)
					tmp_dub = NO_VAL;
				// we want to use the step info
				if (!step)
					break;
			case JOBSTEP:
				tmp_dub = step->req_cpufreq_min;
				break;
			default:
				break;
			}
			cpu_freq_to_string(outbuf, sizeof(outbuf), tmp_dub);
			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_REQ_CPUFREQ_MAX:
			switch (type) {
			case JOB:
				if (!job->track_steps && !step)
					tmp_dub = NO_VAL;
				// we want to use the step info
				if (!step)
					break;
			case JOBSTEP:
				tmp_dub = step->req_cpufreq_max;
				break;
			default:
				break;
			}
			cpu_freq_to_string(outbuf, sizeof(outbuf), tmp_dub);
			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_REQ_CPUFREQ_GOV:
			switch (type) {
			case JOB:
				if (!job->track_steps && !step)
					tmp_dub = NO_VAL;
				// we want to use the step info
				if (!step)
					break;
			case JOBSTEP:
				tmp_dub = step->req_cpufreq_gov;
				break;
			default:
				break;
			}
			cpu_freq_to_string(outbuf, sizeof(outbuf), tmp_dub);
			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_REQ_CPUS:
			switch(type) {
			case JOB:
				tmp_int = job->req_cpus;
				break;
			case JOBSTEP:
				tmp_int = step_cpu_tres_rec_count;
				break;
			case JOBCOMP:

				break;
			default:

				break;
			}
			field->print_routine(field,
					     tmp_int,
					     (curr_inx == field_count));
			break;
		case PRINT_REQ_GRES:
			switch(type) {
			case JOB:
				tmp_char = job->req_gres;
				break;
			case JOBSTEP:
				tmp_char = step->job_ptr->req_gres;
				break;
			case JOBCOMP:
			default:
				tmp_char = NULL;
				break;
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			break;
		case PRINT_REQ_MEM:
			switch(type) {
			case JOB:
				tmp_uint32 = job->req_mem;
				break;
			case JOBSTEP:
				tmp_uint32 = step->job_ptr->req_mem;
				break;
			case JOBCOMP:
			default:
				tmp_uint32 = NO_VAL;
				break;
			}

			if (tmp_uint32 != (uint32_t)NO_VAL) {
				bool per_cpu = false;
				if (tmp_uint32 & MEM_PER_CPU) {
					tmp_uint32 &= (~MEM_PER_CPU);
					per_cpu = true;
				}
				convert_num_unit((double)tmp_uint32,
						 outbuf, sizeof(outbuf),
						 UNIT_MEGA, params.units,
						 params.convert_flags);
				if (per_cpu)
					sprintf(outbuf+strlen(outbuf), "c");
				else
					sprintf(outbuf+strlen(outbuf), "n");
			}
			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_REQ_NODES:
			switch(type) {
			case JOB:
				tmp_int = 0;
				tmp_char = job->tres_req_str;
				break;
			case JOBSTEP:
				tmp_int = step->nnodes;
				tmp_char = step->tres_alloc_str;
				break;
			case JOBCOMP:
				tmp_int = job_comp->node_cnt;
				break;
			default:
				break;
			}

			if (!tmp_int && tmp_char) {
				if ((tmp_uint64 =
				     slurmdb_find_tres_count_in_string(
					     tmp_char, TRES_NODE))
				    != INFINITE64)
					tmp_int = tmp_uint64;
			}
			convert_num_unit((double)tmp_int, outbuf,
					 sizeof(outbuf), UNIT_NONE,
					 params.units, params.convert_flags);

			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_RESERVATION:
			switch(type) {
			case JOB:
				if (job->resv_name) {
					tmp_char = job->resv_name;
				} else {
					tmp_char = NULL;
				}
				break;
			case JOBSTEP:
				tmp_char = NULL;
				break;
			case JOBCOMP:
				tmp_char = NULL;
				break;
			default:
				tmp_char = NULL;
				break;
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			break;
		case PRINT_RESERVATION_ID:
			switch(type) {
			case JOB:
				if (job->resvid)
					tmp_uint32 = job->resvid;
				else
					tmp_uint32 = NO_VAL;
				break;
			case JOBSTEP:
				tmp_uint32 = NO_VAL;
				break;
			case JOBCOMP:
				tmp_uint32 = NO_VAL;
				break;
			default:
				tmp_uint32 = NO_VAL;
				break;
			}
			if (tmp_uint32 == (uint32_t)NO_VAL)
				tmp_uint32 = NO_VAL;
			field->print_routine(field,
					     tmp_uint32,
					     (curr_inx == field_count));
			break;
		case PRINT_RESV:
			switch(type) {
			case JOB:
				if (job->start)
					tmp_int = job->start - job->eligible;
				else
					tmp_int = time(NULL) - job->eligible;
				break;
			case JOBSTEP:
				break;
			case JOBCOMP:

				break;
			default:

				break;
			}
			field->print_routine(field,
					     (uint64_t)tmp_int,
					     (curr_inx == field_count));
			break;
		case PRINT_RESV_CPU:
			switch(type) {
			case JOB:
				if (job->start)
					tmp_int = (job->start - job->eligible)
						* job->req_cpus;
				else
					tmp_int = (time(NULL) - job->eligible)
						* job->req_cpus;
				break;
			case JOBSTEP:
				break;
			case JOBCOMP:

				break;
			default:

				break;
			}
			field->print_routine(field,
					     (uint64_t)tmp_int,
					     (curr_inx == field_count));
			break;
		case PRINT_RESV_CPU_RAW:
			switch(type) {
			case JOB:
				if (job->start)
					tmp_int = (job->start - job->eligible)
						* job->req_cpus;
				else
					tmp_int = (time(NULL) - job->eligible)
						* job->req_cpus;
				break;
			case JOBSTEP:
				break;
			case JOBCOMP:

				break;
			default:

				break;
			}
			field->print_routine(field,
					     tmp_int,
					     (curr_inx == field_count));
			break;
		case PRINT_START:
			switch(type) {
			case JOB:
				tmp_int = job->start;
				break;
			case JOBSTEP:
				tmp_int = step->start;
				break;
			case JOBCOMP:
				tmp_int = parse_time(job_comp->start_time, 1);
				break;
			default:

				break;
			}
			field->print_routine(field,
					     tmp_int,
					     (curr_inx == field_count));
			break;
		case PRINT_STATE:
			switch(type) {
			case JOB:
				tmp_int = job->state;
				tmp_int2 = job->requid;
				break;
			case JOBSTEP:
				tmp_int = step->state;
				tmp_int2 = step->requid;
				break;
			case JOBCOMP:
				tmp_char = job_comp->state;
				break;
			default:

				break;
			}

			if (((tmp_int & JOB_STATE_BASE) == JOB_CANCELLED) &&
			    (tmp_int2 != -1))
				snprintf(outbuf, FORMAT_STRING_SIZE,
					 "%s by %d",
					 job_state_string(tmp_int),
					 tmp_int2);
			else if (tmp_int != NO_VAL)
				snprintf(outbuf, FORMAT_STRING_SIZE,
					 "%s",
					 job_state_string(tmp_int));
			else if (tmp_char)
				snprintf(outbuf, FORMAT_STRING_SIZE,
					 "%s",
					 tmp_char);

			field->print_routine(field,
					     outbuf,
					     (curr_inx == field_count));
			break;
		case PRINT_SUBMIT:
			switch(type) {
			case JOB:
				tmp_int = job->submit;
				break;
			case JOBSTEP:
				tmp_int = step->start;
				break;
			case JOBCOMP:
				tmp_int = parse_time(job_comp->start_time, 1);
				break;
			default:

				break;
			}
			field->print_routine(field,
					     tmp_int,
					     (curr_inx == field_count));
			break;
		case PRINT_SUSPENDED:
			switch(type) {
			case JOB:
				tmp_int = job->suspended;
				break;
			case JOBSTEP:
				tmp_int = step->suspended;
				break;
			case JOBCOMP:

				break;
			default:

				break;
			}
			field->print_routine(field,
					     (uint64_t)tmp_int,
					     (curr_inx == field_count));
			break;
		case PRINT_SYSTEMCPU:
			if (got_stats) {
				switch(type) {
				case JOB:
					tmp_int = job->sys_cpu_sec;
					tmp_int2 = job->sys_cpu_usec;
					break;
				case JOBSTEP:
					tmp_int = step->sys_cpu_sec;
					tmp_int2 = step->sys_cpu_usec;
					break;
				case JOBCOMP:
				default:
					break;
				}
				tmp_char = _elapsed_time(tmp_int, tmp_int2);
			}

			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			xfree(tmp_char);
			break;
		case PRINT_TIMELIMIT:
			switch(type) {
			case JOB:
				if (job->timelimit == INFINITE)
					tmp_char = "UNLIMITED";
				else if (job->timelimit == NO_VAL)
					tmp_char = "Partition_Limit";
				else if (job->timelimit) {
					char tmp1[128];
					mins2time_str(job->timelimit,
						      tmp1, sizeof(tmp1));
					tmp_char = tmp1;
				}
				break;
			case JOBSTEP:
				break;
			case JOBCOMP:
				tmp_char = job_comp->timelimit;
				break;
			default:

				break;
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			break;
		case PRINT_TOTALCPU:
			switch(type) {
			case JOB:
				tmp_int = job->tot_cpu_sec;
				tmp_int2 = job->tot_cpu_usec;
				break;
			case JOBSTEP:
				tmp_int = step->tot_cpu_sec;
				tmp_int2 = step->tot_cpu_usec;
				break;
			case JOBCOMP:

				break;
			default:

				break;
			}
			tmp_char = _elapsed_time(tmp_int, tmp_int2);

			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			xfree(tmp_char);
			break;
		case PRINT_TRESA:
			switch(type) {
			case JOB:
				tmp_char = job->tres_alloc_str;
				break;
			case JOBSTEP:
				tmp_char = step->tres_alloc_str;
				break;
			case JOBCOMP:
			default:
				tmp_char = NULL;
				break;
			}

			if (!g_tres_list) {
				slurmdb_tres_cond_t tres_cond;
				memset(&tres_cond, 0,
				       sizeof(slurmdb_tres_cond_t));
				tres_cond.with_deleted = 1;
				g_tres_list = slurmdb_tres_get(
					acct_db_conn, &tres_cond);
			}

			tmp_char = slurmdb_make_tres_string_from_simple(
				tmp_char, g_tres_list, params.units,
				params.convert_flags);

			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			xfree(tmp_char);
			break;
		case PRINT_TRESR:
			switch(type) {
			case JOB:
				tmp_char = job->tres_req_str;
				break;
			case JOBSTEP:
			case JOBCOMP:
			default:
				tmp_char = NULL;
				break;
			}

			if (!g_tres_list) {
				slurmdb_tres_cond_t tres_cond;
				memset(&tres_cond, 0,
				       sizeof(slurmdb_tres_cond_t));
				tres_cond.with_deleted = 1;
				g_tres_list = slurmdb_tres_get(
					acct_db_conn, &tres_cond);
			}

			tmp_char = slurmdb_make_tres_string_from_simple(
				tmp_char, g_tres_list, params.units,
				params.convert_flags);

			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			xfree(tmp_char);
			break;
		case PRINT_UID:
			switch(type) {
			case JOB:
				if (job->user) {
					if ((pw=getpwnam(job->user)))
						tmp_int = pw->pw_uid;
				} else
					tmp_int = job->uid;
				break;
			case JOBSTEP:
				break;
			case JOBCOMP:
				tmp_int = job_comp->uid;
				break;
			default:

				break;
			}

			field->print_routine(field,
					     tmp_int,
					     (curr_inx == field_count));
			break;
		case PRINT_USER:
			switch(type) {
			case JOB:
				if (job->user)
					tmp_char = job->user;
				else if (job->uid != -1) {
					if ((pw=getpwuid(job->uid)))
						tmp_char = pw->pw_name;
				}
				break;
			case JOBSTEP:

				break;
			case JOBCOMP:
				tmp_char = job_comp->uid_name;
				break;
			default:

				break;
			}

			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			break;
		case PRINT_USERCPU:
			if (got_stats) {
				switch(type) {
				case JOB:
					tmp_int = job->user_cpu_sec;
					tmp_int2 = job->user_cpu_usec;
					break;
				case JOBSTEP:
					tmp_int = step->user_cpu_sec;
					tmp_int2 = step->user_cpu_usec;
					break;
				case JOBCOMP:
				default:
					break;
				}
				tmp_char = _elapsed_time(tmp_int, tmp_int2);
			}

			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			xfree(tmp_char);
			break;
		case PRINT_WCKEY:
			switch(type) {
			case JOB:
				tmp_char = job->wckey;
				break;
			case JOBSTEP:

				break;
			case JOBCOMP:

				break;
			default:

				break;
			}
			field->print_routine(field,
					     tmp_char,
					     (curr_inx == field_count));
			break;
		case PRINT_WCKEYID:
			switch(type) {
			case JOB:
				tmp_int = job->wckeyid;
				break;
			case JOBSTEP:

				break;
			case JOBCOMP:

				break;
			default:

				break;
			}
			field->print_routine(field,
					     tmp_int,
					     (curr_inx == field_count));
			break;
		default:
			break;
		}
		curr_inx++;
	}
	printf("\n");
}
Пример #20
0
/*
 * Attempt to start a job
 * jobid     (IN) - job id
 * task_cnt  (IN) - total count of tasks to start
 * hostlist  (IN) - SLURM hostlist expression with no repeated hostnames
 * tasklist  (IN/OUT) - comma separated list of hosts with tasks to be started,
 *                  list hostname once per task to start
 * comment_ptr (IN) - new comment field for the job or NULL for no change
 * err_code (OUT) - Moab error code
 * err_msg  (OUT) - Moab error message
 */
static int	_start_job(uint32_t jobid, int task_cnt, char *hostlist,
			char *tasklist, char *comment_ptr,
			int *err_code, char **err_msg)
{
	int rc = 0, old_task_cnt = 1;
	struct job_record *job_ptr;
	/* Write lock on job info, read lock on node info */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK };
	char *new_node_list = NULL;
	static char tmp_msg[128];
	bitstr_t *new_bitmap = (bitstr_t *) NULL;
	bitstr_t *save_req_bitmap = (bitstr_t *) NULL;
	bitoff_t i, bsize;
	int ll; /* layout info index */
	char *node_name, *node_idx, *node_cur, *save_req_nodes = NULL;
	size_t node_name_len;
	static uint32_t cr_test = 0, cr_enabled = 0;

	if (cr_test == 0) {
		select_g_get_info_from_plugin(SELECT_CR_PLUGIN, NULL,
						&cr_enabled);
		cr_test = 1;
	}

	lock_slurmctld(job_write_lock);
	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL) {
		*err_code = -700;
		*err_msg = "No such job";
		error("wiki: Failed to find job %u", jobid);
		rc = -1;
		goto fini;
	}

	if ((job_ptr->details == NULL) || (!IS_JOB_PENDING(job_ptr))) {
		*err_code = -700;
		*err_msg = "Job not pending, can't start";
		error("wiki: Attempt to start job %u in state %s",
			jobid, job_state_string(job_ptr->job_state));
		rc = -1;
		goto fini;
	}

	if (comment_ptr) {
		char *reserved = strstr(comment_ptr, "RESERVED:");
		if (reserved) {
			reserved += 9;
			job_ptr->details->reserved_resources =
				strtol(reserved, NULL, 10);
		}
		xfree(job_ptr->comment);
		job_ptr->comment = xstrdup(comment_ptr);
	}

	if (task_cnt) {
		new_node_list = xstrdup(hostlist);
		if (node_name2bitmap(new_node_list, false, &new_bitmap) != 0) {
			*err_code = -700;
			*err_msg = "Invalid TASKLIST";
			error("wiki: Attempt to set invalid node list for "
				"job %u, %s",
				jobid, hostlist);
			xfree(new_node_list);
			rc = -1;
			goto fini;
		}

		if (!bit_super_set(new_bitmap, avail_node_bitmap)) {
			/* Selected node is UP and not responding
			 * or it just went DOWN */
			*err_code = -700;
			*err_msg = "TASKLIST includes non-responsive node";
			error("wiki: Attempt to use non-responsive nodes for "
				"job %u, %s",
				jobid, hostlist);
			xfree(new_node_list);
			FREE_NULL_BITMAP(new_bitmap);
			rc = -1;
			goto fini;
		}

		/* User excluded node list incompatible with Wiki
		 * Exclude all nodes not explicitly requested */
		FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap);
		job_ptr->details->exc_node_bitmap = bit_copy(new_bitmap);
		bit_not(job_ptr->details->exc_node_bitmap);
	}

	/* Build layout information from tasklist (assuming that Moab
	 * sends a non-bracketed list of nodes, repeated as many times
	 * as cpus should be used per node); at this point, node names
	 * are comma-separated. This is _not_ a fast algorithm as it
	 * performs many string compares. */
	xfree(job_ptr->details->req_node_layout);
	if (task_cnt && cr_enabled) {
		uint16_t cpus_per_task = MAX(1, job_ptr->details->cpus_per_task);
		job_ptr->details->req_node_layout = (uint16_t *)
			xmalloc(bit_set_count(new_bitmap) * sizeof(uint16_t));
		bsize = bit_size(new_bitmap);
		for (i = 0, ll = -1; i < bsize; i++) {
			if (!bit_test(new_bitmap, i))
				continue;
			ll++;
			node_name = node_record_table_ptr[i].name;
			node_name_len  = strlen(node_name);
			if (node_name_len == 0)
				continue;
			node_cur = tasklist;
			while (*node_cur) {
				if ((node_idx = strstr(node_cur, node_name))) {
					if ((node_idx[node_name_len] == ',') ||
				 	    (node_idx[node_name_len] == '\0')) {
						job_ptr->details->
							req_node_layout[ll] +=
							cpus_per_task;
					}
					node_cur = strchr(node_idx, ',');
					if (node_cur)
						continue;
				}
				break;
			}
		}
	}

	/* save and update job state to start now */
	save_req_nodes = job_ptr->details->req_nodes;
	job_ptr->details->req_nodes = new_node_list;
	save_req_bitmap = job_ptr->details->req_node_bitmap;
	job_ptr->details->req_node_bitmap = new_bitmap;
	old_task_cnt = job_ptr->details->min_cpus;
	job_ptr->details->min_cpus = MAX(task_cnt, old_task_cnt);
	job_ptr->priority = 100000000;

 fini:	unlock_slurmctld(job_write_lock);
	if (rc)
		return rc;

	/* No errors so far */
	(void) schedule(INFINITE);	/* provides own locking */

	/* Check to insure the job was actually started */
	lock_slurmctld(job_write_lock);
	if (job_ptr->job_id != jobid)
		job_ptr = find_job_record(jobid);

	if (job_ptr && (job_ptr->job_id == jobid) &&
	    (!IS_JOB_RUNNING(job_ptr))) {
		uint16_t wait_reason = 0;
		char *wait_string;

		if (IS_JOB_FAILED(job_ptr))
			wait_string = "Invalid request, job aborted";
		else {
			wait_reason = job_ptr->state_reason;
			if (wait_reason == WAIT_HELD) {
				/* some job is completing, slurmctld did
				 * not even try to schedule this job */
				wait_reason = WAIT_RESOURCES;
			}
			wait_string = job_reason_string(wait_reason);
			job_ptr->state_reason = WAIT_HELD;
			xfree(job_ptr->state_desc);
		}
		*err_code = -910 - wait_reason;
		snprintf(tmp_msg, sizeof(tmp_msg),
			"Could not start job %u(%s): %s",
			jobid, new_node_list, wait_string);
		*err_msg = tmp_msg;
		error("wiki: %s", tmp_msg);

		/* restore some of job state */
		job_ptr->priority = 0;
		job_ptr->details->min_cpus = old_task_cnt;
		rc = -1;
	}

	if (job_ptr && (job_ptr->job_id == jobid) && job_ptr->details) {
		/* Restore required node list in case job requeued */
		xfree(job_ptr->details->req_nodes);
		job_ptr->details->req_nodes = save_req_nodes;
		FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
		job_ptr->details->req_node_bitmap = save_req_bitmap;
		FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap);
		xfree(job_ptr->details->req_node_layout);
	} else {
		error("wiki: start_job(%u) job missing", jobid);
		xfree(save_req_nodes);
		FREE_NULL_BITMAP(save_req_bitmap);
	}

	unlock_slurmctld(job_write_lock);
	schedule_node_save();	/* provides own locking */
	schedule_job_save();	/* provides own locking */
	return rc;
}
Пример #21
0
extern List mysql_jobcomp_process_get_jobs(slurmdb_job_cond_t *job_cond)
{

	char *query = NULL;
	char *extra = NULL;
	char *tmp = NULL;
	char *selected_part = NULL;
	slurmdb_selected_step_t *selected_step = NULL;
	ListIterator itr = NULL;
	int set = 0;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	int i;
	int lc = 0;
	jobcomp_job_rec_t *job = NULL;
	char time_str[32];
	time_t temp_time;
	List job_list = list_create(jobcomp_destroy_job);

	if (job_cond->step_list && list_count(job_cond->step_list)) {
		set = 0;
		xstrcat(extra, " where (");
		itr = list_iterator_create(job_cond->step_list);
		while((selected_step = list_next(itr))) {
			if (set)
				xstrcat(extra, " || ");
			tmp = xstrdup_printf("jobid=%d",
					      selected_step->jobid);
			xstrcat(extra, tmp);
			set = 1;
			xfree(tmp);
		}
		list_iterator_destroy(itr);
		xstrcat(extra, ")");
	}

	if (job_cond->partition_list && list_count(job_cond->partition_list)) {
		set = 0;
		if (extra)
			xstrcat(extra, " && (");
		else
			xstrcat(extra, " where (");

		itr = list_iterator_create(job_cond->partition_list);
		while((selected_part = list_next(itr))) {
			if (set)
				xstrcat(extra, " || ");
			tmp = xstrdup_printf("`partition`='%s'",
					      selected_part);
			xstrcat(extra, tmp);
			set = 1;
			xfree(tmp);
		}
		list_iterator_destroy(itr);
		xstrcat(extra, ")");
	}

	i = 0;
	while(jobcomp_table_fields[i].name) {
		if (i)
			xstrcat(tmp, ", ");
		xstrcat(tmp, jobcomp_table_fields[i].name);
		i++;
	}

	query = xstrdup_printf("select %s from %s", tmp, jobcomp_table);
	xfree(tmp);

	if (extra) {
		xstrcat(query, extra);
		xfree(extra);
	}

	//info("query = %s", query);
	if (!(result =
	     mysql_db_query_ret(jobcomp_mysql_conn, query, 0))) {
		xfree(query);
		list_destroy(job_list);
		return NULL;
	}
	xfree(query);

	while((row = mysql_fetch_row(result))) {
		lc++;

		job = xmalloc(sizeof(jobcomp_job_rec_t));
		if (row[JOBCOMP_REQ_JOBID])
			job->jobid = slurm_atoul(row[JOBCOMP_REQ_JOBID]);
		job->partition = xstrdup(row[JOBCOMP_REQ_PARTITION]);
		temp_time = atoi(row[JOBCOMP_REQ_STARTTIME]);
		slurm_make_time_str(&temp_time,
				    time_str,
				    sizeof(time_str));

		job->start_time = xstrdup(time_str);
		temp_time = atoi(row[JOBCOMP_REQ_ENDTIME]);
		slurm_make_time_str(&temp_time,
				    time_str,
				    sizeof(time_str));

		job->elapsed_time = atoi(row[JOBCOMP_REQ_ENDTIME])
			- atoi(row[JOBCOMP_REQ_STARTTIME]);

		job->end_time = xstrdup(time_str);
		if (row[JOBCOMP_REQ_UID])
			job->uid = slurm_atoul(row[JOBCOMP_REQ_UID]);
		job->uid_name = xstrdup(row[JOBCOMP_REQ_USER_NAME]);
		if (row[JOBCOMP_REQ_GID])
			job->gid = slurm_atoul(row[JOBCOMP_REQ_GID]);
		job->gid_name = xstrdup(row[JOBCOMP_REQ_GROUP_NAME]);
		job->jobname = xstrdup(row[JOBCOMP_REQ_NAME]);
		job->nodelist = xstrdup(row[JOBCOMP_REQ_NODELIST]);
		if (row[JOBCOMP_REQ_NODECNT])
			job->node_cnt = slurm_atoul(row[JOBCOMP_REQ_NODECNT]);
		if (row[JOBCOMP_REQ_STATE]) {
			i = atoi(row[JOBCOMP_REQ_STATE]);
			job->state = xstrdup(job_state_string(i));
		}
		job->timelimit = xstrdup(row[JOBCOMP_REQ_TIMELIMIT]);
		if (row[JOBCOMP_REQ_MAXPROCS])
			job->max_procs = slurm_atoul(row[JOBCOMP_REQ_MAXPROCS]);
		job->connection = xstrdup(row[JOBCOMP_REQ_CONNECTION]);
		job->reboot = xstrdup(row[JOBCOMP_REQ_REBOOT]);
		job->rotate = xstrdup(row[JOBCOMP_REQ_ROTATE]);
		job->geo = xstrdup(row[JOBCOMP_REQ_GEOMETRY]);
		job->bg_start_point = xstrdup(row[JOBCOMP_REQ_START]);
		job->blockid = xstrdup(row[JOBCOMP_REQ_BLOCKID]);
		list_append(job_list, job);
	}

	mysql_free_result(result);

	return job_list;
}
Пример #22
0
/* Creates a tree model containing the completions */
void _search_entry(sview_search_info_t *sview_search_info)
{
	int id = 0;
	char title[100];
	ListIterator itr = NULL;
	popup_info_t *popup_win = NULL;
	GError *error = NULL;
	char *upper = NULL, *lower = NULL;
	char *type;

	if (cluster_flags & CLUSTER_FLAG_BG)
		type = "Midplane";
	else
		type = "Node";

	if (sview_search_info->int_data == NO_VAL &&
	    (!sview_search_info->gchar_data
	     || !strlen(sview_search_info->gchar_data))) {
		g_print("nothing given to search for.\n");
		return;
	}

	switch(sview_search_info->search_type) {
	case SEARCH_JOB_STATE:
		id = JOB_PAGE;
		upper = job_state_string(sview_search_info->int_data);
		lower = str_tolower(upper);
		snprintf(title, 100, "Job(s) in the %s state", lower);
		xfree(lower);
		break;
	case SEARCH_JOB_ID:
		id = JOB_PAGE;
		snprintf(title, 100, "Job %s info",
			 sview_search_info->gchar_data);
		break;
	case SEARCH_JOB_USER:
		id = JOB_PAGE;
		snprintf(title, 100, "Job(s) info for user %s",
			 sview_search_info->gchar_data);
		break;
	case SEARCH_BLOCK_STATE:
		id = BLOCK_PAGE;
		upper = bg_block_state_string(sview_search_info->int_data);
		lower = str_tolower(upper);
		snprintf(title, 100, "BG Block(s) in the %s state", lower);
		xfree(lower);
		break;
	case SEARCH_BLOCK_NAME:
		id = BLOCK_PAGE;
		snprintf(title, 100, "Block %s info",
			 sview_search_info->gchar_data);
		break;
	case SEARCH_BLOCK_SIZE:
		id = BLOCK_PAGE;
		sview_search_info->int_data =
			revert_num_unit(sview_search_info->gchar_data);
		if (sview_search_info->int_data == -1)
			return;
		snprintf(title, 100, "Block(s) of size %d cnodes",
			 sview_search_info->int_data);
		break;
	case SEARCH_PARTITION_NAME:
		id = PART_PAGE;
		snprintf(title, 100, "Partition %s info",
			 sview_search_info->gchar_data);
		break;
	case SEARCH_PARTITION_STATE:
		id = PART_PAGE;
		if (sview_search_info->int_data)
			snprintf(title, 100, "Partition(s) that are up");
		else
			snprintf(title, 100, "Partition(s) that are down");
		break;
	case SEARCH_NODE_NAME:
		id = NODE_PAGE;
		snprintf(title, 100, "%s(s) %s info",
			 type, sview_search_info->gchar_data);
		break;
	case SEARCH_NODE_STATE:
		id = NODE_PAGE;
		upper = node_state_string(sview_search_info->int_data);
		lower = str_tolower(upper);
		snprintf(title, 100, "%s(s) in the %s state",
			 type, lower);
		xfree(lower);

		break;
	case SEARCH_RESERVATION_NAME:
		id = RESV_PAGE;
		snprintf(title, 100, "Reservation %s info",
			 sview_search_info->gchar_data);
		break;
	default:
		g_print("unknown search type %d.\n",
			sview_search_info->search_type);

		return;
	}

	itr = list_iterator_create(popup_list);
	while ((popup_win = list_next(itr))) {
		if (popup_win->spec_info)
			if (!strcmp(popup_win->spec_info->title, title)) {
				break;
			}
	}
	list_iterator_destroy(itr);

	if (!popup_win) {
		popup_win = create_popup_info(id, id, title);
	} else {
		gtk_window_present(GTK_WINDOW(popup_win->popup));
		return;
	}
	memcpy(popup_win->spec_info->search_info, sview_search_info,
	       sizeof(sview_search_info_t));

	if (!g_thread_create((gpointer)popup_thr, popup_win, FALSE, &error))
	{
		g_printerr ("Failed to create main popup thread: %s\n",
			    error->message);
		return;
	}
	return;
}
Пример #23
0
extern int slurm_jobcomp_log_record ( struct job_record *job_ptr )
{
	int rc = SLURM_SUCCESS;
	char job_rec[1024];
	char usr_str[32], grp_str[32], start_str[32], end_str[32], lim_str[32];
	char *resv_name, *gres, *account, *qos, *wckey, *cluster;
	char submit_time[32], eligible_time[32], array_id[64], pack_id[64];
	char select_buf[128], *state_string, *work_dir;
	size_t offset = 0, tot_size, wrote;
	uint32_t job_state;
	uint32_t time_limit;

	if ((log_name == NULL) || (job_comp_fd < 0)) {
		error("JobCompLoc log file %s not open", log_name);
		return SLURM_ERROR;
	}

	slurm_mutex_lock( &file_lock );
	_get_user_name(job_ptr->user_id, usr_str, sizeof(usr_str));
	_get_group_name(job_ptr->group_id, grp_str, sizeof(grp_str));

	if ((job_ptr->time_limit == NO_VAL) && job_ptr->part_ptr)
		time_limit = job_ptr->part_ptr->max_time;
	else
		time_limit = job_ptr->time_limit;
	if (time_limit == INFINITE)
		strcpy(lim_str, "UNLIMITED");
	else {
		snprintf(lim_str, sizeof(lim_str), "%lu",
			 (unsigned long) time_limit);
	}

	if (job_ptr->job_state & JOB_RESIZING) {
		time_t now = time(NULL);
		state_string = job_state_string(job_ptr->job_state);
		if (job_ptr->resize_time) {
			_make_time_str(&job_ptr->resize_time, start_str,
				       sizeof(start_str));
		} else {
			_make_time_str(&job_ptr->start_time, start_str,
				       sizeof(start_str));
		}
		_make_time_str(&now, end_str, sizeof(end_str));
	} else {
		/* Job state will typically have JOB_COMPLETING or JOB_RESIZING
		 * flag set when called. We remove the flags to get the eventual
		 * completion state: JOB_FAILED, JOB_TIMEOUT, etc. */
		job_state = job_ptr->job_state & JOB_STATE_BASE;
		state_string = job_state_string(job_state);
		if (job_ptr->resize_time) {
			_make_time_str(&job_ptr->resize_time, start_str,
				       sizeof(start_str));
		} else if (job_ptr->start_time > job_ptr->end_time) {
			/* Job cancelled while pending and
			 * expected start time is in the future. */
			snprintf(start_str, sizeof(start_str), "Unknown");
		} else {
			_make_time_str(&job_ptr->start_time, start_str,
				       sizeof(start_str));
		}
		_make_time_str(&job_ptr->end_time, end_str, sizeof(end_str));
	}

	if (job_ptr->details && job_ptr->details->work_dir)
		work_dir = job_ptr->details->work_dir;
	else
		work_dir = "unknown";

	if (job_ptr->resv_name && job_ptr->resv_name[0])
		resv_name = job_ptr->resv_name;
	else
		resv_name = "";

	if (job_ptr->gres_req && job_ptr->gres_req[0])
		gres = job_ptr->gres_req;
	else
		gres = "";

	if (job_ptr->account && job_ptr->account[0])
		account = job_ptr->account;
	else
		account = "";

	if (job_ptr->qos_ptr != NULL) {
		qos = job_ptr->qos_ptr->name;
	} else
		qos = "";

	if (job_ptr->wckey && job_ptr->wckey[0])
		wckey = job_ptr->wckey;
	else
		wckey = "";

	if (job_ptr->assoc_ptr != NULL)
		cluster = job_ptr->assoc_ptr->cluster;
	else
		cluster = "unknown";

	if (job_ptr->details && job_ptr->details->submit_time) {
		_make_time_str(&job_ptr->details->submit_time,
			       submit_time, sizeof(submit_time));
	} else {
		snprintf(submit_time, sizeof(submit_time), "unknown");
	}

	if (job_ptr->details && job_ptr->details->begin_time) {
		_make_time_str(&job_ptr->details->begin_time,
			       eligible_time, sizeof(eligible_time));
	} else {
		snprintf(eligible_time, sizeof(eligible_time), "unknown");
	}

	if (job_ptr->array_task_id != NO_VAL) {
		snprintf(array_id, sizeof(array_id),
			 " ArrayJobId=%u ArrayTaskId=%u",
			 job_ptr->array_job_id, job_ptr->array_task_id);
	} else {
		array_id[0] = '\0';
	}

	if (job_ptr->pack_job_id) {
		snprintf(pack_id, sizeof(pack_id),
			 " PackJobId=%u PackJobOffset=%u",
			 job_ptr->pack_job_id, job_ptr->pack_job_offset);
	} else {
		pack_id[0] = '\0';
	}

	select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
		select_buf, sizeof(select_buf), SELECT_PRINT_MIXED);

	snprintf(job_rec, sizeof(job_rec), JOB_FORMAT,
		 (unsigned long) job_ptr->job_id, usr_str,
		 (unsigned long) job_ptr->user_id, grp_str,
		 (unsigned long) job_ptr->group_id, job_ptr->name,
		 state_string, job_ptr->partition, lim_str, start_str,
		 end_str, job_ptr->nodes, job_ptr->node_cnt,
		 job_ptr->total_cpus, work_dir, resv_name, gres, account, qos,
		 wckey, cluster, submit_time, eligible_time, array_id, pack_id,
		 job_ptr->derived_ec, job_ptr->exit_code, select_buf);
	tot_size = strlen(job_rec);

	while (offset < tot_size) {
		wrote = write(job_comp_fd, job_rec + offset,
			tot_size - offset);
		if (wrote == -1) {
			if (errno == EAGAIN)
				continue;
			else {
				plugin_errno = errno;
				rc = SLURM_ERROR;
				break;
			}
		}
		offset += wrote;
	}
	slurm_mutex_unlock( &file_lock );
	return rc;
}