Exemplo n.º 1
0
extern int as_mysql_step_start(mysql_conn_t *mysql_conn,
			       struct step_record *step_ptr)
{
	int tasks = 0, nodes = 0, task_dist = 0;
	int rc=SLURM_SUCCESS;
	char node_list[BUFFER_SIZE];
	char *node_inx = NULL, *step_name = NULL;
	time_t start_time, submit_time;
	char *query = NULL;

	if (!step_ptr->job_ptr->db_index
	    && ((!step_ptr->job_ptr->details
		 || !step_ptr->job_ptr->details->submit_time)
		&& !step_ptr->job_ptr->resize_time)) {
		error("as_mysql_step_start: "
		      "Not inputing this job, it has no submit time.");
		return SLURM_ERROR;
	}

	if (step_ptr->job_ptr->resize_time) {
		submit_time = start_time = step_ptr->job_ptr->resize_time;
		if (step_ptr->start_time > submit_time)
			start_time = step_ptr->start_time;
	} else {
		start_time = step_ptr->start_time;
		submit_time = step_ptr->job_ptr->details->submit_time;
	}

	if (check_connection(mysql_conn) != SLURM_SUCCESS)
		return ESLURM_DB_CONNECTION;
	if (slurmdbd_conf) {
		if (step_ptr->job_ptr->details)
			tasks = step_ptr->job_ptr->details->num_tasks;
		else
			tasks = step_ptr->cpu_count;
		snprintf(node_list, BUFFER_SIZE, "%s",
			 step_ptr->job_ptr->nodes);
		nodes = step_ptr->step_layout->node_cnt;
		task_dist = step_ptr->step_layout->task_dist;
		node_inx = step_ptr->network;
	} else if (step_ptr->step_id == SLURM_BATCH_SCRIPT) {
		char temp_bit[BUF_SIZE];

		if (step_ptr->step_node_bitmap) {
			node_inx = bit_fmt(temp_bit, sizeof(temp_bit),
					   step_ptr->step_node_bitmap);
		}
		/* We overload gres with the node name of where the
		   script was running.
		*/
		snprintf(node_list, BUFFER_SIZE, "%s", step_ptr->gres);
		nodes = tasks = 1;
	} else {
		char *ionodes = NULL, *temp_nodes = NULL;
		char temp_bit[BUF_SIZE];

		if (step_ptr->step_node_bitmap) {
			node_inx = bit_fmt(temp_bit, sizeof(temp_bit),
					   step_ptr->step_node_bitmap);
		}
#ifdef HAVE_BG_L_P
		/* Only L and P use this code */
		if (step_ptr->job_ptr->details)
			tasks = step_ptr->job_ptr->details->min_cpus;
		else
			tasks = step_ptr->job_ptr->cpu_cnt;
		select_g_select_jobinfo_get(step_ptr->job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &nodes);
		temp_nodes = step_ptr->job_ptr->nodes;
#else
		if (!step_ptr->step_layout
		    || !step_ptr->step_layout->task_cnt) {
			if (step_ptr->cpu_count)
				tasks = step_ptr->cpu_count;
			else {
				if (!(tasks = slurmdb_find_tres_count_in_string(
					      step_ptr->tres_alloc_str,
					      TRES_CPU))) {
					if (!(tasks =
					      slurmdb_find_tres_count_in_string(
						      step_ptr->job_ptr->
						      tres_alloc_str,
						      TRES_CPU)))
						tasks = step_ptr->job_ptr->
							total_nodes;
				}
			}

			nodes = step_ptr->job_ptr->total_nodes;
			temp_nodes = step_ptr->job_ptr->nodes;
		} else {
			tasks = step_ptr->step_layout->task_cnt;
#ifdef HAVE_BGQ
			select_g_select_jobinfo_get(step_ptr->select_jobinfo,
						    SELECT_JOBDATA_NODE_CNT,
						    &nodes);
#else
			nodes = step_ptr->step_layout->node_cnt;
#endif
			task_dist = step_ptr->step_layout->task_dist;
			temp_nodes = step_ptr->step_layout->node_list;
		}
#endif
		select_g_select_jobinfo_get(step_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &ionodes);
		if (ionodes) {
			snprintf(node_list, BUFFER_SIZE, "%s[%s]",
				 temp_nodes, ionodes);
			xfree(ionodes);
		} else
			snprintf(node_list, BUFFER_SIZE, "%s", temp_nodes);
	}

	if (!step_ptr->job_ptr->db_index) {
		if (!(step_ptr->job_ptr->db_index =
		      _get_db_index(mysql_conn,
				    submit_time,
				    step_ptr->job_ptr->job_id,
				    step_ptr->job_ptr->assoc_id))) {
			/* If we get an error with this just fall
			 * through to avoid an infinite loop
			 */
			if (as_mysql_job_start(mysql_conn, step_ptr->job_ptr)
			    == SLURM_ERROR) {
				error("couldn't add job %u at step start",
				      step_ptr->job_ptr->job_id);
				return SLURM_SUCCESS;
			}
		}
	}

	step_name = slurm_add_slash_to_quotes(step_ptr->name);

	/* we want to print a -1 for the requid so leave it a
	   %d */
	/* The stepid could be -2 so use %d not %u */
	query = xstrdup_printf(
		"insert into \"%s_%s\" (job_db_inx, id_step, time_start, "
		"step_name, state, tres_alloc, "
		"nodes_alloc, task_cnt, nodelist, node_inx, "
		"task_dist, req_cpufreq, req_cpufreq_min, req_cpufreq_gov) "
		"values (%d, %d, %d, '%s', %d, '%s', %d, %d, "
		"'%s', '%s', %d, %u, %u, %u) "
		"on duplicate key update "
		"nodes_alloc=%d, task_cnt=%d, time_end=0, state=%d, "
		"nodelist='%s', node_inx='%s', task_dist=%d, "
		"req_cpufreq=%u, req_cpufreq_min=%u, req_cpufreq_gov=%u,"
		"tres_alloc='%s';",
		mysql_conn->cluster_name, step_table,
		step_ptr->job_ptr->db_index,
		step_ptr->step_id,
		(int)start_time, step_name,
		JOB_RUNNING, step_ptr->tres_alloc_str,
		nodes, tasks, node_list, node_inx, task_dist,
		step_ptr->cpu_freq_max, step_ptr->cpu_freq_min,
		step_ptr->cpu_freq_gov, nodes, tasks, JOB_RUNNING,
		node_list, node_inx, task_dist, step_ptr->cpu_freq_max,
		step_ptr->cpu_freq_min, step_ptr->cpu_freq_gov,
		step_ptr->tres_alloc_str);
	if (debug_flags & DEBUG_FLAG_DB_STEP)
		DB_DEBUG(mysql_conn->conn, "query\n%s", query);
	rc = mysql_db_query(mysql_conn, query);
	xfree(query);
	xfree(step_name);

	return rc;
}
Exemplo n.º 2
0
extern int as_mysql_job_start(mysql_conn_t *mysql_conn,
			      struct job_record *job_ptr)
{
	int rc=SLURM_SUCCESS;
	char *nodes = NULL, *jname = NULL, *node_inx = NULL;
	int track_steps = 0;
	char *block_id = NULL, *partition = NULL,
		*gres_req = NULL, *gres_alloc = NULL;
	char *query = NULL;
	int reinit = 0;
	time_t begin_time, check_time, start_time, submit_time;
	uint32_t wckeyid = 0;
	int job_state, node_cnt = 0;
	uint32_t job_db_inx = job_ptr->db_index;
	job_array_struct_t *array_recs = job_ptr->array_recs;

	if ((!job_ptr->details || !job_ptr->details->submit_time)
	    && !job_ptr->resize_time) {
		error("as_mysql_job_start: "
		      "Not inputing this job, it has no submit time.");
		return SLURM_ERROR;
	}

	if (check_connection(mysql_conn) != SLURM_SUCCESS)
		return ESLURM_DB_CONNECTION;

	debug2("as_mysql_slurmdb_job_start() called");

	job_state = job_ptr->job_state;

	if (job_ptr->resize_time) {
		begin_time  = job_ptr->resize_time;
		submit_time = job_ptr->resize_time;
		start_time  = job_ptr->resize_time;
	} else {
		begin_time  = job_ptr->details->begin_time;
		submit_time = job_ptr->details->submit_time;
		start_time  = job_ptr->start_time;
	}

	/* If the reason is WAIT_ARRAY_TASK_LIMIT we don't want to
	 * give the pending jobs an eligible time since it will add
	 * time to accounting where as these jobs aren't able to run
	 * until later so mark it as such.
	 */
	if (job_ptr->state_reason == WAIT_ARRAY_TASK_LIMIT)
		begin_time = INFINITE;

	/* Since we need a new db_inx make sure the old db_inx
	 * removed. This is most likely the only time we are going to
	 * be notified of the change also so make the state without
	 * the resize. */
	if (IS_JOB_RESIZING(job_ptr)) {
		/* If we have a db_index lets end the previous record. */
		if (!job_ptr->db_index) {
			error("We don't have a db_index for job %u, "
			      "this should only happen when resizing "
			      "jobs and the database interface was down.",
			      job_ptr->job_id);
			job_ptr->db_index = _get_db_index(mysql_conn,
							  job_ptr->details->
							  submit_time,
							  job_ptr->job_id,
							  job_ptr->assoc_id);
		}

		if (job_ptr->db_index)
			as_mysql_job_complete(mysql_conn, job_ptr);

		job_state &= (~JOB_RESIZING);
		job_ptr->db_index = 0;
	}

	job_state &= JOB_STATE_BASE;

	/* See what we are hearing about here if no start time. If
	 * this job latest time is before the last roll up we will
	 * need to reset it to look at this job. */
	if (start_time)
		check_time = start_time;
	else if (begin_time)
		check_time = begin_time;
	else
		check_time = submit_time;

	slurm_mutex_lock(&rollup_lock);
	if (check_time < global_last_rollup) {
		MYSQL_RES *result = NULL;
		MYSQL_ROW row;

		/* check to see if we are hearing about this time for the
		 * first time.
		 */
		query = xstrdup_printf("select job_db_inx "
				       "from \"%s_%s\" where id_job=%u and "
				       "time_submit=%ld and time_eligible=%ld "
				       "and time_start=%ld;",
				       mysql_conn->cluster_name,
				       job_table, job_ptr->job_id,
				       submit_time, begin_time, start_time);
		if (debug_flags & DEBUG_FLAG_DB_JOB)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
		if (!(result =
		      mysql_db_query_ret(mysql_conn, query, 0))) {
			xfree(query);
			slurm_mutex_unlock(&rollup_lock);
			return SLURM_ERROR;
		}
		xfree(query);
		if ((row = mysql_fetch_row(result))) {
			mysql_free_result(result);
			debug4("revieved an update for a "
			       "job (%u) already known about",
			       job_ptr->job_id);
			slurm_mutex_unlock(&rollup_lock);
			goto no_rollup_change;
		}
		mysql_free_result(result);

		if (job_ptr->start_time)
			debug("Need to reroll usage from %s Job %u "
			      "from %s started then and we are just "
			      "now hearing about it.",
			      slurm_ctime2(&check_time),
			      job_ptr->job_id, mysql_conn->cluster_name);
		else if (begin_time)
			debug("Need to reroll usage from %s Job %u "
			      "from %s became eligible then and we are just "
			      "now hearing about it.",
			      slurm_ctime2(&check_time),
			      job_ptr->job_id, mysql_conn->cluster_name);
		else
			debug("Need to reroll usage from %s Job %u "
			      "from %s was submitted then and we are just "
			      "now hearing about it.",
			      slurm_ctime2(&check_time),
			      job_ptr->job_id, mysql_conn->cluster_name);

		global_last_rollup = check_time;
		slurm_mutex_unlock(&rollup_lock);

		/* If the times here are later than the daily_rollup
		   or monthly rollup it isn't a big deal since they
		   are always shrunk down to the beginning of each
		   time period.
		*/
		query = xstrdup_printf("update \"%s_%s\" set "
				       "hourly_rollup=%ld, "
				       "daily_rollup=%ld, monthly_rollup=%ld",
				       mysql_conn->cluster_name,
				       last_ran_table, check_time,
				       check_time, check_time);
		if (debug_flags & DEBUG_FLAG_DB_JOB)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
		rc = mysql_db_query(mysql_conn, query);
		xfree(query);
	} else
		slurm_mutex_unlock(&rollup_lock);

no_rollup_change:

	if (job_ptr->name && job_ptr->name[0])
		jname = slurm_add_slash_to_quotes(job_ptr->name);
	else {
		jname = xstrdup("allocation");
		track_steps = 1;
	}

	if (job_ptr->nodes && job_ptr->nodes[0])
		nodes = job_ptr->nodes;
	else
		nodes = "None assigned";

	if (job_ptr->batch_flag)
		track_steps = 1;

	if (slurmdbd_conf) {
		block_id = xstrdup(job_ptr->comment);
		node_cnt = job_ptr->total_nodes;
		node_inx = job_ptr->network;
	} else {
		char temp_bit[BUF_SIZE];

		if (job_ptr->node_bitmap) {
			node_inx = bit_fmt(temp_bit, sizeof(temp_bit),
					   job_ptr->node_bitmap);
		}
#ifdef HAVE_BG
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_BLOCK_ID,
					    &block_id);
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &node_cnt);
#else
		node_cnt = job_ptr->total_nodes;
#endif
	}

	/* Grab the wckey once to make sure it is placed. */
	if (job_ptr->assoc_id && (!job_ptr->db_index || job_ptr->wckey))
		wckeyid = _get_wckeyid(mysql_conn, &job_ptr->wckey,
				       job_ptr->user_id,
				       mysql_conn->cluster_name,
				       job_ptr->assoc_id);

	if (!IS_JOB_PENDING(job_ptr) && job_ptr->part_ptr)
		partition = slurm_add_slash_to_quotes(job_ptr->part_ptr->name);
	else if (job_ptr->partition)
		partition = slurm_add_slash_to_quotes(job_ptr->partition);

	if (job_ptr->gres_req)
		gres_req = slurm_add_slash_to_quotes(job_ptr->gres_req);

	if (job_ptr->gres_alloc)
		gres_alloc = slurm_add_slash_to_quotes(job_ptr->gres_alloc);

	if (!job_ptr->db_index) {
		if (!begin_time)
			begin_time = submit_time;
		query = xstrdup_printf(
			"insert into \"%s_%s\" "
			"(id_job, id_array_job, id_array_task, "
			"id_assoc, id_qos, id_user, "
			"id_group, nodelist, id_resv, timelimit, "
			"time_eligible, time_submit, time_start, "
			"job_name, track_steps, state, priority, cpus_req, "
			"nodes_alloc, mem_req",
			mysql_conn->cluster_name, job_table);

		if (wckeyid)
			xstrcat(query, ", id_wckey");
		if (job_ptr->account)
			xstrcat(query, ", account");
		if (partition)
			xstrcat(query, ", `partition`");
		if (block_id)
			xstrcat(query, ", id_block");
		if (job_ptr->wckey)
			xstrcat(query, ", wckey");
		if (node_inx)
			xstrcat(query, ", node_inx");
		if (gres_req)
			xstrcat(query, ", gres_req");
		if (gres_alloc)
			xstrcat(query, ", gres_alloc");
		if (array_recs && array_recs->task_id_str)
			xstrcat(query, ", array_task_str, array_max_tasks, "
				"array_task_pending");
		else
			xstrcat(query, ", array_task_str, array_task_pending");

		if (job_ptr->tres_alloc_str)
			xstrcat(query, ", tres_alloc");

		xstrfmtcat(query,
			   ") values (%u, %u, %u, %u, %u, %u, %u, "
			   "'%s', %u, %u, %ld, %ld, %ld, "
			   "'%s', %u, %u, %u, %u, %u, %u",
			   job_ptr->job_id, job_ptr->array_job_id,
			   job_ptr->array_task_id, job_ptr->assoc_id,
			   job_ptr->qos_id,
			   job_ptr->user_id, job_ptr->group_id, nodes,
			   job_ptr->resv_id, job_ptr->time_limit,
			   begin_time, submit_time, start_time,
			   jname, track_steps, job_state,
			   job_ptr->priority, job_ptr->details->min_cpus,
			   node_cnt,
			   job_ptr->details->pn_min_memory);

		if (wckeyid)
			xstrfmtcat(query, ", %u", wckeyid);
		if (job_ptr->account)
			xstrfmtcat(query, ", '%s'", job_ptr->account);
		if (partition)
			xstrfmtcat(query, ", '%s'", partition);
		if (block_id)
			xstrfmtcat(query, ", '%s'", block_id);
		if (job_ptr->wckey)
			xstrfmtcat(query, ", '%s'", job_ptr->wckey);
		if (node_inx)
			xstrfmtcat(query, ", '%s'", node_inx);
		if (gres_req)
			xstrfmtcat(query, ", '%s'", gres_req);
		if (gres_alloc)
			xstrfmtcat(query, ", '%s'", gres_alloc);
		if (array_recs && array_recs->task_id_str)
			xstrfmtcat(query, ", '%s', %u, %u",
				   array_recs->task_id_str,
				   array_recs->max_run_tasks,
				   array_recs->task_cnt);
		else
			xstrcat(query, ", NULL, 0");

		if (job_ptr->tres_alloc_str)
			xstrfmtcat(query, ", '%s'", job_ptr->tres_alloc_str);

		xstrfmtcat(query,
			   ") on duplicate key update "
			   "job_db_inx=LAST_INSERT_ID(job_db_inx), "
			   "id_user=%u, id_group=%u, "
			   "nodelist='%s', id_resv=%u, timelimit=%u, "
			   "time_submit=%ld, time_eligible=%ld, "
			   "time_start=%ld, "
			   "job_name='%s', track_steps=%u, id_qos=%u, "
			   "state=greatest(state, %u), priority=%u, "
			   "cpus_req=%u, nodes_alloc=%u, "
			   "mem_req=%u, id_array_job=%u, id_array_task=%u",
			   job_ptr->user_id, job_ptr->group_id, nodes,
			   job_ptr->resv_id, job_ptr->time_limit,
			   submit_time, begin_time, start_time,
			   jname, track_steps, job_ptr->qos_id, job_state,
			   job_ptr->priority, job_ptr->details->min_cpus,
			   node_cnt,
			   job_ptr->details->pn_min_memory,
			   job_ptr->array_job_id,
			   job_ptr->array_task_id);

		if (wckeyid)
			xstrfmtcat(query, ", id_wckey=%u", wckeyid);
		if (job_ptr->account)
			xstrfmtcat(query, ", account='%s'", job_ptr->account);
		if (partition)
			xstrfmtcat(query, ", `partition`='%s'", partition);
		if (block_id)
			xstrfmtcat(query, ", id_block='%s'", block_id);
		if (job_ptr->wckey)
			xstrfmtcat(query, ", wckey='%s'", job_ptr->wckey);
		if (node_inx)
			xstrfmtcat(query, ", node_inx='%s'", node_inx);
		if (gres_req)
			xstrfmtcat(query, ", gres_req='%s'", gres_req);
		if (gres_alloc)
			xstrfmtcat(query, ", gres_alloc='%s'", gres_alloc);
		if (array_recs && array_recs->task_id_str)
			xstrfmtcat(query, ", array_task_str='%s', "
				   "array_max_tasks=%u, array_task_pending=%u",
				   array_recs->task_id_str,
				   array_recs->max_run_tasks,
				   array_recs->task_cnt);
		else
			xstrfmtcat(query, ", array_task_str=NULL, "
				   "array_task_pending=0");

		if (job_ptr->tres_alloc_str)
			xstrfmtcat(query, ", tres_alloc='%s'",
				   job_ptr->tres_alloc_str);

		if (debug_flags & DEBUG_FLAG_DB_JOB)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
	try_again:
		if (!(job_ptr->db_index = mysql_db_insert_ret_id(
			      mysql_conn, query))) {
			if (!reinit) {
				error("It looks like the storage has gone "
				      "away trying to reconnect");
				mysql_db_close_db_connection(
					mysql_conn);
				/* reconnect */
				check_connection(mysql_conn);
				reinit = 1;
				goto try_again;
			} else
				rc = SLURM_ERROR;
		}
	} else {
		query = xstrdup_printf("update \"%s_%s\" set nodelist='%s', ",
				       mysql_conn->cluster_name,
				       job_table, nodes);

		if (wckeyid)
			xstrfmtcat(query, "id_wckey=%u, ", wckeyid);
		if (job_ptr->account)
			xstrfmtcat(query, "account='%s', ", job_ptr->account);
		if (partition)
			xstrfmtcat(query, "`partition`='%s', ", partition);
		if (block_id)
			xstrfmtcat(query, "id_block='%s', ", block_id);
		if (job_ptr->wckey)
			xstrfmtcat(query, "wckey='%s', ", job_ptr->wckey);
		if (node_inx)
			xstrfmtcat(query, "node_inx='%s', ", node_inx);
		if (gres_req)
			xstrfmtcat(query, "gres_req='%s', ", gres_req);
		if (gres_alloc)
			xstrfmtcat(query, "gres_alloc='%s', ", gres_alloc);
		if (array_recs && array_recs->task_id_str)
			xstrfmtcat(query, "array_task_str='%s', "
				   "array_max_tasks=%u, "
				   "array_task_pending=%u, ",
				   array_recs->task_id_str,
				   array_recs->max_run_tasks,
				   array_recs->task_cnt);
		else
			xstrfmtcat(query, "array_task_str=NULL, "
				   "array_task_pending=0, ");

		if (job_ptr->tres_alloc_str)
			xstrfmtcat(query, "tres_alloc='%s', ",
				   job_ptr->tres_alloc_str);

		xstrfmtcat(query, "time_start=%ld, job_name='%s', state=%u, "
			   "nodes_alloc=%u, id_qos=%u, "
			   "id_assoc=%u, id_resv=%u, "
			   "timelimit=%u, mem_req=%u, "
			   "id_array_job=%u, id_array_task=%u, "
			   "time_eligible=%ld where job_db_inx=%d",
			   start_time, jname, job_state,
			   node_cnt, job_ptr->qos_id,
			   job_ptr->assoc_id,
			   job_ptr->resv_id, job_ptr->time_limit,
			   job_ptr->details->pn_min_memory,
			   job_ptr->array_job_id,
			   job_ptr->array_task_id,
			   begin_time, job_ptr->db_index);

		if (debug_flags & DEBUG_FLAG_DB_JOB)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
		rc = mysql_db_query(mysql_conn, query);
	}

	xfree(block_id);
	xfree(partition);
	xfree(gres_req);
	xfree(gres_alloc);
	xfree(jname);
	xfree(query);

	/* now we will reset all the steps */
	if (IS_JOB_RESIZING(job_ptr)) {
		/* FIXME : Verify this is still needed */
		if (IS_JOB_SUSPENDED(job_ptr))
			as_mysql_suspend(mysql_conn, job_db_inx, job_ptr);
	}

	return rc;
}
Exemplo n.º 3
0
/*
 * slurm_sprint_job_info - output information about a specific Slurm
 *	job based upon message as loaded using slurm_load_jobs
 * IN job_ptr - an individual job information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *           NULL is returned on failure.
 */
extern char *
slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner )
{
	int i, j, k;
	char time_str[32], *group_name, *user_name;
	char tmp1[128], tmp2[128];
	char *tmp6_ptr;
	char tmp_line[1024 * 128];
	char tmp_path[MAXPATHLEN];
	char *ionodes = NULL;
	uint16_t exit_status = 0, term_sig = 0;
	job_resources_t *job_resrcs = job_ptr->job_resrcs;
	char *out = NULL;
	time_t run_time;
	uint32_t min_nodes, max_nodes = 0;
	char *nodelist = "NodeList";
	bitstr_t *cpu_bitmap;
	char *host;
	int sock_inx, sock_reps, last;
	int abs_node_inx, rel_node_inx;
	int64_t nice;
	int bit_inx, bit_reps;
	uint32_t *last_mem_alloc_ptr = NULL;
	uint32_t last_mem_alloc = NO_VAL;
	char *last_hosts;
	hostlist_t hl, hl_last;
	char select_buf[122];
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	uint32_t threads;
	char *line_end = (one_liner) ? " " : "\n   ";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		nodelist = "MidplaneList";
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &ionodes);
	}

	/****** Line 1 ******/
	xstrfmtcat(out, "JobId=%u ", job_ptr->job_id);

	if (job_ptr->array_job_id) {
		if (job_ptr->array_task_str) {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%s ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_str);
		} else {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%u ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_id);
		}
	}
	xstrfmtcat(out, "JobName=%s", job_ptr->name);
	xstrcat(out, line_end);

	/****** Line 2 ******/
	user_name = uid_to_string((uid_t) job_ptr->user_id);
	group_name = gid_to_string((gid_t) job_ptr->group_id);
	xstrfmtcat(out, "UserId=%s(%u) GroupId=%s(%u) MCS_label=%s",
		   user_name, job_ptr->user_id, group_name, job_ptr->group_id,
		   (job_ptr->mcs_label==NULL) ? "N/A" : job_ptr->mcs_label);
	xfree(user_name);
	xfree(group_name);
	xstrcat(out, line_end);

	/****** Line 3 ******/
	nice = ((int64_t)job_ptr->nice) - NICE_OFFSET;
	xstrfmtcat(out, "Priority=%u Nice=%"PRIi64" Account=%s QOS=%s",
		   job_ptr->priority, nice, job_ptr->account, job_ptr->qos);
	if (slurm_get_track_wckey())
		xstrfmtcat(out, " WCKey=%s", job_ptr->wckey);
	xstrcat(out, line_end);

	/****** Line 4 ******/
	xstrfmtcat(out, "JobState=%s ", job_state_string(job_ptr->job_state));

	if (job_ptr->state_desc) {
		/* Replace white space with underscore for easier parsing */
		for (j=0; job_ptr->state_desc[j]; j++) {
			if (isspace((int)job_ptr->state_desc[j]))
				job_ptr->state_desc[j] = '_';
		}
		xstrfmtcat(out, "Reason=%s ", job_ptr->state_desc);
	} else
		xstrfmtcat(out, "Reason=%s ", job_reason_string(job_ptr->state_reason));

	xstrfmtcat(out, "Dependency=%s", job_ptr->dependency);
	xstrcat(out, line_end);

	/****** Line 5 ******/
	xstrfmtcat(out, "Requeue=%u Restarts=%u BatchFlag=%u Reboot=%u ",
		 job_ptr->requeue, job_ptr->restart_cnt, job_ptr->batch_flag,
		 job_ptr->reboot);
	if (WIFSIGNALED(job_ptr->exit_code))
		term_sig = WTERMSIG(job_ptr->exit_code);
	exit_status = WEXITSTATUS(job_ptr->exit_code);
	xstrfmtcat(out, "ExitCode=%u:%u", exit_status, term_sig);
	xstrcat(out, line_end);

	/****** Line 5a (optional) ******/
	if (job_ptr->show_flags & SHOW_DETAIL) {
		if (WIFSIGNALED(job_ptr->derived_ec))
			term_sig = WTERMSIG(job_ptr->derived_ec);
		else
			term_sig = 0;
		exit_status = WEXITSTATUS(job_ptr->derived_ec);
		xstrfmtcat(out, "DerivedExitCode=%u:%u", exit_status, term_sig);
		xstrcat(out, line_end);
	}

	/****** Line 6 ******/
	if (IS_JOB_PENDING(job_ptr))
		run_time = 0;
	else if (IS_JOB_SUSPENDED(job_ptr))
		run_time = job_ptr->pre_sus_time;
	else {
		time_t end_time;
		if (IS_JOB_RUNNING(job_ptr) || (job_ptr->end_time == 0))
			end_time = time(NULL);
		else
			end_time = job_ptr->end_time;
		if (job_ptr->suspend_time) {
			run_time = (time_t)
				(difftime(end_time, job_ptr->suspend_time)
				 + job_ptr->pre_sus_time);
		} else
			run_time = (time_t)
				difftime(end_time, job_ptr->start_time);
	}
	secs2time_str(run_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "RunTime=%s ", time_str);

	if (job_ptr->time_limit == NO_VAL)
		xstrcat(out, "TimeLimit=Partition_Limit ");
	else {
		mins2time_str(job_ptr->time_limit, time_str, sizeof(time_str));
		xstrfmtcat(out, "TimeLimit=%s ", time_str);
	}

	if (job_ptr->time_min == 0)
		xstrcat(out, "TimeMin=N/A");
	else {
		mins2time_str(job_ptr->time_min, time_str, sizeof(time_str));
		xstrfmtcat(out, "TimeMin=%s", time_str);
	}
	xstrcat(out, line_end);

	/****** Line 7 ******/
	slurm_make_time_str(&job_ptr->submit_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "SubmitTime=%s ", time_str);

	slurm_make_time_str(&job_ptr->eligible_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "EligibleTime=%s", time_str);

	xstrcat(out, line_end);

	/****** Line 8 (optional) ******/
	if (job_ptr->resize_time) {
		slurm_make_time_str(&job_ptr->resize_time, time_str, sizeof(time_str));
		xstrfmtcat(out, "ResizeTime=%s", time_str);
		xstrcat(out, line_end);
	}

	/****** Line 9 ******/
	slurm_make_time_str(&job_ptr->start_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "StartTime=%s ", time_str);

	if ((job_ptr->time_limit == INFINITE) &&
	    (job_ptr->end_time > time(NULL)))
		xstrcat(out, "EndTime=Unknown ");
	else {
		slurm_make_time_str(&job_ptr->end_time, time_str, sizeof(time_str));
		xstrfmtcat(out, "EndTime=%s ", time_str);
	}

	if (job_ptr->deadline) {
		slurm_make_time_str(&job_ptr->deadline, time_str, sizeof(time_str));
		xstrfmtcat(out, "Deadline=%s", time_str);
	} else {
		xstrcat(out, "Deadline=N/A");
	}

	xstrcat(out, line_end);

	/****** Line 10 ******/
	if (job_ptr->preempt_time == 0)
		xstrcat(out, "PreemptTime=None ");
	else {
		slurm_make_time_str(&job_ptr->preempt_time, time_str, sizeof(time_str));
		xstrfmtcat(out, "PreemptTime=%s ", time_str);
	}

	if (job_ptr->suspend_time) {
		slurm_make_time_str(&job_ptr->suspend_time, time_str, sizeof(time_str));
		xstrfmtcat(out, "SuspendTime=%s ", time_str);
	} else
		xstrcat(out, "SuspendTime=None ");

	xstrfmtcat(out, "SecsPreSuspend=%ld", (long int)job_ptr->pre_sus_time);
	xstrcat(out, line_end);

	/****** Line 11 ******/
	xstrfmtcat(out, "Partition=%s AllocNode:Sid=%s:%u",
		   job_ptr->partition, job_ptr->alloc_node, job_ptr->alloc_sid);
	xstrcat(out, line_end);

	/****** Line 12 ******/
	xstrfmtcat(out, "Req%s=%s Exc%s=%s", nodelist, job_ptr->req_nodes,
		   nodelist, job_ptr->exc_nodes);
	xstrcat(out, line_end);

	/****** Line 13 ******/
	xstrfmtcat(out, "%s=%s", nodelist, job_ptr->nodes);
	if (job_ptr->nodes && ionodes) {
		xstrfmtcat(out, "[%s]", ionodes);
		xfree(ionodes);
	}
	if (job_ptr->sched_nodes)
		xstrfmtcat(out, " Sched%s=%s", nodelist, job_ptr->sched_nodes);

	xstrcat(out, line_end);

	/****** Line 14 (optional) ******/
	if (job_ptr->batch_host) {
		xstrfmtcat(out, "BatchHost=%s", job_ptr->batch_host);
		xstrcat(out, line_end);
	}

	/****** Line 15 ******/
	if (cluster_flags & CLUSTER_FLAG_BG) {
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &min_nodes);
		if ((min_nodes == 0) || (min_nodes == NO_VAL)) {
			min_nodes = job_ptr->num_nodes;
			max_nodes = job_ptr->max_nodes;
		} else if (job_ptr->max_nodes)
			max_nodes = min_nodes;
	} else if (IS_JOB_PENDING(job_ptr)) {
		min_nodes = job_ptr->num_nodes;
		max_nodes = job_ptr->max_nodes;
		if (max_nodes && (max_nodes < min_nodes))
			min_nodes = max_nodes;
	} else {
		min_nodes = job_ptr->num_nodes;
		max_nodes = 0;
	}

	_sprint_range(tmp_line, sizeof(tmp_line), min_nodes, max_nodes);
	xstrfmtcat(out, "NumNodes=%s ", tmp_line);
	_sprint_range(tmp_line, sizeof(tmp_line), job_ptr->num_cpus, job_ptr->max_cpus);
	xstrfmtcat(out, "NumCPUs=%s ", tmp_line);

	xstrfmtcat(out, "NumTasks=%u ", job_ptr->num_tasks);
	xstrfmtcat(out, "CPUs/Task=%u ", job_ptr->cpus_per_task);

	if (job_ptr->boards_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "ReqB:S:C:T=*:");
	else
		xstrfmtcat(out, "ReqB:S:C:T=%u:", job_ptr->boards_per_node);

	if (job_ptr->sockets_per_board == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->sockets_per_board);

	if (job_ptr->cores_per_socket == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->cores_per_socket);

	if (job_ptr->threads_per_core == (uint16_t) NO_VAL)
		xstrcat(out, "*");
	else
		xstrfmtcat(out, "%u", job_ptr->threads_per_core);

	xstrcat(out, line_end);

	/****** Line 16 ******/
	/* Tres should already of been converted at this point from simple */
	xstrfmtcat(out, "TRES=%s",
		   job_ptr->tres_alloc_str ? job_ptr->tres_alloc_str
					   : job_ptr->tres_req_str);
	xstrcat(out, line_end);

	/****** Line 17 ******/
	if (job_ptr->sockets_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "Socks/Node=* ");
	else
		xstrfmtcat(out, "Socks/Node=%u ", job_ptr->sockets_per_node);

	if (job_ptr->ntasks_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "NtasksPerN:B:S:C=*:");
	else
		xstrfmtcat(out, "NtasksPerN:B:S:C=%u:", job_ptr->ntasks_per_node);

	if (job_ptr->ntasks_per_board == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_board);

	if ((job_ptr->ntasks_per_socket == (uint16_t) NO_VAL) ||
	    (job_ptr->ntasks_per_socket == (uint16_t) INFINITE))
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_socket);

	if ((job_ptr->ntasks_per_core == (uint16_t) NO_VAL) ||
	    (job_ptr->ntasks_per_core == (uint16_t) INFINITE))
		xstrcat(out, "* ");
	else
		xstrfmtcat(out, "%u ", job_ptr->ntasks_per_core);

	if (job_ptr->core_spec == (uint16_t) NO_VAL)
		xstrcat(out, "CoreSpec=*");
	else if (job_ptr->core_spec & CORE_SPEC_THREAD)
		xstrfmtcat(out, "ThreadSpec=%d",
			   (job_ptr->core_spec & (~CORE_SPEC_THREAD)));
	else
		xstrfmtcat(out, "CoreSpec=%u", job_ptr->core_spec);

	xstrcat(out, line_end);

	if (job_resrcs && cluster_flags & CLUSTER_FLAG_BG) {
		if ((job_resrcs->cpu_array_cnt > 0) &&
		    (job_resrcs->cpu_array_value) &&
		    (job_resrcs->cpu_array_reps)) {
			int length = 0;
			xstrcat(out, "CPUs=");
			for (i = 0; i < job_resrcs->cpu_array_cnt; i++) {
				/* only print 60 characters worth of this record */
				if (length > 60) {
					/* skip to last CPU group entry */
					if (i < job_resrcs->cpu_array_cnt - 1) {
						continue;
					}
					/* add ellipsis before last entry */
					xstrcat(out, "...,");
				}

				length += xstrfmtcat(out, "%d", job_resrcs->cpus[i]);
				if (job_resrcs->cpu_array_reps[i] > 1) {
					length += xstrfmtcat(out, "*%d",
							     job_resrcs->cpu_array_reps[i]);
				}
				if (i < job_resrcs->cpu_array_cnt - 1) {
					xstrcat(out, ",");
					length++;
				}
			}
			xstrcat(out, line_end);
		}
	} else if (job_resrcs && job_resrcs->core_bitmap &&
		   ((last = bit_fls(job_resrcs->core_bitmap)) != -1)) {
		hl = hostlist_create(job_resrcs->nodes);
		if (!hl) {
			error("slurm_sprint_job_info: hostlist_create: %s",
			      job_resrcs->nodes);
			return NULL;
		}
		hl_last = hostlist_create(NULL);
		if (!hl_last) {
			error("slurm_sprint_job_info: hostlist_create: NULL");
			hostlist_destroy(hl);
			return NULL;
		}

		bit_inx = 0;
		i = sock_inx = sock_reps = 0;
		abs_node_inx = job_ptr->node_inx[i];

		/* tmp1[] stores the current cpu(s) allocated */
		tmp2[0] = '\0';	/* stores last cpu(s) allocated */
		for (rel_node_inx=0; rel_node_inx < job_resrcs->nhosts;
		     rel_node_inx++) {

			if (sock_reps >=
			    job_resrcs->sock_core_rep_count[sock_inx]) {
				sock_inx++;
				sock_reps = 0;
			}
			sock_reps++;

			bit_reps = job_resrcs->sockets_per_node[sock_inx] *
				   job_resrcs->cores_per_socket[sock_inx];
			host = hostlist_shift(hl);
			threads = _threads_per_core(host);
			cpu_bitmap = bit_alloc(bit_reps * threads);
			for (j = 0; j < bit_reps; j++) {
				if (bit_test(job_resrcs->core_bitmap, bit_inx)){
					for (k = 0; k < threads; k++)
						bit_set(cpu_bitmap,
							(j * threads) + k);
				}
				bit_inx++;
			}
			bit_fmt(tmp1, sizeof(tmp1), cpu_bitmap);
			FREE_NULL_BITMAP(cpu_bitmap);
			/*
			 * If the allocation values for this host are not the
			 * same as the last host, print the report of the last
			 * group of hosts that had identical allocation values.
			 */
			if (xstrcmp(tmp1, tmp2) ||
			    (last_mem_alloc_ptr != job_resrcs->memory_allocated) ||
			    (job_resrcs->memory_allocated &&
			     (last_mem_alloc !=
			      job_resrcs->memory_allocated[rel_node_inx]))) {
				if (hostlist_count(hl_last)) {
					last_hosts =
						hostlist_ranged_string_xmalloc(
						hl_last);
					xstrfmtcat(out,
						   "  Nodes=%s CPU_IDs=%s Mem=%u",
						   last_hosts, tmp2,
						   last_mem_alloc_ptr ?
						   last_mem_alloc : 0);
					xfree(last_hosts);
					xstrcat(out, line_end);

					hostlist_destroy(hl_last);
					hl_last = hostlist_create(NULL);
				}
				strcpy(tmp2, tmp1);
				last_mem_alloc_ptr = job_resrcs->memory_allocated;
				if (last_mem_alloc_ptr)
					last_mem_alloc = job_resrcs->
						memory_allocated[rel_node_inx];
				else
					last_mem_alloc = NO_VAL;
			}
			hostlist_push_host(hl_last, host);
			free(host);

			if (bit_inx > last)
				break;

			if (abs_node_inx > job_ptr->node_inx[i+1]) {
				i += 2;
				abs_node_inx = job_ptr->node_inx[i];
			} else {
				abs_node_inx++;
			}
		}

		if (hostlist_count(hl_last)) {
			last_hosts = hostlist_ranged_string_xmalloc(hl_last);
			xstrfmtcat(out, "  Nodes=%s CPU_IDs=%s Mem=%u",
				 last_hosts, tmp2,
				 last_mem_alloc_ptr ? last_mem_alloc : 0);
			xfree(last_hosts);
			xstrcat(out, line_end);
		}
		hostlist_destroy(hl);
		hostlist_destroy(hl_last);
	}
	/****** Line 18 ******/
	if (job_ptr->pn_min_memory & MEM_PER_CPU) {
		job_ptr->pn_min_memory &= (~MEM_PER_CPU);
		tmp6_ptr = "CPU";
	} else
		tmp6_ptr = "Node";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		convert_num_unit((float)job_ptr->pn_min_cpus, tmp1,
				 sizeof(tmp1), UNIT_NONE, NO_VAL,
				 CONVERT_NUM_UNIT_EXACT);
		xstrfmtcat(out, "MinCPUsNode=%s ", tmp1);
	} else {
		xstrfmtcat(out, "MinCPUsNode=%u ", job_ptr->pn_min_cpus);
	}

	convert_num_unit((float)job_ptr->pn_min_memory, tmp1, sizeof(tmp1),
			 UNIT_MEGA, NO_VAL, CONVERT_NUM_UNIT_EXACT);
	convert_num_unit((float)job_ptr->pn_min_tmp_disk, tmp2, sizeof(tmp2),
			 UNIT_MEGA, NO_VAL, CONVERT_NUM_UNIT_EXACT);
	xstrfmtcat(out, "MinMemory%s=%s MinTmpDiskNode=%s", tmp6_ptr, tmp1, tmp2);
	xstrcat(out, line_end);

	/****** Line ******/
	secs2time_str((time_t)job_ptr->delay_boot, tmp1, sizeof(tmp1));
	xstrfmtcat(out, "Features=%s DelayBoot=%s", job_ptr->features, tmp1);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "Gres=%s Reservation=%s",
		   job_ptr->gres, job_ptr->resv_name);
	xstrcat(out, line_end);

	/****** Line 20 ******/
	xstrfmtcat(out, "OverSubscribe=%s Contiguous=%d Licenses=%s Network=%s",
		   job_share_string(job_ptr->shared), job_ptr->contiguous,
		   job_ptr->licenses, job_ptr->network);
	xstrcat(out, line_end);

	/****** Line 21 ******/
	xstrfmtcat(out, "Command=%s", job_ptr->command);
	xstrcat(out, line_end);

	/****** Line 22 ******/
	xstrfmtcat(out, "WorkDir=%s", job_ptr->work_dir);

	if (cluster_flags & CLUSTER_FLAG_BG) {
		/****** Line 23 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_BG_ID);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "Block_ID=%s", select_buf);
		}

		/****** Line 24 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MIXED_SHORT);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrcat(out, select_buf);
		}

		if (cluster_flags & CLUSTER_FLAG_BGL) {
			/****** Line 25 (optional) ******/
			select_g_select_jobinfo_sprint(
				job_ptr->select_jobinfo,
				select_buf, sizeof(select_buf),
				SELECT_PRINT_BLRTS_IMAGE);
			if (select_buf[0] != '\0') {
				xstrcat(out, line_end);
				xstrfmtcat(out, "BlrtsImage=%s", select_buf);
			}
		}
		/****** Line 26 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_LINUX_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			if (cluster_flags & CLUSTER_FLAG_BGL)
				xstrfmtcat(out, "LinuxImage=%s", select_buf);
			else
				xstrfmtcat(out, "CnloadImage=%s", select_buf);
		}
		/****** Line 27 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MLOADER_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "MloaderImage=%s", select_buf);
		}
		/****** Line 28 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_RAMDISK_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			if (cluster_flags & CLUSTER_FLAG_BGL)
				xstrfmtcat(out, "RamDiskImage=%s", select_buf);
			else
				xstrfmtcat(out, "IoloadImage=%s", select_buf);
		}
	}

	/****** Line (optional) ******/
	if (job_ptr->admin_comment) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "AdminComment=%s ", job_ptr->admin_comment);
	}

	/****** Line (optional) ******/
	if (job_ptr->comment) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "Comment=%s ", job_ptr->comment);
	}

	/****** Line 30 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stderr(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdErr=%s", tmp_path);
	}

	/****** Line 31 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stdin(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdIn=%s", tmp_path);
	}

	/****** Line 32 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stdout(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdOut=%s", tmp_path);
	}

	/****** Line 33 (optional) ******/
	if (job_ptr->batch_script) {
		xstrcat(out, line_end);
		xstrcat(out, "BatchScript=\n");
		xstrcat(out, job_ptr->batch_script);
	}

	/****** Line 34 (optional) ******/
	if (job_ptr->req_switch) {
		char time_buf[32];
		xstrcat(out, line_end);
		secs2time_str((time_t) job_ptr->wait4switch, time_buf,
			      sizeof(time_buf));
		xstrfmtcat(out, "Switches=%u@%s\n", job_ptr->req_switch, time_buf);
	}

	/****** Line 35 (optional) ******/
	if (job_ptr->burst_buffer) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "BurstBuffer=%s", job_ptr->burst_buffer);
	}

	/****** Line (optional) ******/
	if (job_ptr->burst_buffer_state) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "BurstBufferState=%s",
			   job_ptr->burst_buffer_state);
	}

	/****** Line 36 (optional) ******/
	if (cpu_freq_debug(NULL, NULL, tmp1, sizeof(tmp1),
			   job_ptr->cpu_freq_gov, job_ptr->cpu_freq_min,
			   job_ptr->cpu_freq_max, NO_VAL) != 0) {
		xstrcat(out, line_end);
		xstrcat(out, tmp1);
	}

	/****** Line 37 ******/
	xstrcat(out, line_end);
	xstrfmtcat(out, "Power=%s", power_flags_str(job_ptr->power_flags));

	/****** Line 38 (optional) ******/
	if (job_ptr->bitflags) {
		xstrcat(out, line_end);
		if (job_ptr->bitflags & KILL_INV_DEP)
			xstrcat(out, "KillOInInvalidDependent=Yes");
		if (job_ptr->bitflags & NO_KILL_INV_DEP)
			xstrcat(out, "KillOInInvalidDependent=No");
		if (job_ptr->bitflags & SPREAD_JOB)
			xstrcat(out, "SpreadJob=Yes");
	}

	/****** END OF JOB RECORD ******/
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}
Exemplo n.º 4
0
extern int as_mysql_job_start(mysql_conn_t *mysql_conn,
			      struct job_record *job_ptr)
{
	int rc=SLURM_SUCCESS;
	char *nodes = NULL, *jname = NULL, *node_inx = NULL;
	int track_steps = 0;
	char *block_id = NULL;
	char *query = NULL;
	int reinit = 0;
	time_t begin_time, check_time, start_time, submit_time;
	uint32_t wckeyid = 0;
	int job_state, node_cnt = 0;
	uint32_t job_db_inx = job_ptr->db_index;

	if ((!job_ptr->details || !job_ptr->details->submit_time)
	    && !job_ptr->resize_time) {
		error("as_mysql_job_start: "
		      "Not inputing this job, it has no submit time.");
		return SLURM_ERROR;
	}

	if (check_connection(mysql_conn) != SLURM_SUCCESS)
		return ESLURM_DB_CONNECTION;

	debug2("as_mysql_slurmdb_job_start() called");

	job_state = job_ptr->job_state;

	if (job_ptr->resize_time) {
		begin_time  = job_ptr->resize_time;
		submit_time = job_ptr->resize_time;
		start_time  = job_ptr->resize_time;
	} else {
		begin_time  = job_ptr->details->begin_time;
		submit_time = job_ptr->details->submit_time;
		start_time  = job_ptr->start_time;
	}

	/* Since we need a new db_inx make sure the old db_inx
	 * removed. This is most likely the only time we are going to
	 * be notified of the change also so make the state without
	 * the resize. */
	if (IS_JOB_RESIZING(job_ptr)) {
		/* If we have a db_index lets end the previous record. */
		if (!job_ptr->db_index) {
			error("We don't have a db_index for job %u, "
			      "this should never happen.", job_ptr->job_id);
			job_ptr->db_index = _get_db_index(mysql_conn,
							  submit_time,
							  job_ptr->job_id,
							  job_ptr->assoc_id);
		}

		if (job_ptr->db_index)
			as_mysql_job_complete(mysql_conn, job_ptr);

		job_state &= (~JOB_RESIZING);
		job_ptr->db_index = 0;
	}

	job_state &= JOB_STATE_BASE;

	/* See what we are hearing about here if no start time. If
	 * this job latest time is before the last roll up we will
	 * need to reset it to look at this job. */
	if (start_time)
		check_time = start_time;
	else if (begin_time)
		check_time = begin_time;
	else
		check_time = submit_time;

	slurm_mutex_lock(&rollup_lock);
	if (check_time < global_last_rollup) {
		MYSQL_RES *result = NULL;
		MYSQL_ROW row;

		/* check to see if we are hearing about this time for the
		 * first time.
		 */
		query = xstrdup_printf("select job_db_inx "
				       "from \"%s_%s\" where id_job=%u and "
				       "time_submit=%ld and time_eligible=%ld "
				       "and time_start=%ld;",
				       mysql_conn->cluster_name,
				       job_table, job_ptr->job_id,
				       submit_time, begin_time, start_time);
		debug3("%d(%s:%d) query\n%s",
		       mysql_conn->conn, THIS_FILE, __LINE__, query);
		if (!(result =
		      mysql_db_query_ret(mysql_conn, query, 0))) {
			xfree(query);
			slurm_mutex_unlock(&rollup_lock);
			return SLURM_ERROR;
		}
		xfree(query);
		if ((row = mysql_fetch_row(result))) {
			mysql_free_result(result);
			debug4("revieved an update for a "
			       "job (%u) already known about",
			       job_ptr->job_id);
			slurm_mutex_unlock(&rollup_lock);
			goto no_rollup_change;
		}
		mysql_free_result(result);

		if (job_ptr->start_time)
			debug("Need to reroll usage from %sJob %u "
			      "from %s started then and we are just "
			      "now hearing about it.",
			      ctime(&check_time),
			      job_ptr->job_id, mysql_conn->cluster_name);
		else if (begin_time)
			debug("Need to reroll usage from %sJob %u "
			      "from %s became eligible then and we are just "
			      "now hearing about it.",
			      ctime(&check_time),
			      job_ptr->job_id, mysql_conn->cluster_name);
		else
			debug("Need to reroll usage from %sJob %u "
			      "from %s was submitted then and we are just "
			      "now hearing about it.",
			      ctime(&check_time),
			      job_ptr->job_id, mysql_conn->cluster_name);

		global_last_rollup = check_time;
		slurm_mutex_unlock(&rollup_lock);

		/* If the times here are later than the daily_rollup
		   or monthly rollup it isn't a big deal since they
		   are always shrunk down to the beginning of each
		   time period.
		*/
		query = xstrdup_printf("update \"%s_%s\" set "
				       "hourly_rollup=%ld, "
				       "daily_rollup=%ld, monthly_rollup=%ld",
				       mysql_conn->cluster_name,
				       last_ran_table, check_time,
				       check_time, check_time);
		debug3("%d(%s:%d) query\n%s",
		       mysql_conn->conn, THIS_FILE, __LINE__, query);
		rc = mysql_db_query(mysql_conn, query);
		xfree(query);
	} else
		slurm_mutex_unlock(&rollup_lock);

no_rollup_change:

	if (job_ptr->name && job_ptr->name[0])
		jname = slurm_add_slash_to_quotes(job_ptr->name);
	else {
		jname = xstrdup("allocation");
		track_steps = 1;
	}

	if (job_ptr->nodes && job_ptr->nodes[0])
		nodes = job_ptr->nodes;
	else
		nodes = "None assigned";

	if (job_ptr->batch_flag)
		track_steps = 1;

	if (slurmdbd_conf) {
		block_id = xstrdup(job_ptr->comment);
		node_cnt = job_ptr->total_nodes;
		node_inx = job_ptr->network;
	} else {
		char temp_bit[BUF_SIZE];

		if (job_ptr->node_bitmap) {
			node_inx = bit_fmt(temp_bit, sizeof(temp_bit),
					   job_ptr->node_bitmap);
		}
#ifdef HAVE_BG
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_BLOCK_ID,
					    &block_id);
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &node_cnt);
#else
		node_cnt = job_ptr->total_nodes;
#endif
	}

	/* If there is a start_time get the wckeyid.  If the job is
	 * cancelled before the job starts we also want to grab it. */
	if (job_ptr->assoc_id
	    && (job_ptr->start_time || IS_JOB_CANCELLED(job_ptr)))
		wckeyid = _get_wckeyid(mysql_conn, &job_ptr->wckey,
				       job_ptr->user_id,
				       mysql_conn->cluster_name,
				       job_ptr->assoc_id);

	if (!job_ptr->db_index) {
		if (!begin_time)
			begin_time = submit_time;
		query = xstrdup_printf(
			"insert into \"%s_%s\" "
			"(id_job, id_assoc, id_qos, id_wckey, id_user, "
			"id_group, nodelist, id_resv, timelimit, "
			"time_eligible, time_submit, time_start, "
			"job_name, track_steps, state, priority, cpus_req, "
			"cpus_alloc, nodes_alloc",
			mysql_conn->cluster_name, job_table);

		if (job_ptr->account)
			xstrcat(query, ", account");
		if (job_ptr->partition)
			xstrcat(query, ", partition");
		if (block_id)
			xstrcat(query, ", id_block");
		if (job_ptr->wckey)
			xstrcat(query, ", wckey");
		if (node_inx)
			xstrcat(query, ", node_inx");

		xstrfmtcat(query,
			   ") values (%u, %u, %u, %u, %u, %u, '%s', %u, %u, "
			   "%ld, %ld, %ld, '%s', %u, %u, %u, %u, %u, %u",
			   job_ptr->job_id, job_ptr->assoc_id,
			   job_ptr->qos_id, wckeyid,
			   job_ptr->user_id, job_ptr->group_id, nodes,
			   job_ptr->resv_id, job_ptr->time_limit,
			   begin_time, submit_time, start_time,
			   jname, track_steps, job_state,
			   job_ptr->priority, job_ptr->details->min_cpus,
			   job_ptr->total_cpus, node_cnt);

		if (job_ptr->account)
			xstrfmtcat(query, ", '%s'", job_ptr->account);
		if (job_ptr->partition)
			xstrfmtcat(query, ", '%s'", job_ptr->partition);
		if (block_id)
			xstrfmtcat(query, ", '%s'", block_id);
		if (job_ptr->wckey)
			xstrfmtcat(query, ", '%s'", job_ptr->wckey);
		if (node_inx)
			xstrfmtcat(query, ", '%s'", node_inx);

		xstrfmtcat(query,
			   ") on duplicate key update "
			   "job_db_inx=LAST_INSERT_ID(job_db_inx), "
			   "id_wckey=%u, id_user=%u, id_group=%u, "
			   "nodelist='%s', id_resv=%u, timelimit=%u, "
			   "time_submit=%ld, time_start=%ld, "
			   "job_name='%s', track_steps=%u, id_qos=%u, "
			   "state=greatest(state, %u), priority=%u, "
			   "cpus_req=%u, cpus_alloc=%u, nodes_alloc=%u",
			   wckeyid, job_ptr->user_id, job_ptr->group_id, nodes,
			   job_ptr->resv_id, job_ptr->time_limit,
			   submit_time, start_time,
			   jname, track_steps, job_ptr->qos_id, job_state,
			   job_ptr->priority, job_ptr->details->min_cpus,
			   job_ptr->total_cpus, node_cnt);

		if (job_ptr->account)
			xstrfmtcat(query, ", account='%s'", job_ptr->account);
		if (job_ptr->partition)
			xstrfmtcat(query, ", partition='%s'",
				   job_ptr->partition);
		if (block_id)
			xstrfmtcat(query, ", id_block='%s'", block_id);
		if (job_ptr->wckey)
			xstrfmtcat(query, ", wckey='%s'", job_ptr->wckey);
		if (node_inx)
			xstrfmtcat(query, ", node_inx='%s'", node_inx);

		debug3("%d(%s:%d) query\n%s",
		       mysql_conn->conn, THIS_FILE, __LINE__, query);
	try_again:
		if (!(job_ptr->db_index = mysql_db_insert_ret_id(
			      mysql_conn, query))) {
			if (!reinit) {
				error("It looks like the storage has gone "
				      "away trying to reconnect");
				mysql_db_close_db_connection(
					mysql_conn);
				/* reconnect */
				check_connection(mysql_conn);
				reinit = 1;
				goto try_again;
			} else
				rc = SLURM_ERROR;
		}
	} else {
		query = xstrdup_printf("update \"%s_%s\" set nodelist='%s', ",
				       mysql_conn->cluster_name,
				       job_table, nodes);

		if (job_ptr->account)
			xstrfmtcat(query, "account='%s', ", job_ptr->account);
		if (job_ptr->partition)
			xstrfmtcat(query, "partition='%s', ",
				   job_ptr->partition);
		if (block_id)
			xstrfmtcat(query, "id_block='%s', ", block_id);
		if (job_ptr->wckey)
			xstrfmtcat(query, "wckey='%s', ", job_ptr->wckey);
		if (node_inx)
			xstrfmtcat(query, "node_inx='%s', ", node_inx);

		xstrfmtcat(query, "time_start=%ld, job_name='%s', state=%u, "
			   "cpus_alloc=%u, nodes_alloc=%u, id_qos=%u, "
			   "id_assoc=%u, id_wckey=%u, id_resv=%u, timelimit=%u "
			   "where job_db_inx=%d",
			   start_time, jname, job_state,
			   job_ptr->total_cpus, node_cnt, job_ptr->qos_id,
			   job_ptr->assoc_id, wckeyid,
			   job_ptr->resv_id, job_ptr->time_limit,
			   job_ptr->db_index);
		debug3("%d(%s:%d) query\n%s",
		       mysql_conn->conn, THIS_FILE, __LINE__, query);
		rc = mysql_db_query(mysql_conn, query);
	}

	xfree(block_id);
	xfree(jname);
	xfree(query);

	/* now we will reset all the steps */
	if (IS_JOB_RESIZING(job_ptr)) {
		if (IS_JOB_SUSPENDED(job_ptr))
			as_mysql_suspend(mysql_conn, job_db_inx, job_ptr);
		/* Here we aren't sure how many cpus are being changed here in
		   the step since we don't have that information from the
		   job.  The resize of steps shouldn't happen very often in
		   the first place (srun --no-kill option), and this don't
		   effect accounting in the first place so it isn't a
		   big deal. */

		query = xstrdup_printf("update \"%s_%s\" set job_db_inx=%u "
				       "where job_db_inx=%u;",
				       mysql_conn->cluster_name, step_table,
				       job_ptr->db_index, job_db_inx);

		debug3("%d(%s:%d) query\n%s",
		       mysql_conn->conn, THIS_FILE, __LINE__, query);
		rc = mysql_db_query(mysql_conn, query);
		xfree(query);
	}

	return rc;
}
Exemplo n.º 5
0
Arquivo: req.c Projeto: treydock/slurm
static int
_handle_completion(int fd, stepd_step_rec_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	int first;
	int last;
	jobacctinfo_t *jobacct = NULL;
	int step_rc;
	char* buf;
	int len;
	Buf buffer;
	bool lock_set = false;

	debug("_handle_completion for job %u.%u",
	      job->jobid, job->stepid);

	debug3("  uid = %d", uid);
	if (!_slurm_authorized_user(uid)) {
		debug("step completion message from uid %ld for job %u.%u ",
		      (long)uid, job->jobid, job->stepid);
		rc = -1;
		errnum = EPERM;
		/* Send the return code and errno */
		safe_write(fd, &rc, sizeof(int));
		safe_write(fd, &errnum, sizeof(int));
		return SLURM_SUCCESS;
	}

	safe_read(fd, &first, sizeof(int));
	safe_read(fd, &last, sizeof(int));
	safe_read(fd, &step_rc, sizeof(int));

	/*
	 * We must not use getinfo over a pipe with slurmd here
	 * Indeed, slurmstepd does a large use of setinfo over a pipe
	 * with slurmd and doing the reverse can result in a deadlock
	 * scenario with slurmd :
	 * slurmd(lockforread,write)/slurmstepd(write,lockforread)
	 * Do pack/unpack instead to be sure of independances of
	 * slurmd and slurmstepd
	 */
	safe_read(fd, &len, sizeof(int));
	buf = xmalloc(len);
	safe_read(fd, buf, len);
	buffer = create_buf(buf, len);
	jobacctinfo_unpack(&jobacct, SLURM_PROTOCOL_VERSION,
			   PROTOCOL_TYPE_SLURM, buffer, 1);
	free_buf(buffer);

	/*
	 * Record the completed nodes
	 */
	pthread_mutex_lock(&step_complete.lock);
	lock_set = true;
	if (! step_complete.wait_children) {
		rc = -1;
		errnum = ETIMEDOUT; /* not used anyway */
		goto timeout;
	}

	/* SlurmUser or root can craft a launch without a valid credential
	 * ("srun --no-alloc ...") and no tree information can be built
	 *  without the hostlist from the credential. */
	if (step_complete.rank >= 0) {
#if 0
		char bits_string[128];
		debug2("Setting range %d (bit %d) through %d(bit %d)",
		       first, first-(step_complete.rank+1),
		       last, last-(step_complete.rank+1));
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2("  before bits: %s", bits_string);
#endif
		bit_nset(step_complete.bits,
			 first - (step_complete.rank+1),
			 last - (step_complete.rank+1));
#if 0
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2("  after bits: %s", bits_string);
#endif
	}
	step_complete.step_rc = MAX(step_complete.step_rc, step_rc);

	/************* acct stuff ********************/
	jobacctinfo_aggregate(step_complete.jobacct, jobacct);
timeout:
	jobacctinfo_destroy(jobacct);
	/*********************************************/

	/* Send the return code and errno, we do this within the locked
	 * region to ensure that the stepd doesn't exit before we can
	 * perform this send. */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	pthread_cond_signal(&step_complete.cond);
	pthread_mutex_unlock(&step_complete.lock);

	return SLURM_SUCCESS;


rwfail:	if (lock_set) {
		pthread_cond_signal(&step_complete.cond);
		pthread_mutex_unlock(&step_complete.lock);
	}
	return SLURM_FAILURE;
}
Exemplo n.º 6
0
/* Attempt to schedule a specific job on specific available nodes
 * IN job_ptr - job to schedule
 * IN/OUT avail_bitmap - nodes available/selected to use
 * IN exc_core_bitmap - cores which can not be used
 * RET SLURM_SUCCESS on success, otherwise an error code
 */
static int  _try_sched(struct job_record *job_ptr, bitstr_t **avail_bitmap,
		       uint32_t min_nodes, uint32_t max_nodes,
		       uint32_t req_nodes, bitstr_t *exc_core_bitmap)
{
	bitstr_t *tmp_bitmap;
	int rc = SLURM_SUCCESS;
	int feat_cnt = _num_feature_count(job_ptr);
	List preemptee_candidates = NULL;

	if (feat_cnt) {
		/* Ideally schedule the job feature by feature,
		 * but I don't want to add that complexity here
		 * right now, so clear the feature counts and try
		 * to schedule. This will work if there is only
		 * one feature count. It should work fairly well
		 * in cases where there are multiple feature
		 * counts. */
		struct job_details *detail_ptr = job_ptr->details;
		ListIterator feat_iter;
		struct feature_record *feat_ptr;
		int i = 0, list_size;
		uint16_t *feat_cnt_orig = NULL, high_cnt = 0;

		/* Clear the feature counts */
		list_size = list_count(detail_ptr->feature_list);
		feat_cnt_orig = xmalloc(sizeof(uint16_t) * list_size);
		feat_iter = list_iterator_create(detail_ptr->feature_list);
		if (feat_iter == NULL)
			fatal("list_iterator_create: malloc failure");
		while ((feat_ptr =
			(struct feature_record *) list_next(feat_iter))) {
			high_cnt = MAX(high_cnt, feat_ptr->count);
			feat_cnt_orig[i++] = feat_ptr->count;
			feat_ptr->count = 0;
		}
		list_iterator_destroy(feat_iter);

		if ((job_req_node_filter(job_ptr, *avail_bitmap) !=
		     SLURM_SUCCESS) ||
		    (bit_set_count(*avail_bitmap) < high_cnt)) {
			rc = ESLURM_NODES_BUSY;
		} else {
			preemptee_candidates =
					slurm_find_preemptable_jobs(job_ptr);
			rc = select_g_job_test(job_ptr, *avail_bitmap,
					       high_cnt, max_nodes, req_nodes,
					       SELECT_MODE_WILL_RUN,
					       preemptee_candidates, NULL,
					       exc_core_bitmap);
		}

		/* Restore the feature counts */
		i = 0;
		feat_iter = list_iterator_create(detail_ptr->feature_list);
		while ((feat_ptr =
			(struct feature_record *) list_next(feat_iter))) {
			feat_ptr->count = feat_cnt_orig[i++];
		}
		list_iterator_destroy(feat_iter);
		xfree(feat_cnt_orig);
	} else {
		/* Try to schedule the job. First on dedicated nodes
		 * then on shared nodes (if so configured). */
		uint16_t orig_shared;
		time_t now = time(NULL);
		char str[100];

		preemptee_candidates = slurm_find_preemptable_jobs(job_ptr);
		orig_shared = job_ptr->details->shared;
		job_ptr->details->shared = 0;
		tmp_bitmap = bit_copy(*avail_bitmap);

		if (exc_core_bitmap) {
			bit_fmt(str, (sizeof(str) - 1), exc_core_bitmap);
			debug2(" _try_sched with exclude core bitmap: %s",str);
		}

		rc = select_g_job_test(job_ptr, *avail_bitmap, min_nodes,
				       max_nodes, req_nodes,
				       SELECT_MODE_WILL_RUN,
				       preemptee_candidates, NULL,
				       exc_core_bitmap);

		job_ptr->details->shared = orig_shared;

		if (((rc != SLURM_SUCCESS) || (job_ptr->start_time > now)) &&
		    (orig_shared != 0)) {
			FREE_NULL_BITMAP(*avail_bitmap);
			*avail_bitmap= tmp_bitmap;
			rc = select_g_job_test(job_ptr, *avail_bitmap,
					       min_nodes, max_nodes, req_nodes,
					       SELECT_MODE_WILL_RUN,
					       preemptee_candidates, NULL,
					       exc_core_bitmap);
		} else
			FREE_NULL_BITMAP(tmp_bitmap);
	}

	if (preemptee_candidates)
		list_destroy(preemptee_candidates);
	return rc;

}