示例#1
0
文件: squeue.c 项目: bingzhang/slurm
/* _print_job_step - print the specified job step's information */
static int
_print_job_steps( bool clear_old )
{
	int error_code;
	static job_step_info_response_msg_t * old_step_ptr = NULL;
	static job_step_info_response_msg_t  * new_step_ptr;
	uint16_t show_flags = 0;

	if (params.all_flag)
		show_flags |= SHOW_ALL;

	if (old_step_ptr) {
		if (clear_old)
			old_step_ptr->last_update = 0;
		/* Use a last_update time of 0 so that we can get an updated
		 * run_time for jobs rather than just its start_time */
		error_code = slurm_get_job_steps((time_t) 0, NO_VAL, NO_VAL,
						 &new_step_ptr, show_flags);
		if (error_code ==  SLURM_SUCCESS)
			slurm_free_job_step_info_response_msg( old_step_ptr );
		else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_SUCCESS;
			new_step_ptr = old_step_ptr;
		}
	} else {
		error_code = slurm_get_job_steps((time_t) 0, NO_VAL, NO_VAL,
						 &new_step_ptr, show_flags);
	}
	if (error_code) {
		slurm_perror ("slurm_get_job_steps error");
		return SLURM_ERROR;
	}
	old_step_ptr = new_step_ptr;

	if (params.verbose) {
		printf ("last_update_time=%ld records=%u\n",
			(long) new_step_ptr->last_update,
			new_step_ptr->job_step_count);
	}

	if (!params.format && !params.format_long)
		params.format = "%.15i %.8j %.9P %.8u %.9M %N";

	if (!params.format_list) {
		if (params.format)
			parse_format(params.format);
		else if (params.format_long)
			parse_long_format(params.format_long);
	}

	print_steps_array( new_step_ptr->job_steps,
			   new_step_ptr->job_step_count,
			   params.format_list );
	return SLURM_SUCCESS;
}
示例#2
0
文件: signal.c 项目: FredHutch/slurm
/*
 * slurm_terminate_job_step - terminates a job step by sending a
 * 	REQUEST_TERMINATE_TASKS rpc to all slurmd of a job step.
 * IN job_id  - the job's id
 * IN step_id - the job step's id - use SLURM_BATCH_SCRIPT as the step_id
 *              to terminate a job's batch script
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
extern int
slurm_terminate_job_step (uint32_t job_id, uint32_t step_id)
{
	resource_allocation_response_msg_t *alloc_info = NULL;
	job_step_info_response_msg_t *step_info = NULL;
	int rc = 0;
	int i;
	int save_errno = 0;

	if (slurm_allocation_lookup_lite(job_id, &alloc_info)) {
		return -1;
	}

	/*
	 * The controller won't give us info about the batch script job step,
	 * so we need to handle that seperately.
	 */
	if (step_id == SLURM_BATCH_SCRIPT) {
		rc = _terminate_batch_script_step(alloc_info);
		slurm_free_resource_allocation_response_msg(alloc_info);
		errno = rc;
		return rc ? -1 : 0;
	}

	/*
	 * Otherwise, look through the list of job step info and find
	 * the one matching step_id.  Terminate that step.
	 */
	rc = slurm_get_job_steps((time_t)0, job_id, step_id,
				 &step_info, SHOW_ALL);
	if (rc != 0) {
		save_errno = errno;
		goto fail;
	}
	for (i = 0; i < step_info->job_step_count; i++) {
		if ((step_info->job_steps[i].job_id == job_id) &&
		    (step_info->job_steps[i].step_id == step_id)) {
			rc = _terminate_job_step(&step_info->job_steps[i],
						 alloc_info);
			save_errno = errno;
			break;
		}
	}
	slurm_free_job_step_info_response_msg(step_info);
fail:
	slurm_free_resource_allocation_response_msg(alloc_info);
	errno = save_errno;
	return rc ? -1 : 0;
}
示例#3
0
文件: update_step.c 项目: A1ve5/slurm
/* Return the current time limit of the specified job/step_id or NO_VAL if the
 * information is not available */
static uint32_t _get_step_time(uint32_t job_id, uint32_t step_id)
{
	uint32_t time_limit = NO_VAL;
	int i, rc;
	job_step_info_response_msg_t *resp;

	rc = slurm_get_job_steps((time_t) 0, job_id, step_id, &resp, SHOW_ALL);
	if (rc == SLURM_SUCCESS) {
		for (i = 0; i < resp->job_step_count; i++) {
			if ((resp->job_steps[i].job_id != job_id) ||
			    (resp->job_steps[i].step_id != step_id))
				continue;	/* should not happen */
			time_limit = resp->job_steps[i].time_limit;
			break;
		}
		slurm_free_job_step_info_response_msg(resp);
	} else {
		error("Could not load state information for step %u.%u: %m",
		      job_id, step_id);
	}

	return time_limit;
}
示例#4
0
文件: sstat.c 项目: Cray/slurm
int main(int argc, char **argv)
{
	ListIterator itr = NULL;
	uint32_t req_cpufreq = NO_VAL;
	uint32_t stepid = NO_VAL;
	slurmdb_selected_step_t *selected_step = NULL;

#ifdef HAVE_ALPS_CRAY
	error("The sstat command is not supported on Cray systems");
	return 1;
#endif
#ifdef HAVE_BG
	error("The sstat command is not supported on IBM BlueGene systems");
	return 1;
#endif

	slurm_conf_init(NULL);
	print_fields_list = list_create(NULL);
	print_fields_itr = list_iterator_create(print_fields_list);

	parse_command_line(argc, argv);
	if (!params.opt_job_list || !list_count(params.opt_job_list)) {
		error("You didn't give me any jobs to stat.");
		return 1;
	}

	print_fields_header(print_fields_list);
	itr = list_iterator_create(params.opt_job_list);
	while ((selected_step = list_next(itr))) {
		char *nodelist = NULL;
		bool free_nodelist = false;
		if (selected_step->stepid == INFINITE) {
			/* get the batch step info */
			job_info_msg_t *job_ptr = NULL;
			hostlist_t hl;

			if (slurm_load_job(
				    &job_ptr, selected_step->jobid, SHOW_ALL)) {
				error("couldn't get info for job %u",
				      selected_step->jobid);
				continue;
			}

			stepid = NO_VAL;
			hl = hostlist_create(job_ptr->job_array[0].nodes);
			nodelist = hostlist_pop(hl);
			free_nodelist = true;
			hostlist_destroy(hl);
			slurm_free_job_info_msg(job_ptr);
		} else if (selected_step->stepid != NO_VAL) {
			stepid = selected_step->stepid;
		} else if (params.opt_all_steps) {
			job_step_info_response_msg_t *step_ptr = NULL;
			int i = 0;
			if (slurm_get_job_steps(
				    0, selected_step->jobid, NO_VAL,
				    &step_ptr, SHOW_ALL)) {
				error("couldn't get steps for job %u",
				      selected_step->jobid);
				continue;
			}

			for (i = 0; i < step_ptr->job_step_count; i++) {
				_do_stat(selected_step->jobid,
					 step_ptr->job_steps[i].step_id,
					 step_ptr->job_steps[i].nodes,
					 step_ptr->job_steps[i].cpu_freq);
			}
			slurm_free_job_step_info_response_msg(step_ptr);
			continue;
		} else {
			/* get the first running step to query against. */
			job_step_info_response_msg_t *step_ptr = NULL;
			if (slurm_get_job_steps(
				    0, selected_step->jobid, NO_VAL,
				    &step_ptr, SHOW_ALL)) {
				error("couldn't get steps for job %u",
				      selected_step->jobid);
				continue;
			}
			if (!step_ptr->job_step_count) {
				error("no steps running for job %u",
				      selected_step->jobid);
				continue;
			}
			stepid = step_ptr->job_steps[0].step_id;
			nodelist = step_ptr->job_steps[0].nodes;
			req_cpufreq = step_ptr->job_steps[0].cpu_freq;
		}
		_do_stat(selected_step->jobid, stepid, nodelist, req_cpufreq);
		if (free_nodelist && nodelist)
			free(nodelist);
	}
	list_iterator_destroy(itr);

	xfree(params.opt_field_list);
	if (params.opt_job_list)
		list_destroy(params.opt_job_list);

	if (print_fields_itr)
		list_iterator_destroy(print_fields_itr);
	if (print_fields_list)
		list_destroy(print_fields_list);

	return 0;
}
示例#5
0
/*
 * scontrol_print_step - print the specified job step's information
 * IN job_step_id_str - job step's id or NULL to print information
 *	about all job steps
 */
extern void
scontrol_print_step (char *job_step_id_str)
{
	int error_code, i, print_cnt = 0;
	uint32_t job_id = NO_VAL, step_id = NO_VAL;
	uint16_t array_id = (uint16_t) NO_VAL;
	char *next_str;
	job_step_info_response_msg_t *job_step_info_ptr;
	job_step_info_t * job_step_ptr;
	static uint32_t last_job_id = 0, last_array_id, last_step_id = 0;
	static job_step_info_response_msg_t *old_job_step_info_ptr = NULL;
	static uint16_t last_show_flags = 0xffff;
	uint16_t show_flags = 0;

	if (job_step_id_str) {
		job_id = (uint32_t) strtol (job_step_id_str, &next_str, 10);
		if (next_str[0] == '_')
			array_id = (uint16_t) strtol(next_str+1, &next_str, 10);
		if (next_str[0] == '.')
			step_id = (uint32_t) strtol (next_str+1, NULL, 10);
	}

	if (all_flag)
		show_flags |= SHOW_ALL;

	if ((old_job_step_info_ptr) && (last_job_id == job_id) &&
	    (last_array_id == array_id) && (last_step_id == step_id)) {
		if (last_show_flags != show_flags)
			old_job_step_info_ptr->last_update = (time_t) 0;
		error_code = slurm_get_job_steps (
			old_job_step_info_ptr->last_update,
			job_id, step_id, &job_step_info_ptr,
			show_flags);
		if (error_code == SLURM_SUCCESS)
			slurm_free_job_step_info_response_msg (
				old_job_step_info_ptr);
		else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) {
			job_step_info_ptr = old_job_step_info_ptr;
			error_code = SLURM_SUCCESS;
			if (quiet_flag == -1)
				printf ("slurm_get_job_steps no change in data\n");
		}
	} else {
		if (old_job_step_info_ptr) {
			slurm_free_job_step_info_response_msg (
				old_job_step_info_ptr);
			old_job_step_info_ptr = NULL;
		}
		error_code = slurm_get_job_steps ( (time_t) 0, job_id, step_id,
						   &job_step_info_ptr,
						   show_flags);
	}

	if (error_code) {
		exit_code = 1;
		if (quiet_flag != 1)
			slurm_perror ("slurm_get_job_steps error");
		return;
	}

	old_job_step_info_ptr = job_step_info_ptr;
	last_show_flags = show_flags;
	last_job_id = job_id;
	last_step_id = step_id;

	if (quiet_flag == -1) {
		char time_str[32];
		slurm_make_time_str ((time_t *)&job_step_info_ptr->last_update,
			             time_str, sizeof(time_str));
		printf ("last_update_time=%s, records=%d\n",
			time_str, job_step_info_ptr->job_step_count);
	}

	job_step_ptr = job_step_info_ptr->job_steps ;
	for (i = 0, job_step_ptr = job_step_info_ptr->job_steps;
	     i < job_step_info_ptr->job_step_count; i++, job_step_ptr++) {
		if ((array_id != (uint16_t) NO_VAL) &&
		    (array_id != job_step_ptr->array_task_id))
			continue;
		slurm_print_job_step_info(stdout, job_step_ptr, one_liner);
		print_cnt++;
	}

	if (print_cnt == 0) {
		if (job_step_id_str) {
			exit_code = 1;
			if (quiet_flag != 1) {
				if (array_id == (uint16_t) NO_VAL) {
					printf ("Job step %u.%u not found\n",
						job_id, step_id);
				} else {
					printf ("Job step %u_%u.%u not found\n",
						job_id, array_id, step_id);
				}
			}
		} else if (quiet_flag != 1)
			printf ("No job steps in the system\n");
	}
}
示例#6
0
文件: sview.c 项目: artpol84/slurm
extern void _change_cluster_main(GtkComboBox *combo, gpointer extra)
{
	GtkTreeModel *model;
	display_data_t *display_data;
	GtkTreeIter iter;
	slurmdb_cluster_rec_t *cluster_rec = NULL;
	char *tmp, *ui_description;
	GError *error = NULL;
	GtkWidget *node_tab = NULL;
	int rc;
	bool got_grid = 0;

	if (!gtk_combo_box_get_active_iter(combo, &iter)) {
		g_print("nothing selected\n");
		return;
	}
	model = gtk_combo_box_get_model(combo);
	if (!model) {
		g_print("nothing selected\n");
		return;
	}

	gtk_tree_model_get(model, &iter, 1, &cluster_rec, -1);
	if (!cluster_rec) {
		g_print("no cluster_rec pointer here!");
		return;
	}

	/* From testing it doesn't appear you can get here without a
	   legitimate change, so there isn't a need to check if we are
	   going back to the same cluster we were just at.
	*/
	/* if (working_cluster_rec) { */
	/*	if (!xstrcmp(cluster_rec->name, working_cluster_rec->name)) */
	/*		return; */
	/* } */

	/* free old info under last cluster */
	slurm_free_block_info_msg(g_block_info_ptr);
	g_block_info_ptr = NULL;
	slurm_free_front_end_info_msg(g_front_end_info_ptr);
	g_front_end_info_ptr = NULL;
	slurm_free_burst_buffer_info_msg(g_bb_info_ptr);
	g_bb_info_ptr = NULL;
	slurm_free_job_info_msg(g_job_info_ptr);
	g_job_info_ptr = NULL;
	slurm_free_node_info_msg(g_node_info_ptr);
	g_node_info_ptr = NULL;
	slurm_free_partition_info_msg(g_part_info_ptr);
	g_part_info_ptr = NULL;
	slurm_free_reservation_info_msg(g_resv_info_ptr);
	g_resv_info_ptr = NULL;
	slurm_free_ctl_conf(g_ctl_info_ptr);
	g_ctl_info_ptr = NULL;
	slurm_free_job_step_info_response_msg(g_step_info_ptr);
	g_step_info_ptr = NULL;
	slurm_free_topo_info_msg(g_topo_info_msg_ptr);
	g_topo_info_msg_ptr = NULL;

	/* set up working_cluster_rec */
	if (cluster_dims > 1) {
		/* reset from a multi-dim cluster */
		working_sview_config.grid_x_width =
			default_sview_config.grid_x_width;
		working_sview_config.grid_hori = default_sview_config.grid_hori;
		working_sview_config.grid_vert = default_sview_config.grid_vert;
	}
	gtk_table_set_col_spacings(main_grid_table, 0);
	gtk_table_set_row_spacings(main_grid_table, 0);

	if (!orig_cluster_name)
		orig_cluster_name = slurm_get_cluster_name();
	if (!xstrcmp(cluster_rec->name, orig_cluster_name))
		working_cluster_rec = NULL;
	else
		working_cluster_rec = cluster_rec;
	cluster_dims = slurmdb_setup_cluster_dims();
	cluster_flags = slurmdb_setup_cluster_flags();

	display_data = main_display_data;
	while (display_data++) {
		if (display_data->id == -1)
			break;
		if (cluster_flags & CLUSTER_FLAG_BG) {
			switch(display_data->id) {
			case BLOCK_PAGE:
				display_data->show = true;
				break;
			case NODE_PAGE:
				display_data->name = "Midplanes";
				break;
			default:
				break;
			}
		} else {
			switch(display_data->id) {
			case BLOCK_PAGE:
				display_data->show = false;
				break;
			case NODE_PAGE:
				display_data->name = "Nodes";
				break;
			default:
				break;
			}
		}
	}

	/* set up menu */
	ui_description = _get_ui_description();
	gtk_ui_manager_remove_ui(g_ui_manager, g_menu_id);
	if (!(g_menu_id = gtk_ui_manager_add_ui_from_string(
		      g_ui_manager, ui_description, -1, &error))) {
		xfree(ui_description);
		g_error("building menus failed: %s", error->message);
		g_error_free (error);
		exit (0);
	}
	xfree(ui_description);

	/* make changes for each object */
	cluster_change_block();
	cluster_change_front_end();
	cluster_change_resv();
	cluster_change_part();
	cluster_change_job();
	cluster_change_node();
	cluster_change_bb();

	/* destroy old stuff */
	if (grid_button_list) {
		FREE_NULL_LIST(grid_button_list);
		got_grid = 1;
	}

	select_g_ba_fini();

	/* sorry popups can't survive a cluster change */
	if (popup_list)
		list_flush(popup_list);
	if (signal_params_list)
		list_flush(signal_params_list);
	if (signal_params_list)
		list_flush(signal_params_list);
	if (g_switch_nodes_maps)
		free_switch_nodes_maps(g_switch_nodes_maps);

	/* change the node tab name if needed */
	node_tab = gtk_notebook_get_nth_page(
		GTK_NOTEBOOK(main_notebook), NODE_PAGE);
	node_tab = gtk_notebook_get_tab_label(GTK_NOTEBOOK(main_notebook),
					      node_tab);
#ifdef GTK2_USE_GET_FOCUS

	/* ok, now we have a table which we have set up to contain an
	 * event_box which contains the label we are interested.  We
	 * setup this label to be the focus child of the table, so all
	 * we have to do is grab that and we are set. */
	node_tab = gtk_container_get_focus_child(GTK_CONTAINER(node_tab));
#else
	/* See above comment.  Since gtk_container_get_focus_child
	 * doesn't exist yet we will just traverse the children until
	 * we find the label widget and then break.
	 */
	{
		int i = 0;
		GList *children = gtk_container_get_children(
			GTK_CONTAINER(node_tab));
		while ((node_tab = g_list_nth_data(children, i++))) {
			int j = 0;
			GList *children2 = gtk_container_get_children(
				GTK_CONTAINER(node_tab));
			while ((node_tab = g_list_nth_data(children2, j++))) {
				if (GTK_IS_LABEL(node_tab))
					break;
			}
			g_list_free(children2);
			if (node_tab)
				break;
		}
		g_list_free(children);
	}
#endif
	if (node_tab)
		gtk_label_set_text(GTK_LABEL(node_tab),
				   main_display_data[NODE_PAGE].name);

	/* The name in the visible tabs is easier since it is really
	   just a button with a label on it.
	*/
	if (default_sview_config.page_check_widget[NODE_PAGE]) {
		gtk_button_set_label(GTK_BUTTON(default_sview_config.
						page_check_widget[NODE_PAGE]),
				     main_display_data[NODE_PAGE].name);
	}

	/* reinit */
	rc = get_system_stats(main_grid_table);

	if (rc == SLURM_SUCCESS) {
		/* It turns out if we didn't have the grid (cluster
		   not responding) before the
		   new grid doesn't get set up correctly.  Redoing the
		   system_stats fixes it.  There is probably a better
		   way of doing this, but it doesn't happen very often
		   and isn't that bad to handle every once in a while.
		*/
		if (!got_grid) {
			/* I know we just did this before, but it
			   needs to be done again here.
			*/
			FREE_NULL_LIST(grid_button_list);
			get_system_stats(main_grid_table);
		}

		refresh_main(NULL, NULL);
	}

	tmp = g_strdup_printf("Cluster changed to %s", cluster_rec->name);
	display_edit_note(tmp);
	g_free(tmp);
}