Ejemplo n.º 1
0
/* _print_job_step - print the specified job step's information */
static int
_print_job_steps( bool clear_old )
{
	int error_code;
	static job_step_info_response_msg_t * old_step_ptr = NULL;
	static job_step_info_response_msg_t  * new_step_ptr;
	uint16_t show_flags = 0;

	if (params.all_flag)
		show_flags |= SHOW_ALL;

	if (old_step_ptr) {
		if (clear_old)
			old_step_ptr->last_update = 0;
		/* Use a last_update time of 0 so that we can get an updated
		 * run_time for jobs rather than just its start_time */
		error_code = slurm_get_job_steps((time_t) 0, NO_VAL, NO_VAL,
						 &new_step_ptr, show_flags);
		if (error_code ==  SLURM_SUCCESS)
			slurm_free_job_step_info_response_msg( old_step_ptr );
		else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_SUCCESS;
			new_step_ptr = old_step_ptr;
		}
	} else {
		error_code = slurm_get_job_steps((time_t) 0, NO_VAL, NO_VAL,
						 &new_step_ptr, show_flags);
	}
	if (error_code) {
		slurm_perror ("slurm_get_job_steps error");
		return SLURM_ERROR;
	}
	old_step_ptr = new_step_ptr;

	if (params.verbose) {
		printf ("last_update_time=%ld records=%u\n",
			(long) new_step_ptr->last_update,
			new_step_ptr->job_step_count);
	}

	if (!params.format && !params.format_long)
		params.format = "%.15i %.8j %.9P %.8u %.9M %N";

	if (!params.format_list) {
		if (params.format)
			parse_format(params.format);
		else if (params.format_long)
			parse_long_format(params.format_long);
	}

	print_steps_array( new_step_ptr->job_steps,
			   new_step_ptr->job_step_count,
			   params.format_list );
	return SLURM_SUCCESS;
}
Ejemplo n.º 2
0
/*
 * slurm_terminate_job_step - terminates a job step by sending a
 * 	REQUEST_TERMINATE_TASKS rpc to all slurmd of a job step.
 * IN job_id  - the job's id
 * IN step_id - the job step's id - use SLURM_BATCH_SCRIPT as the step_id
 *              to terminate a job's batch script
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
extern int
slurm_terminate_job_step (uint32_t job_id, uint32_t step_id)
{
	resource_allocation_response_msg_t *alloc_info = NULL;
	job_step_info_response_msg_t *step_info = NULL;
	int rc = 0;
	int i;
	int save_errno = 0;

	if (slurm_allocation_lookup_lite(job_id, &alloc_info)) {
		return -1;
	}

	/*
	 * The controller won't give us info about the batch script job step,
	 * so we need to handle that seperately.
	 */
	if (step_id == SLURM_BATCH_SCRIPT) {
		rc = _terminate_batch_script_step(alloc_info);
		slurm_free_resource_allocation_response_msg(alloc_info);
		errno = rc;
		return rc ? -1 : 0;
	}

	/*
	 * Otherwise, look through the list of job step info and find
	 * the one matching step_id.  Terminate that step.
	 */
	rc = slurm_get_job_steps((time_t)0, job_id, step_id,
				 &step_info, SHOW_ALL);
	if (rc != 0) {
		save_errno = errno;
		goto fail;
	}
	for (i = 0; i < step_info->job_step_count; i++) {
		if ((step_info->job_steps[i].job_id == job_id) &&
		    (step_info->job_steps[i].step_id == step_id)) {
			rc = _terminate_job_step(&step_info->job_steps[i],
						 alloc_info);
			save_errno = errno;
			break;
		}
	}
	slurm_free_job_step_info_response_msg(step_info);
fail:
	slurm_free_resource_allocation_response_msg(alloc_info);
	errno = save_errno;
	return rc ? -1 : 0;
}
Ejemplo n.º 3
0
/* Return the current time limit of the specified job/step_id or NO_VAL if the
 * information is not available */
static uint32_t _get_step_time(uint32_t job_id, uint32_t step_id)
{
	uint32_t time_limit = NO_VAL;
	int i, rc;
	job_step_info_response_msg_t *resp;

	rc = slurm_get_job_steps((time_t) 0, job_id, step_id, &resp, SHOW_ALL);
	if (rc == SLURM_SUCCESS) {
		for (i = 0; i < resp->job_step_count; i++) {
			if ((resp->job_steps[i].job_id != job_id) ||
			    (resp->job_steps[i].step_id != step_id))
				continue;	/* should not happen */
			time_limit = resp->job_steps[i].time_limit;
			break;
		}
		slurm_free_job_step_info_response_msg(resp);
	} else {
		error("Could not load state information for step %u.%u: %m",
		      job_id, step_id);
	}

	return time_limit;
}
Ejemplo n.º 4
0
Archivo: sstat.c Proyecto: Cray/slurm
int main(int argc, char **argv)
{
	ListIterator itr = NULL;
	uint32_t req_cpufreq = NO_VAL;
	uint32_t stepid = NO_VAL;
	slurmdb_selected_step_t *selected_step = NULL;

#ifdef HAVE_ALPS_CRAY
	error("The sstat command is not supported on Cray systems");
	return 1;
#endif
#ifdef HAVE_BG
	error("The sstat command is not supported on IBM BlueGene systems");
	return 1;
#endif

	slurm_conf_init(NULL);
	print_fields_list = list_create(NULL);
	print_fields_itr = list_iterator_create(print_fields_list);

	parse_command_line(argc, argv);
	if (!params.opt_job_list || !list_count(params.opt_job_list)) {
		error("You didn't give me any jobs to stat.");
		return 1;
	}

	print_fields_header(print_fields_list);
	itr = list_iterator_create(params.opt_job_list);
	while ((selected_step = list_next(itr))) {
		char *nodelist = NULL;
		bool free_nodelist = false;
		if (selected_step->stepid == INFINITE) {
			/* get the batch step info */
			job_info_msg_t *job_ptr = NULL;
			hostlist_t hl;

			if (slurm_load_job(
				    &job_ptr, selected_step->jobid, SHOW_ALL)) {
				error("couldn't get info for job %u",
				      selected_step->jobid);
				continue;
			}

			stepid = NO_VAL;
			hl = hostlist_create(job_ptr->job_array[0].nodes);
			nodelist = hostlist_pop(hl);
			free_nodelist = true;
			hostlist_destroy(hl);
			slurm_free_job_info_msg(job_ptr);
		} else if (selected_step->stepid != NO_VAL) {
			stepid = selected_step->stepid;
		} else if (params.opt_all_steps) {
			job_step_info_response_msg_t *step_ptr = NULL;
			int i = 0;
			if (slurm_get_job_steps(
				    0, selected_step->jobid, NO_VAL,
				    &step_ptr, SHOW_ALL)) {
				error("couldn't get steps for job %u",
				      selected_step->jobid);
				continue;
			}

			for (i = 0; i < step_ptr->job_step_count; i++) {
				_do_stat(selected_step->jobid,
					 step_ptr->job_steps[i].step_id,
					 step_ptr->job_steps[i].nodes,
					 step_ptr->job_steps[i].cpu_freq);
			}
			slurm_free_job_step_info_response_msg(step_ptr);
			continue;
		} else {
			/* get the first running step to query against. */
			job_step_info_response_msg_t *step_ptr = NULL;
			if (slurm_get_job_steps(
				    0, selected_step->jobid, NO_VAL,
				    &step_ptr, SHOW_ALL)) {
				error("couldn't get steps for job %u",
				      selected_step->jobid);
				continue;
			}
			if (!step_ptr->job_step_count) {
				error("no steps running for job %u",
				      selected_step->jobid);
				continue;
			}
			stepid = step_ptr->job_steps[0].step_id;
			nodelist = step_ptr->job_steps[0].nodes;
			req_cpufreq = step_ptr->job_steps[0].cpu_freq;
		}
		_do_stat(selected_step->jobid, stepid, nodelist, req_cpufreq);
		if (free_nodelist && nodelist)
			free(nodelist);
	}
	list_iterator_destroy(itr);

	xfree(params.opt_field_list);
	if (params.opt_job_list)
		list_destroy(params.opt_job_list);

	if (print_fields_itr)
		list_iterator_destroy(print_fields_itr);
	if (print_fields_list)
		list_destroy(print_fields_list);

	return 0;
}
Ejemplo n.º 5
0
/*
 * scontrol_print_step - print the specified job step's information
 * IN job_step_id_str - job step's id or NULL to print information
 *	about all job steps
 */
extern void
scontrol_print_step (char *job_step_id_str)
{
	int error_code, i, print_cnt = 0;
	uint32_t job_id = NO_VAL, step_id = NO_VAL;
	uint16_t array_id = (uint16_t) NO_VAL;
	char *next_str;
	job_step_info_response_msg_t *job_step_info_ptr;
	job_step_info_t * job_step_ptr;
	static uint32_t last_job_id = 0, last_array_id, last_step_id = 0;
	static job_step_info_response_msg_t *old_job_step_info_ptr = NULL;
	static uint16_t last_show_flags = 0xffff;
	uint16_t show_flags = 0;

	if (job_step_id_str) {
		job_id = (uint32_t) strtol (job_step_id_str, &next_str, 10);
		if (next_str[0] == '_')
			array_id = (uint16_t) strtol(next_str+1, &next_str, 10);
		if (next_str[0] == '.')
			step_id = (uint32_t) strtol (next_str+1, NULL, 10);
	}

	if (all_flag)
		show_flags |= SHOW_ALL;

	if ((old_job_step_info_ptr) && (last_job_id == job_id) &&
	    (last_array_id == array_id) && (last_step_id == step_id)) {
		if (last_show_flags != show_flags)
			old_job_step_info_ptr->last_update = (time_t) 0;
		error_code = slurm_get_job_steps (
			old_job_step_info_ptr->last_update,
			job_id, step_id, &job_step_info_ptr,
			show_flags);
		if (error_code == SLURM_SUCCESS)
			slurm_free_job_step_info_response_msg (
				old_job_step_info_ptr);
		else if (slurm_get_errno () == SLURM_NO_CHANGE_IN_DATA) {
			job_step_info_ptr = old_job_step_info_ptr;
			error_code = SLURM_SUCCESS;
			if (quiet_flag == -1)
				printf ("slurm_get_job_steps no change in data\n");
		}
	} else {
		if (old_job_step_info_ptr) {
			slurm_free_job_step_info_response_msg (
				old_job_step_info_ptr);
			old_job_step_info_ptr = NULL;
		}
		error_code = slurm_get_job_steps ( (time_t) 0, job_id, step_id,
						   &job_step_info_ptr,
						   show_flags);
	}

	if (error_code) {
		exit_code = 1;
		if (quiet_flag != 1)
			slurm_perror ("slurm_get_job_steps error");
		return;
	}

	old_job_step_info_ptr = job_step_info_ptr;
	last_show_flags = show_flags;
	last_job_id = job_id;
	last_step_id = step_id;

	if (quiet_flag == -1) {
		char time_str[32];
		slurm_make_time_str ((time_t *)&job_step_info_ptr->last_update,
			             time_str, sizeof(time_str));
		printf ("last_update_time=%s, records=%d\n",
			time_str, job_step_info_ptr->job_step_count);
	}

	job_step_ptr = job_step_info_ptr->job_steps ;
	for (i = 0, job_step_ptr = job_step_info_ptr->job_steps;
	     i < job_step_info_ptr->job_step_count; i++, job_step_ptr++) {
		if ((array_id != (uint16_t) NO_VAL) &&
		    (array_id != job_step_ptr->array_task_id))
			continue;
		slurm_print_job_step_info(stdout, job_step_ptr, one_liner);
		print_cnt++;
	}

	if (print_cnt == 0) {
		if (job_step_id_str) {
			exit_code = 1;
			if (quiet_flag != 1) {
				if (array_id == (uint16_t) NO_VAL) {
					printf ("Job step %u.%u not found\n",
						job_id, step_id);
				} else {
					printf ("Job step %u_%u.%u not found\n",
						job_id, array_id, step_id);
				}
			}
		} else if (quiet_flag != 1)
			printf ("No job steps in the system\n");
	}
}