Beispiel #1
0
/*
 * scontrol_get_job_state - look up the state of one job
 * IN job_id - id of the job whose state is wanted
 * RET job_state of the record whose job_id matches, or (uint16_t) NO_VAL
 *	if the job information could not be loaded or no record matches
 *
 * On a load failure exit_code is set to 1. Diagnostics are only printed
 * when quiet_flag is -1.
 */
extern uint16_t
scontrol_get_job_state(uint32_t job_id)
{
	job_info_msg_t * job_buffer_ptr = NULL;
	int error_code = SLURM_SUCCESS, i;
	job_info_t *job_ptr = NULL;

	error_code = scontrol_load_job(&job_buffer_ptr, job_id);
	if (error_code) {
		exit_code = 1;
		if (quiet_flag == -1)
			slurm_perror ("slurm_load_job error");
		return (uint16_t) NO_VAL;
	}
	if (quiet_flag == -1) {
		char time_str[32];
		slurm_make_time_str((time_t *)&job_buffer_ptr->last_update,
				    time_str, sizeof(time_str));
		printf("last_update_time=%s, records=%d\n",
		       time_str, job_buffer_ptr->record_count);
	}

	/*
	 * Step job_ptr together with i: without the pointer increment only
	 * the first record would ever be compared against job_id.
	 */
	for (i = 0, job_ptr = job_buffer_ptr->job_array;
	     i < job_buffer_ptr->record_count; i++, job_ptr++) {
		if (job_ptr->job_id == job_id)
			return job_ptr->job_state;
	}
	if (quiet_flag == -1)
		printf("Could not find job %u\n", job_id);
	return (uint16_t) NO_VAL;
}
Beispiel #2
0
/*
 * scontrol_print_job - print information about one job or about all jobs
 * IN job_id_str - job id, optionally followed by "_<array task id>", or
 *	NULL to print every record returned by scontrol_load_job()
 *
 * Sets exit_code to 1 if the job data cannot be loaded or if a job was
 * requested but nothing was printed.
 */
extern void
scontrol_print_job (char * job_id_str)
{
	job_info_msg_t *job_msg = NULL;
	job_info_t *rec = NULL;
	uint32_t job_id = 0;
	uint16_t array_id = (uint16_t) NO_VAL;
	char *rest = NULL;
	int rc, idx, printed = 0;

	if (job_id_str) {
		job_id = (uint32_t) strtol (job_id_str, &rest, 10);
		/* "<job>_<task>": the part after '_' selects one array task */
		if (rest[0] == '_')
			array_id = strtol( rest + 1, &rest, 10 );
	}

	rc = scontrol_load_job(&job_msg, job_id);
	if (rc) {
		exit_code = 1;
		if (quiet_flag != 1)
			slurm_perror ("slurm_load_jobs error");
		return;
	}

	if (quiet_flag == -1) {
		char time_str[32];

		slurm_make_time_str ((time_t *)&job_msg->last_update,
				     time_str, sizeof(time_str));
		printf ("last_update_time=%s, records=%d\n",
			time_str, job_msg->record_count);
	}

	for (idx = 0; idx < job_msg->record_count; idx++) {
		rec = &job_msg->job_array[idx];
		/* Print everything unless a specific array task was asked for */
		if ((array_id == (uint16_t) NO_VAL) ||
		    (array_id == rec->array_task_id)) {
			slurm_print_job_info(stdout, rec, one_liner);
			printed++;
		}
	}

	if (printed != 0)
		return;

	if (!job_id_str) {
		if (quiet_flag != 1)
			printf ("No jobs in the system\n");
		return;
	}

	/* A specific job was requested but no record was printed */
	exit_code = 1;
	if (quiet_flag == 1)
		return;
	if (array_id == (uint16_t) NO_VAL)
		printf("Job %u not found\n", job_id);
	else
		printf("Job %u_%u not found\n", job_id, array_id);
}
Beispiel #3
0
/* Translate a job name to relevant job IDs
 * NOTE: xfree the return value to avoid memory leak */
static char *_job_name2id(char *job_name, uint32_t job_uid)
{
	int i, rc;
	job_info_msg_t *resp;
	slurm_job_info_t *job_ptr;
	char *job_id_str = NULL, *sep = "";

	xassert(job_name);

	rc = scontrol_load_job(&resp, 0);
	if (rc == SLURM_SUCCESS) {
		if (resp->record_count == 0) {
			error("JobName %s not found", job_name);
			slurm_free_job_info_msg(resp);
			return job_id_str;
		}
		for (i = 0, job_ptr = resp->job_array; i < resp->record_count;
		     i++, job_ptr++) {
			if ((job_uid != NO_VAL) &&
			    (job_uid != job_ptr->user_id))
				continue;
			if (!job_ptr->name || xstrcmp(job_name, job_ptr->name))
				continue;
			if (job_ptr->array_task_id != NO_VAL) {
				xstrfmtcat(job_id_str, "%s%u_%u", sep,
					   job_ptr->array_job_id,
					   job_ptr->array_task_id);
			} else {
				xstrfmtcat(job_id_str, "%s%u", sep,
					   job_ptr->job_id);
			}
			sep = ",";
		}
		if (!job_id_str) {
			if (job_uid == NO_VAL) {
				error("No jobs with name \'%s\'", job_name);
			} else {
				error("No jobs with user ID %u and name \'%s\'",
				      job_uid, job_name);
			}
		}
	} else {
		error("Could not load state information: %m");
	}

	return job_id_str;
}
Beispiel #4
0
/*
 * scontrol_print_completing - print jobs in completing state and
 *	associated nodes in COMPLETING or DOWN state
 *
 * Loads the job table and the node table, then hands every job whose
 * job_state has JOB_COMPLETING set to scontrol_print_completing_job().
 * Sets exit_code to 1 if either table cannot be loaded.
 */
extern void
scontrol_print_completing (void)
{
	job_info_msg_t  *jobs;
	node_info_msg_t *nodes;
	job_info_t      *rec;
	/* Must load all nodes including hidden for cross-index
	 * from job's node_inx to node table to work, so SHOW_ALL
	 * is always set regardless of all_flag */
	uint16_t         show_flags = SHOW_ALL;
	int idx;

	if (scontrol_load_job (&jobs, 0)) {
		exit_code = 1;
		if (quiet_flag != 1)
			slurm_perror ("slurm_load_jobs error");
		return;
	}

	if (federation_flag)
		show_flags |= SHOW_FEDERATION;
	if (local_flag)
		show_flags |= SHOW_LOCAL;

	if (scontrol_load_nodes(&nodes, show_flags)) {
		exit_code = 1;
		if (quiet_flag != 1)
			slurm_perror ("slurm_load_nodes error");
		return;
	}

	/* Scan the jobs for completing state */
	for (idx = 0, rec = jobs->job_array; idx < jobs->record_count;
	     idx++, rec++) {
		if (!(rec->job_state & JOB_COMPLETING))
			continue;
		scontrol_print_completing_job(rec, nodes);
	}

	slurm_free_node_info_msg(nodes);
}
Beispiel #5
0
/*
 * scontrol_hold - perform some job hold/release operation
 * IN op	- hold/release operation
 * IN job_str	- a job ID or job name
 * RET 0 if no slurm error, errno otherwise. parsing error prints
 *		error message and returns 0
 */
extern int
scontrol_hold(char *op, char *job_str)
{
	static uint32_t last_job_id = NO_VAL;
	static job_info_msg_t *jobs = NULL;
	job_array_resp_msg_t *resp = NULL;
	int i, rc = SLURM_SUCCESS, rc2;
	int j;
	job_desc_msg_t job_msg;
	uint32_t job_id = 0;
	char *job_name = NULL;
	char *job_id_str = NULL;
	slurm_job_info_t *job_ptr;

	if (job_str && !xstrncasecmp(job_str, "JobID=", 6))
		job_str += 6;
	if (job_str && !xstrncasecmp(job_str, "Job=", 4))
		job_str += 4;

	slurm_init_job_desc_msg (&job_msg);
	if ((xstrncasecmp(op, "holdu", 5) == 0) ||
	    (xstrncasecmp(op, "uhold", 5) == 0)) {
		job_msg.priority = 0;
		job_msg.alloc_sid = ALLOC_SID_USER_HOLD;
	} else if (xstrncasecmp(op, "hold", 4) == 0) {
		job_msg.priority = 0;
		job_msg.alloc_sid = 0;
	} else
		job_msg.priority = INFINITE;

	if (_is_job_id(job_str)) {
		while ((job_msg.job_id_str = _next_job_id())) {
			rc2 = slurm_update_job2(&job_msg, &resp);
			if (rc2 != SLURM_SUCCESS) {
				rc2 = slurm_get_errno();
				rc = MAX(rc, rc2);
				exit_code = 1;
				if (quiet_flag != 1) {
					fprintf(stderr, "%s for job %s\n",
						slurm_strerror(rc2),
						job_msg.job_id_str);
				}
			} else if (resp) {
				for (i = 0; i < resp->job_array_count; i++) {
					if ((resp->error_code[i]==SLURM_SUCCESS)
					    && (resp->job_array_count == 1))
						continue;
					exit_code = 1;
					if (quiet_flag == 1)
						continue;
					fprintf(stderr, "%s: %s\n",
						resp->job_array_id[i],
						slurm_strerror(resp->
							       error_code[i]));
				}
				slurm_free_job_array_resp(resp);
				resp = NULL;
			}
			job_msg.job_id_str = _next_job_id();
		}
		return rc;
	} else if (job_str) {
		if (!xstrncasecmp(job_str, "Name=", 5)) {
			job_str += 5;
			job_id = 0;
			job_name = job_str;
			last_job_id = NO_VAL;
		} else {
			exit_code = 1;
			rc = ESLURM_INVALID_JOB_ID;
			slurm_seterrno(rc);
			if (quiet_flag != 1) {
				fprintf(stderr, "%s for job %s\n",
					slurm_strerror(rc), job_str);
			}
			return rc;
		}
	} else {
		last_job_id = NO_VAL;	/* Refresh cache on next call */
		return 0;
	}

	if (last_job_id != job_id) {
		if (scontrol_load_job(&jobs, job_id)) {
			if (quiet_flag == -1)
				slurm_perror ("slurm_load_job error");
			return 1;
		}
		last_job_id = job_id;
	}

	/* set current user, needed e.g., for AllowGroups checks */
	for (i = 0, job_ptr = jobs->job_array; i < jobs->record_count;
	     i++, job_ptr++) {
		if (xstrcmp(job_name, job_ptr->name))
			continue;

		if (!IS_JOB_PENDING(job_ptr)) {
			if (job_ptr->array_task_id != NO_VAL)
				continue;
			slurm_seterrno(ESLURM_JOB_NOT_PENDING);
			rc = MAX(rc, ESLURM_JOB_NOT_PENDING);
		}

		if (job_ptr->array_task_str) {
			xstrfmtcat(job_id_str, "%u_%s",
				   job_ptr->array_job_id,
				   job_ptr->array_task_str);
		} else if (job_ptr->array_task_id != NO_VAL) {
			xstrfmtcat(job_id_str, "%u_%u",
				   job_ptr->array_job_id,
				   job_ptr->array_task_id);
		} else {
			xstrfmtcat(job_id_str, "%u", job_ptr->job_id);
		}
		job_msg.job_id_str = job_id_str;
		rc2 = slurm_update_job2(&job_msg, &resp);
		if (rc2 != SLURM_SUCCESS) {
			rc2 = slurm_get_errno();
			rc = MAX(rc, rc2);
			exit_code = 1;
			if (quiet_flag != 1) {
				fprintf(stderr, "%s for job %s\n",
					slurm_strerror(rc2),
					job_msg.job_id_str);
			}
		} else if (resp) {
			for (j = 0; j < resp->job_array_count; j++) {
				if ((resp->error_code[j] == SLURM_SUCCESS) &&
				    (resp->job_array_count == 1))
					continue;
				exit_code = 1;
				if (quiet_flag == 1)
					continue;
				fprintf(stderr, "%s: %s\n",
					resp->job_array_id[j],
					slurm_strerror(resp->error_code[j]));
			}
			slurm_free_job_array_resp(resp);
			resp = NULL;
		}
		xfree(job_id_str);
	}

	return rc;
}
Beispiel #6
0
/*
 * scontrol_print_job - print the specified job's information
 * IN job_id_str - job id, optionally followed by "_<array task id>" and/or
 *	"+<pack job offset>", or NULL to print information about all jobs
 *
 * Sets exit_code to 1 if job_id_str contains a character other than a
 * digit, '_' or '+', if the job data cannot be loaded, or if a job was
 * requested but nothing was printed.
 */
extern void scontrol_print_job(char * job_id_str)
{
	int error_code = SLURM_SUCCESS, i, print_cnt = 0;
	uint32_t job_id = 0;
	uint32_t array_id = NO_VAL, pack_job_offset = NO_VAL;
	job_info_msg_t * job_buffer_ptr = NULL;
	job_info_t *job_ptr = NULL;
	char *end_ptr = NULL;

	if (job_id_str) {
		char *tmp_job_ptr = job_id_str;
		/*
		 * Check that the input is a valid job id (i.e. 123 or 123_456).
		 */
		while (*tmp_job_ptr) {
			/*
			 * isdigit() is only defined for EOF and values
			 * representable as unsigned char, so a plain char
			 * (possibly negative) must be converted first.
			 */
			if (!isdigit((unsigned char) *tmp_job_ptr) &&
			    (*tmp_job_ptr != '_') && (*tmp_job_ptr != '+')) {
				exit_code = 1;
				slurm_seterrno(ESLURM_INVALID_JOB_ID);
				if (quiet_flag != 1)
					slurm_perror("scontrol_print_job error");
				return;
			}
			++tmp_job_ptr;
		}
		job_id = (uint32_t) strtol (job_id_str, &end_ptr, 10);
		if (end_ptr[0] == '_')
			array_id = strtol(end_ptr + 1, &end_ptr, 10);
		if (end_ptr[0] == '+')
			pack_job_offset = strtol(end_ptr + 1, &end_ptr, 10);
	}

	error_code = scontrol_load_job(&job_buffer_ptr, job_id);
	if (error_code) {
		exit_code = 1;
		if (quiet_flag != 1)
			slurm_perror ("slurm_load_jobs error");
		return;
	}
	if (quiet_flag == -1) {
		char time_str[32];
		slurm_make_time_str ((time_t *)&job_buffer_ptr->last_update,
				     time_str, sizeof(time_str));
		printf ("last_update_time=%s, records=%d\n",
			time_str, job_buffer_ptr->record_count);
	}

	for (i = 0, job_ptr = job_buffer_ptr->job_array;
	     i < job_buffer_ptr->record_count; i++, job_ptr++) {
		char *save_array_str = NULL;
		uint32_t save_task_id = 0;
		if (!_pack_id_match(job_ptr, pack_job_offset))
			continue;
		if (!_task_id_in_job(job_ptr, array_id))
			continue;
		/*
		 * When one task was requested and the record carries a task
		 * string, temporarily swap in the requested task id for
		 * printing; the record is restored right after the call.
		 */
		if ((array_id != NO_VAL) && job_ptr->array_task_str) {
			save_array_str = job_ptr->array_task_str;
			job_ptr->array_task_str = NULL;
			save_task_id = job_ptr->array_task_id;
			job_ptr->array_task_id = array_id;
		}
		slurm_print_job_info(stdout, job_ptr, one_liner);
		if (save_array_str) {
			job_ptr->array_task_str = save_array_str;
			job_ptr->array_task_id = save_task_id;
		}
		print_cnt++;
	}

	if (print_cnt == 0) {
		if (job_id_str) {
			exit_code = 1;
			if (quiet_flag != 1) {
				if (array_id != NO_VAL) {
					printf("Job %u_%u not found\n",
					       job_id, array_id);
				} else if (pack_job_offset != NO_VAL) {
					printf("Job %u+%u not found\n",
					       job_id, pack_job_offset);
				} else {
					printf("Job %u not found\n", job_id);
				}
			}
		} else if (quiet_flag != 1)
			printf ("No jobs in the system\n");
	}
}
Beispiel #7
0
/*
 * scontrol_hold - perform some job hold/release operation
 * IN op - operation name: a "holdu"/"uhold" prefix requests a user hold,
 *	a "hold" prefix requests priority 0 with alloc_sid 0, anything else
 *	sets priority to INFINITE
 * IN job_id_str - a job id, optionally followed by "_<array task id>"
 * RET 1 if job_id_str is missing or malformed (after printing a message)
 *	or if the job data cannot be loaded; ESLURM_JOB_NOT_PENDING if a
 *	selected job is not pending; otherwise 0, or the slurm errno of the
 *	last failed update
 */
extern int
scontrol_hold(char *op, char *job_id_str)
{
	job_info_msg_t *jobs;
	slurm_job_info_t *rec;
	job_desc_msg_t update;
	uint32_t job_id;
	uint32_t array_id = NO_VAL;
	char *rest;
	int idx, rc = SLURM_SUCCESS;

	if (!job_id_str) {
		fprintf(stderr, "Invalid job id specified\n");
		return 1;
	}

	job_id = (uint32_t) strtol(job_id_str, &rest, 10);
	if (rest[0] == '_')
		array_id = strtol(rest+1, &rest, 10);
	/* Reject a zero job id and any trailing characters */
	if ((job_id == 0) || (rest[0] != '\0')) {
		fprintf(stderr, "Invalid job id specified\n");
		return 1;
	}

	if (scontrol_load_job(&jobs, job_id)) {
		if (quiet_flag == -1)
			slurm_perror ("slurm_load_job error");
		return 1;
	}

	slurm_init_job_desc_msg (&update);
	update.job_id = job_id;
	/* set current user, needed e.g., for AllowGroups checks */
	update.user_id = getuid();

	if (!strncasecmp(op, "holdu", 5) || !strncasecmp(op, "uhold", 5)) {
		update.priority = 0;
		update.alloc_sid = ALLOC_SID_USER_HOLD;
	} else if (!strncasecmp(op, "hold", 4)) {
		update.priority = 0;
		update.alloc_sid = 0;
	} else {
		update.priority = INFINITE;
	}

	for (idx = 0; idx < jobs->record_count; idx++) {
		rec = &jobs->job_array[idx];

		/* When one array task was requested, skip all the others */
		if ((array_id != NO_VAL) && (rec->array_task_id != array_id))
			continue;

		if (!IS_JOB_PENDING(rec)) {
			/*
			 * With no task requested, non-pending array tasks
			 * are skipped instead of aborting the operation.
			 */
			if ((array_id == NO_VAL) &&
			    (rec->array_task_id != NO_VAL))
				continue;
			slurm_seterrno(ESLURM_JOB_NOT_PENDING);
			return ESLURM_JOB_NOT_PENDING;
		}

		update.job_id = rec->job_id;
		if (slurm_update_job(&update))
			rc = slurm_get_errno();
	}

	return rc;
}