Exemplo n.º 1
0
/*
 * _get_job_info - look up the sbcast credential for the job (or job step)
 *	identified by params.job_id / params.step_id and leave the result in
 *	sbcast_cred.
 *
 * A failed lookup is logged with error() and the process exits with
 * status 1. On success the job or step ID, the node count and the node
 * list are reported with verbose(); the credential itself is printed when
 * params.verbose is set.
 */
static void _get_job_info(void)
{
	int lookup_rc, is_whole_job;

	xassert(params.job_id != NO_VAL);

	lookup_rc = slurm_sbcast_lookup(params.job_id, params.step_id,
					&sbcast_cred);
	/* a step ID of NO_VAL means the request is for the job as a whole */
	is_whole_job = (params.step_id == NO_VAL);

	if (lookup_rc != SLURM_SUCCESS) {
		const char *reason = slurm_strerror(slurm_get_errno());

		if (is_whole_job) {
			error("Slurm job ID %u lookup error: %s",
			      params.job_id, reason);
		} else {
			error("Slurm step ID %u.%u lookup error: %s",
			      params.job_id, params.step_id, reason);
		}
		exit(1);
	}

	if (is_whole_job) {
		verbose("jobid      = %u", params.job_id);
	} else {
		verbose("jobid      = %u.%u", params.job_id, params.step_id);
	}
	/* sbcast_cred->node_addr (array) is also available here */
	verbose("node_cnt   = %u", sbcast_cred->node_cnt);
	verbose("node_list  = %s", sbcast_cred->node_list);

	if (params.verbose) {
		print_sbcast_cred(sbcast_cred->sbcast_cred);
	}

	/* The lookup response is intentionally not released:
	 * most of its contents are still needed later on. */
}
Exemplo n.º 2
0
/*
 * Test if any BG blocks are in deallocating state since they are
 * probably related to this job we will want to sleep longer
 * RET	1:  deallocate in progress
 *	0:  no deallocate in progress
 *     -1: error occurred
 */
static int _blocks_dealloc(void)
{
	static block_info_msg_t *bg_info_ptr = NULL, *new_bg_ptr = NULL;
	int rc = 0, error_code = 0, i;

	if (bg_info_ptr) {
		error_code = slurm_load_block_info(bg_info_ptr->last_update,
						   &new_bg_ptr, SHOW_ALL);
		if (error_code == SLURM_SUCCESS)
			slurm_free_block_info_msg(bg_info_ptr);
		else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			error_code = SLURM_SUCCESS;
			new_bg_ptr = bg_info_ptr;
		}
	} else {
		error_code = slurm_load_block_info((time_t) NULL,
						   &new_bg_ptr, SHOW_ALL);
	}

	if (error_code) {
		error("slurm_load_partitions: %s",
		      slurm_strerror(slurm_get_errno()));
		return -1;
	}
	for (i=0; i<new_bg_ptr->record_count; i++) {
		if(new_bg_ptr->block_array[i].state == BG_BLOCK_TERM) {
			rc = 1;
			break;
		}
	}
	bg_info_ptr = new_bg_ptr;
	return rc;
}
Exemplo n.º 3
0
/*
 * _get_job_info - look up the sbcast credential for the job (or job step)
 *	described by params and leave the result in sbcast_cred.
 * IN params - broadcast parameters; job_id must not be NO_VAL, and a
 *	step_id of NO_VAL selects the job as a whole
 * RET the return code of slurm_sbcast_lookup(); failures are logged with
 *	error() before returning
 */
static int _get_job_info(struct bcast_parameters *params)
{
	int rc, is_whole_job;

	xassert(params->job_id != NO_VAL);

	rc = slurm_sbcast_lookup(params->job_id, params->step_id, &sbcast_cred);
	is_whole_job = (params->step_id == NO_VAL);

	if (rc != SLURM_SUCCESS) {
		const char *reason = slurm_strerror(slurm_get_errno());

		if (is_whole_job) {
			error("Slurm job ID %u lookup error: %s",
			      params->job_id, reason);
		} else {
			error("Slurm step ID %u.%u lookup error: %s",
			      params->job_id, params->step_id, reason);
		}
		return rc;
	}

	if (is_whole_job) {
		verbose("jobid      = %u", params->job_id);
	} else {
		verbose("stepid     = %u.%u", params->job_id, params->step_id);
	}
	/* sbcast_cred->node_addr (array) is also available here */
	verbose("node_cnt   = %u", sbcast_cred->node_cnt);
	verbose("node_list  = %s", sbcast_cred->node_list);

	if (params->verbose) {
		print_sbcast_cred(sbcast_cred->sbcast_cred);
	}

	/* The lookup response is intentionally not released:
	 * most of its contents are still needed later on. */
	return rc;
}
Exemplo n.º 4
0
/*
 * scontrol_requeue - requeue every job named by the first argument
 * IN argc - count of arguments (not examined here)
 * IN argv - list of arguments; argv[0] is handed to _get_job_ids()
 * RET 0 if argv[0] is missing or yields no job IDs (exit_code is set to 1
 *	in that case), otherwise the result of the last slurm_requeue()
 *	call. Processing stops at the first job that fails to requeue.
 */
extern int
scontrol_requeue(int argc, char **argv)
{
	uint32_t *job_ids;
	uint32_t job_cnt;
	int idx;
	int rc = SLURM_SUCCESS;

	if (argv[0] == NULL) {
		exit_code = 1;
		return 0;
	}

	job_ids = _get_job_ids(argv[0], &job_cnt);
	if (!job_ids) {
		exit_code = 1;
		return 0;
	}

	for (idx = 0; idx < job_cnt; idx++) {
		rc = slurm_requeue(job_ids[idx], 0);
		if (rc == SLURM_SUCCESS)
			continue;

		/* report the failing job and give up on the rest */
		fprintf(stderr, "%s  array job_id %u\n",
			slurm_strerror(slurm_get_errno()), job_ids[idx]);
		exit_code = 1;
		break;
	}

	xfree(job_ids);

	return rc;
}
Exemplo n.º 5
0
/************
 * Functions *
 ************/
/*
 * _get_new_info_block - refresh the cached block table and hand it to the
 *	caller.
 * IN/OUT block_ptr - on entry the table the caller currently holds, on
 *	return the cached table (only touched when HAVE_BG is defined)
 * RET SLURM_SUCCESS when a table is available and differs from the one the
 *	caller passed in, SLURM_NO_CHANGE_IN_DATA when nothing changed (or
 *	HAVE_BG is not defined), otherwise the error returned by
 *	slurm_load_block_info()
 */
static int _get_new_info_block(block_info_msg_t **block_ptr)
{
	int error_code = SLURM_NO_CHANGE_IN_DATA;
#ifdef HAVE_BG
	static block_info_msg_t *bg_info_ptr = NULL;
	static block_info_msg_t *new_bg_ptr = NULL;

	if (bg_info_ptr) {
		error_code = slurm_load_block_info(bg_info_ptr->last_update,
						   &new_bg_ptr, SHOW_ALL);
		if (error_code == SLURM_SUCCESS) {
			slurm_free_block_info_msg(bg_info_ptr);
		} else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			/* nothing changed: keep using the cached table */
			error_code = SLURM_NO_CHANGE_IN_DATA;
			new_bg_ptr = bg_info_ptr;
		}
	} else {
		/* first call: a timestamp of 0 requests the full table */
		error_code = slurm_load_block_info((time_t) 0,
						   &new_bg_ptr, SHOW_ALL);
	}

	bg_info_ptr = new_bg_ptr;

	/*
	 * Only claim success when there really is a table to hand out;
	 * otherwise a failed load would be masked as SLURM_SUCCESS with a
	 * NULL buffer whenever the caller passed in a non-NULL pointer.
	 */
	if (bg_info_ptr && (*block_ptr != bg_info_ptr))
		error_code = SLURM_SUCCESS;

	*block_ptr = new_bg_ptr;
#endif
	return error_code;
}
Exemplo n.º 6
0
/*
 * slurm_signal_job - send the specified signal to all steps of an existing job
 * IN job_id     - the job's id
 * IN signal     - signal number
 * RET SLURM_SUCCESS on success; on failure the error code is handed to
 *	slurm_seterrno_ret(), which sets errno and produces the return value
 */
extern int
slurm_signal_job (uint32_t job_id, uint16_t signal)
{
	resource_allocation_response_msg_t *alloc_info = NULL;
	signal_job_msg_t rpc;
	int rc;

	if (slurm_allocation_lookup_lite(job_id, &alloc_info) != 0) {
		/* cannot even find the allocation: report why */
		rc = slurm_get_errno();
	} else {
		/* the same remote procedure call goes to every node */
		rpc.job_id = job_id;
		rpc.signal = (uint32_t)signal;

		rc = _local_send_recv_rc_msgs(alloc_info->node_list,
					      REQUEST_SIGNAL_JOB, &rpc);
		slurm_free_resource_allocation_response_msg(alloc_info);
	}

	if (rc != 0) {
		slurm_seterrno_ret(rc);
	}
	return SLURM_SUCCESS;
}
Exemplo n.º 7
0
/*
 * scontrol_load_nodes - load current node table information
 * OUT node_buffer_pptr - set to the node table on success, untouched on
 *	failure
 * IN show_flags - flags passed through to slurm_load_node(); when they
 *	differ from the previous call's flags the cached table's timestamp
 *	is reset so a full reload is requested
 * RET SLURM_SUCCESS (including the "no change in data" case, where the
 *	cached table is reused) or the slurm_load_node() error
 */
extern int
scontrol_load_nodes (node_info_msg_t ** node_buffer_pptr, uint16_t show_flags)
{
	static int last_show_flags = 0xffff;
	node_info_msg_t *fresh_ptr = NULL;
	int rc;

	if (!old_node_info_ptr) {
		rc = slurm_load_node((time_t) 0, &fresh_ptr, show_flags);
	} else {
		if (last_show_flags != show_flags)
			old_node_info_ptr->last_update = (time_t) 0;

		rc = slurm_load_node(old_node_info_ptr->last_update,
				     &fresh_ptr, show_flags);
		if (rc == SLURM_SUCCESS) {
			slurm_free_node_info_msg(old_node_info_ptr);
		} else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			/* keep serving the table we already have */
			fresh_ptr = old_node_info_ptr;
			rc = SLURM_SUCCESS;
			if (quiet_flag == -1)
				printf("slurm_load_node no change in data\n");
		}
	}

	if (rc != SLURM_SUCCESS)
		return rc;

	old_node_info_ptr = fresh_ptr;
	last_show_flags = show_flags;
	*node_buffer_pptr = fresh_ptr;

	return rc;
}
Exemplo n.º 8
0
/*
 * _wait_for_allocation_response - wait for the allocation response RPC of
 *	a queued job, falling back to asking the controller directly.
 * IN job_id  - id of the queued job
 * IN listen  - listening endpoint handed to _wait_for_alloc_rpc()
 * IN timeout - timeout handed to _wait_for_alloc_rpc()
 * RET the allocation response, or NULL if none could be obtained. When the
 *	controller reports ESLURM_JOB_PENDING, errno is restored to the
 *	value left by the failed wait.
 */
static resource_allocation_response_msg_t *
_wait_for_allocation_response(uint32_t job_id, const listen_t *listen,
			      int timeout)
{
	resource_allocation_response_msg_t *alloc = NULL;
	int wait_errno;

	info("job %u queued and waiting for resources", job_id);

	if (_wait_for_alloc_rpc(listen, timeout, &alloc) > 0) {
		info("job %u has been allocated resources", job_id);
		return alloc;
	}

	wait_errno = errno;

	/* Maybe the resource allocation response RPC got lost
	 * in the mail; surely it should have arrived by now.
	 * Let's see if the controller thinks that the allocation
	 * has been granted.
	 */
	if (slurm_allocation_lookup_lite(job_id, &alloc) >= 0)
		return alloc;

	if (slurm_get_errno() != ESLURM_JOB_PENDING) {
		debug3("Unable to confirm allocation for job %u: %m",
		       job_id);
	} else {
		debug3("Still waiting for allocation");
		errno = wait_errno;
	}

	return NULL;
}
Exemplo n.º 9
0
/*
 * get_new_info_config - refresh the cached controller configuration
 *	(g_ctl_info_ptr) and hand it to the caller.
 * IN/OUT info_ptr - on entry the configuration the caller currently holds,
 *	on return the cached configuration
 * RET SLURM_SUCCESS when a configuration is available and differs from the
 *	one passed in, SLURM_NO_CHANGE_IN_DATA when nothing changed,
 *	otherwise the slurm_load_ctl_conf() error
 */
extern int get_new_info_config(slurm_ctl_conf_info_msg_t **info_ptr)
{
	static slurm_ctl_conf_info_msg_t *new_ctl_ptr = NULL;
	int rc = SLURM_NO_CHANGE_IN_DATA;

	if (!g_ctl_info_ptr) {
		/* nothing cached yet: request the full configuration */
		new_ctl_ptr = NULL;
		rc = slurm_load_ctl_conf((time_t) 0, &new_ctl_ptr);
	} else {
		rc = slurm_load_ctl_conf(g_ctl_info_ptr->last_update,
					 &new_ctl_ptr);
		if (rc == SLURM_SUCCESS) {
			slurm_free_ctl_conf(g_ctl_info_ptr);
		} else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			rc = SLURM_NO_CHANGE_IN_DATA;
			new_ctl_ptr = g_ctl_info_ptr;
		}
	}
	g_ctl_info_ptr = new_ctl_ptr;

	/* the caller is out of date whenever it holds a different pointer */
	if ((g_ctl_info_ptr != NULL) && (g_ctl_info_ptr != *info_ptr))
		rc = SLURM_SUCCESS;

	*info_ptr = new_ctl_ptr;

	return rc;
}
Exemplo n.º 10
0
/*
 * scontrol_job_notify - send a message to stdout of the specified job
 * IN argc - count of arguments, including the job id
 * IN argv - argv[0] is the job id, argv[1] onward are the words of the
 *	message (joined with single spaces)
 * RET 0 on success, 1 if the job id is missing or not a positive number
 *	that fits in 32 bits, otherwise the slurm errno left by
 *	slurm_notify_job()
 */
extern int
scontrol_job_notify(int argc, char *argv[])
{
	int i;
	long parsed_id;
	uint32_t job_id;
	char *message = NULL;

	if (argv[0] == NULL) {
		fprintf(stderr, "Invalid job_id %s\n", "(null)");
		return 1;
	}

	/*
	 * Validate in a signed type before narrowing: job_id is unsigned,
	 * so a test of the form (job_id <= 0) could only ever catch zero
	 * and negative input would wrap around to a huge job id.
	 */
	parsed_id = strtol(argv[0], NULL, 10);
	if ((parsed_id <= 0) || ((unsigned long) parsed_id > 0xffffffffUL)) {
		fprintf(stderr, "Invalid job_id %s\n", argv[0]);
		return 1;
	}
	job_id = (uint32_t) parsed_id;

	for (i = 1; i < argc; i++) {
		if (message)
			xstrfmtcat(message, " %s", argv[i]);
		else
			xstrcat(message, argv[i]);
	}

	i = slurm_notify_job(job_id, message);
	xfree(message);

	if (i)
		return slurm_get_errno ();
	else
		return 0;
}
Exemplo n.º 11
0
/*
 * scontrol_create_part - create a slurm partition configuration per the
 *	supplied arguments
 * IN argc - count of arguments
 * IN argv - list of arguments
 * RET 0 if no slurm error, errno otherwise. parsing error prints
 *			error message and returns 0
 */
extern int
scontrol_create_part (int argc, char *argv[])
{
	int update_cnt = 0;
	update_part_msg_t part_msg;

	slurm_init_part_desc_msg ( &part_msg );
	scontrol_parse_part_options (argc, argv, &update_cnt, &part_msg);

	if (part_msg.name == NULL) {
		exit_code = 1;
		error("PartitionName must be given.");
		return 0;
	} else if (xstrcasecmp(part_msg.name, "default") == 0) {
		exit_code = 1;
		error("PartitionName cannot be \"DEFAULT\".");
		return 0;
	}

	if (update_cnt == 0) {
		exit_code = 1;
		error("No parameters specified");
		return 0;
	}

	if (slurm_create_partition(&part_msg)) {
		exit_code = 1;
		slurm_perror("Error creating the partition");
		return slurm_get_errno ();
	} else
		return 0;
}
Exemplo n.º 12
0
/*
 * scontrol_load_front_end - load current front_end table information
 * OUT front_end_buffer_pptr - set to the front_end table on success,
 *	untouched on failure
 * RET SLURM_SUCCESS (including the "no change in data" case, where the
 *	cached table is reused) or the slurm_load_front_end() error
 */
extern int
scontrol_load_front_end(front_end_info_msg_t ** front_end_buffer_pptr)
{
	front_end_info_msg_t *fresh_ptr = NULL;
	int rc;

	if (!old_front_end_info_ptr) {
		rc = slurm_load_front_end((time_t) 0, &fresh_ptr);
	} else {
		rc = slurm_load_front_end(old_front_end_info_ptr->last_update,
					  &fresh_ptr);
		if (rc == SLURM_SUCCESS) {
			slurm_free_front_end_info_msg(old_front_end_info_ptr);
		} else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			/* keep serving the table we already have */
			fresh_ptr = old_front_end_info_ptr;
			rc = SLURM_SUCCESS;
			if (quiet_flag == -1)
				printf("slurm_load_front_end no change in data\n");
		}
	}

	if (rc != SLURM_SUCCESS)
		return rc;

	old_front_end_info_ptr = fresh_ptr;
	*front_end_buffer_pptr = fresh_ptr;

	return rc;
}
Exemplo n.º 13
0
/*
 * scontrol_load_block - load current block table information
 * OUT block_info_pptr - set to the block table on success, untouched on
 *	failure
 * RET SLURM_SUCCESS (including the "no change in data" case, where the
 *	cached table is reused) or the slurm_load_block_info() error
 *
 * SHOW_ALL is requested when all_flag is set.
 */
extern int
scontrol_load_block (block_info_msg_t **block_info_pptr)
{
	block_info_msg_t *fresh_ptr = NULL;
	uint16_t show_flags = all_flag ? SHOW_ALL : 0;
	int rc;

	if (!old_block_info_ptr) {
		rc = slurm_load_block_info((time_t) 0, &fresh_ptr,
					   show_flags);
	} else {
		rc = slurm_load_block_info(old_block_info_ptr->last_update,
					   &fresh_ptr, show_flags);
		if (rc == SLURM_SUCCESS) {
			slurm_free_block_info_msg(old_block_info_ptr);
		} else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			/* keep serving the table we already have */
			fresh_ptr = old_block_info_ptr;
			rc = SLURM_SUCCESS;
			if (quiet_flag == -1)
				printf("slurm_load_block no change in data\n");
		}
	}

	if (rc != SLURM_SUCCESS)
		return rc;

	old_block_info_ptr = fresh_ptr;
	*block_info_pptr = fresh_ptr;

	return rc;
}
Exemplo n.º 14
0
/*
 * scontrol_update_part - update the slurm partition configuration per the
 *	supplied arguments
 * IN argc - count of arguments
 * IN argv - list of arguments
 * RET 0 if no slurm error, errno otherwise. parsing error prints
 *			error message and returns 0
 */
extern int
scontrol_update_part (int argc, char *argv[])
{
	int update_cnt = 0;
	update_part_msg_t part_msg;

	slurm_init_part_desc_msg ( &part_msg );
	scontrol_parse_part_options (argc, argv, &update_cnt, &part_msg);

	if (part_msg.name == NULL) {
		exit_code = 1;
		error("PartitionName must be given.");
		return 0;
	}
	if (update_cnt <= 1) {
		exit_code = 1;
		error("No changes specified");
		return 0;
	}

	if (slurm_update_partition(&part_msg)) {
		exit_code = 1;
		return slurm_get_errno ();
	} else
		return 0;
}
Exemplo n.º 15
0
Arquivo: sbcast.c Projeto: Cray/slurm
/*
 * _get_job_info - read the job id from the SLURM_JOB_ID environment
 *	variable, look up its sbcast credential and leave the result in
 *	sbcast_cred.
 *
 * The process exits with status 1 (after logging an error) when the
 * variable is not set or the lookup fails.
 */
static void _get_job_info(void)
{
	const char *env_job_id = getenv("SLURM_JOB_ID");
	uint32_t jobid;

	if (env_job_id == NULL) {
		error("Command only valid from within SLURM job");
		exit(1);
	}

	jobid = (uint32_t) atol(env_job_id);
	verbose("jobid      = %u", jobid);

	if (slurm_sbcast_lookup(jobid, &sbcast_cred) != SLURM_SUCCESS) {
		const char *reason = slurm_strerror(slurm_get_errno());

		error("SLURM jobid %u lookup error: %s", jobid, reason);
		exit(1);
	}

	/* sbcast_cred->node_addr (array) is also available here */
	verbose("node_cnt   = %u", sbcast_cred->node_cnt);
	verbose("node_list  = %s", sbcast_cred->node_list);

	if (params.verbose) {
		print_sbcast_cred(sbcast_cred->sbcast_cred);
	}

	/* The lookup response is intentionally not released:
	 * most of its contents are still needed later on. */
}
Exemplo n.º 16
0
/*
 * scontrol_load_job - load current job table information
 * OUT job_buffer_pptr - set to the job table on success, untouched on
 *	failure
 * IN job_id - when non-zero only this job is loaded (slurm_load_job()),
 *	otherwise the whole table is loaded (slurm_load_jobs())
 * RET SLURM_SUCCESS (including the "no change in data" case, where the
 *	cached table is reused) or the load error
 *
 * The show flags are derived from all_flag, detail_flag, federation_flag,
 * local_flag and sibling_flag. A change of flags since the previous call
 * resets the cached table's timestamp.
 */
extern int
scontrol_load_job(job_info_msg_t ** job_buffer_pptr, uint32_t job_id)
{
	static uint16_t last_show_flags = 0xffff;
	job_info_msg_t *fresh_ptr = NULL;
	uint16_t show_flags = 0;
	time_t since = (time_t) 0;
	int rc;

	if (all_flag)
		show_flags |= SHOW_ALL;
	if (detail_flag)
		show_flags |= SHOW_DETAIL;
	if (detail_flag > 1)
		show_flags |= SHOW_DETAIL2;
	if (federation_flag)
		show_flags |= SHOW_FEDERATION;
	if (local_flag)
		show_flags |= SHOW_LOCAL;
	if (sibling_flag)
		show_flags |= SHOW_FEDERATION | SHOW_SIBLING;

	if (old_job_info_ptr) {
		if (last_show_flags != show_flags)
			old_job_info_ptr->last_update = (time_t) 0;
		since = old_job_info_ptr->last_update;
	}

	if (job_id) {
		rc = slurm_load_job(&fresh_ptr, job_id, show_flags);
	} else {
		rc = slurm_load_jobs(since, &fresh_ptr, show_flags);
	}

	if (old_job_info_ptr) {
		if (rc == SLURM_SUCCESS) {
			slurm_free_job_info_msg(old_job_info_ptr);
		} else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			/* keep serving the table we already have */
			fresh_ptr = old_job_info_ptr;
			rc = SLURM_SUCCESS;
			if (quiet_flag == -1)
				printf("slurm_load_jobs no change in data\n");
		}
	}

	if (rc != SLURM_SUCCESS)
		return rc;

	old_job_info_ptr = fresh_ptr;
	/* a single-job result must not pass for an up-to-date full table */
	if (job_id)
		old_job_info_ptr->last_update = (time_t) 0;
	last_show_flags = show_flags;
	*job_buffer_pptr = fresh_ptr;

	return rc;
}
Exemplo n.º 17
0
/*
 * switch_p_get_errno - report the current slurm errno if it belongs to the
 *	switch plugin's range.
 * RET the value of slurm_get_errno() when it lies within
 *	[ESLURM_SWITCH_MIN, ESLURM_SWITCH_MAX], otherwise SLURM_SUCCESS
 */
extern int switch_p_get_errno(void)
{
	int code = slurm_get_errno();

	if ((code < ESLURM_SWITCH_MIN) || (code > ESLURM_SWITCH_MAX))
		return SLURM_SUCCESS;

	return code;
}
Exemplo n.º 18
0
/*
 * scontrol_requeue - requeue a pending or running batch job
 * IN job_str - a job id specification, optionally prefixed with "jobid="
 *	or "job=" (case insensitive)
 *
 * Each id produced by _next_job_id() is requeued with slurm_requeue2().
 * Failures set exit_code to 1 and, unless quiet_flag is 1, are reported on
 * stderr. An empty string only sets exit_code.
 */
extern void
scontrol_requeue(char *job_str)
{
	job_array_resp_msg_t *reply = NULL;
	char *one_id;
	int rc, idx;

	if (job_str[0] == '\0') {
		exit_code = 1;
		return;
	}

	if (!xstrncasecmp(job_str, "jobid=", 6))
		job_str += 6;
	if (!xstrncasecmp(job_str, "job=", 4))
		job_str += 4;

	if (!_is_job_id(job_str)) {
		exit_code = 1;
		rc = ESLURM_INVALID_JOB_ID;
		slurm_seterrno(rc);
		if (quiet_flag != 1) {
			fprintf(stderr, "%s for job %s\n",
				slurm_strerror(rc), job_str);
		}
		return;
	}

	for (one_id = _next_job_id(); one_id; one_id = _next_job_id()) {
		rc = slurm_requeue2(one_id, 0, &reply);
		if (rc != SLURM_SUCCESS) {
			exit_code = 1;
			if (quiet_flag != 1) {
				fprintf(stderr, "%s for job %s\n",
					slurm_strerror(slurm_get_errno()),
					one_id);
			}
			continue;
		}
		if (!reply)
			continue;

		for (idx = 0; idx < reply->job_array_count; idx++) {
			/* a lone successful entry needs no report */
			if ((reply->job_array_count == 1) &&
			    (reply->error_code[idx] == SLURM_SUCCESS))
				continue;
			exit_code = 1;
			if (quiet_flag != 1) {
				fprintf(stderr, "%s: %s\n",
					reply->job_array_id[idx],
					slurm_strerror(reply->error_code[idx]));
			}
		}
		slurm_free_job_array_resp(reply);
		reply = NULL;
	}
}
Exemplo n.º 19
0
/*
 * _print_job_steps - load and print the job step information
 * IN clear_old - when true, reset the timestamp of the previously loaded
 *	step table before reloading
 * RET SLURM_SUCCESS, or SLURM_ERROR if the steps could not be loaded (the
 *	failure is reported with slurm_perror())
 *
 * The step table is kept in a static pointer between calls. When neither
 * params.format nor params.format_long is set, a default format is stored
 * in params.format.
 */
static int
_print_job_steps( bool clear_old )
{
	static job_step_info_response_msg_t *old_step_ptr = NULL;
	static job_step_info_response_msg_t *new_step_ptr;
	uint16_t show_flags = params.all_flag ? SHOW_ALL : 0;
	int rc;

	if (old_step_ptr && clear_old)
		old_step_ptr->last_update = 0;

	/* Always ask with a last_update time of 0 so that we can get an
	 * updated run_time for jobs rather than just its start_time */
	rc = slurm_get_job_steps((time_t) 0, NO_VAL, NO_VAL,
				 &new_step_ptr, show_flags);

	if (old_step_ptr) {
		if (rc == SLURM_SUCCESS) {
			slurm_free_job_step_info_response_msg(old_step_ptr);
		} else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			rc = SLURM_SUCCESS;
			new_step_ptr = old_step_ptr;
		}
	}

	if (rc != 0) {
		slurm_perror ("slurm_get_job_steps error");
		return SLURM_ERROR;
	}
	old_step_ptr = new_step_ptr;

	if (params.verbose) {
		printf ("last_update_time=%ld records=%u\n",
			(long) new_step_ptr->last_update,
			new_step_ptr->job_step_count);
	}

	if (!(params.format || params.format_long))
		params.format = "%.15i %.8j %.9P %.8u %.9M %N";

	if (params.format_list == NULL) {
		if (params.format)
			parse_format(params.format);
		else if (params.format_long)
			parse_long_format(params.format_long);
	}

	print_steps_array(new_step_ptr->job_steps,
			  new_step_ptr->job_step_count,
			  params.format_list);

	return SLURM_SUCCESS;
}
Exemplo n.º 20
0
/*
 * _config_make_tbl - build a hash table from knl_conf_file_options and
 *	fill it by parsing the given configuration file.
 * IN filename - path of the file to parse (must not be NULL)
 * RET the populated table, or NULL if the table could not be created or
 *	the file could not be parsed (the failure is logged)
 */
static s_p_hashtbl_t *_config_make_tbl(char *filename)
{
	s_p_hashtbl_t *conf_tbl;

	xassert(filename);

	conf_tbl = s_p_hashtbl_create(knl_conf_file_options);
	if (conf_tbl == NULL) {
		error("%s: s_p_hashtbl_create error: %s", prog_name,
		      slurm_strerror(slurm_get_errno()));
		return NULL;
	}

	if (s_p_parse_file(conf_tbl, NULL, filename, false) != SLURM_ERROR)
		return conf_tbl;

	/* parsing failed: report it and discard the half-built table */
	error("%s: s_p_parse_file error: %s", prog_name,
	      slurm_strerror(slurm_get_errno()));
	s_p_hashtbl_destroy(conf_tbl);

	return NULL;
}
Exemplo n.º 21
0
/*
 * scontrol_update_powercap - update the slurm powercapping configuration per the
 *	supplied arguments
 * IN argc - count of arguments
 * IN argv - list of arguments
 * RET 0 if no slurm error, errno otherwise. parsing error prints
 *			error message and returns 0
 */
extern int
scontrol_update_powercap (int argc, char *argv[])
{
	update_powercap_msg_t powercap_msg;
	int i;
	char *tag, *val;
	int taglen, vallen;

	memset(&powercap_msg, 0, sizeof(update_powercap_msg_t));
	powercap_msg.powercap = (uint32_t) NO_VAL;
	powercap_msg.min_watts = (uint32_t) NO_VAL;
	powercap_msg.cur_max_watts = (uint32_t) NO_VAL;
	powercap_msg.adj_max_watts = (uint32_t) NO_VAL;
	powercap_msg.max_watts = (uint32_t) NO_VAL;

	for (i=0; i<argc; i++) {
		tag = argv[i];
		val = strchr(argv[i], '=');
		if (val) {
			taglen = val - argv[i];
			val++;
			vallen = strlen(val);
		} else {
			exit_code = 1;
			error("Invalid input: %s  Request aborted", argv[i]);
			return -1;
		}

		if (strncasecmp(tag, "PowerCap", MAX(taglen, 8)) == 0) {
			if (strncasecmp(val, "INFINITE",
					MAX(vallen, 8)) == 0 ) {
				powercap_msg.powercap = (uint32_t) INFINITE;
			} else if (parse_uint32(val,&(powercap_msg.powercap))) {
				error("Invalid PowerCap value: %s", val);
				return -1;
			}
			/* for now, we can break as we do not have other args */
			break;
		}
	}

	if (powercap_msg.powercap == (uint32_t) NO_VAL) {
		exit_code = 1;
		error("Invalid PowerCap value.");
		return 0;
	}

	if (slurm_update_powercap(&powercap_msg)) {
		exit_code = 1;
		return slurm_get_errno ();
	} else
		return 0;
}
Exemplo n.º 22
0
/*
 * get_new_info_block - refresh the cached block table (g_block_info_ptr)
 *	and hand it to the caller.
 * IN/OUT block_ptr - on entry the table the caller currently holds, on
 *	return the cached table; may be NULL if the caller only wants the
 *	cache refreshed
 * IN force - when non-zero, reload even if the previous load happened less
 *	than working_sview_config.refresh_delay seconds ago
 * RET SLURM_SUCCESS when the caller's table is out of date or the table
 *	changed on the last load, SLURM_NO_CHANGE_IN_DATA when nothing
 *	changed (or the cluster lacks CLUSTER_FLAG_BG), otherwise the
 *	slurm_load_block_info() error
 */
extern int get_new_info_block(block_info_msg_t **block_ptr, int force)
{
	int error_code = SLURM_NO_CHANGE_IN_DATA;
	block_info_msg_t *new_bg_ptr = NULL;
	time_t now = time(NULL);
	static time_t last;
	static bool changed = 0;
	uint16_t show_flags = 0;

	if (!(cluster_flags & CLUSTER_FLAG_BG))
		return error_code;

	if (g_block_info_ptr && !force
	    && ((now - last) < working_sview_config.refresh_delay)) {
		/*
		 * Serve from the cache. block_ptr is optional here just as
		 * it is after a reload below, so do not dereference it
		 * unconditionally.
		 */
		if (block_ptr) {
			if (*block_ptr != g_block_info_ptr)
				error_code = SLURM_SUCCESS;
			*block_ptr = g_block_info_ptr;
		}
		if (changed)
			error_code = SLURM_SUCCESS;
		goto end_it;
	}
	last = now;
	if (working_sview_config.show_hidden)
		show_flags |= SHOW_ALL;
	if (g_block_info_ptr) {
		error_code = slurm_load_block_info(
			g_block_info_ptr->last_update, &new_bg_ptr, show_flags);
		if (error_code == SLURM_SUCCESS) {
			slurm_free_block_info_msg(g_block_info_ptr);
			changed = 1;
		} else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			/* nothing changed: keep using the cached table */
			error_code = SLURM_NO_CHANGE_IN_DATA;
			new_bg_ptr = g_block_info_ptr;
			changed = 0;
		}
	} else {
		/* nothing cached yet: a timestamp of 0 requests everything */
		new_bg_ptr = NULL;
		error_code = slurm_load_block_info(
			(time_t) 0, &new_bg_ptr, show_flags);
		changed = 1;
	}

	g_block_info_ptr = new_bg_ptr;
	if (block_ptr) {
		if (g_block_info_ptr && (*block_ptr != g_block_info_ptr))
			error_code = SLURM_SUCCESS;

		*block_ptr = g_block_info_ptr;
	}
end_it:
	return error_code;
}
Exemplo n.º 23
0
/*
 * scontrol_requeue_hold - requeue the named jobs with JOB_REQUEUE_HOLD set
 * IN argc - count of arguments
 * IN argv - with one argument, argv[0] is the job id specification;
 *	otherwise argv[1] is the job id specification and, when there are
 *	exactly two arguments, argv[0] is a state specification parsed by
 *	_parse_requeue_flags()
 * RET 0 if the job ids or the state specification are invalid (exit_code
 *	is set to 1), otherwise the result of the last slurm_requeue()
 *	call. Processing stops at the first job that fails to requeue.
 */
extern int
scontrol_requeue_hold(int argc, char **argv)
{
	uint32_t hold_flags = 0;
	uint32_t *job_ids;
	uint32_t job_cnt;
	char *id_arg = (argc == 1) ? argv[0] : argv[1];
	int idx;
	int rc = SLURM_SUCCESS;

	job_ids = _get_job_ids(id_arg, &job_cnt);
	if (!job_ids) {
		exit_code = 1;
		return 0;
	}

	if (argc == 2) {
		rc = _parse_requeue_flags(argv[0], &hold_flags);
		if (rc < 0) {
			error("Invalid state specification %s", argv[0]);
			exit_code = 1;
			xfree(job_ids);
			return 0;
		}
	}
	hold_flags |= JOB_REQUEUE_HOLD;

	/* Requeue each job into either the
	 * JOB_SPECIAL_EXIT or the HELD state.
	 */
	for (idx = 0; idx < job_cnt; idx++) {
		rc = slurm_requeue(job_ids[idx], hold_flags);
		if (rc == SLURM_SUCCESS)
			continue;

		/* report the failing job and give up on the rest */
		fprintf(stderr, "%s  array job_id %u\n",
			slurm_strerror(slurm_get_errno()), job_ids[idx]);
		exit_code = 1;
		break;
	}

	xfree(job_ids);

	return rc;
}
Exemplo n.º 24
0
/*
 * scontrol_hold - perform some job hold/release operation
 * IN op - operation name: "holdu"/"uhold" (user hold), "hold", or anything
 *	else for release
 * IN job_id_str - a job id
 * RET 0 if no slurm error, errno otherwise. A missing or malformed job id
 *	prints an error message, sets exit_code and returns 0. SLURM_ERROR
 *	is returned when the job state cannot be determined and
 *	ESLURM_JOB_NOT_PENDING when the job is not pending.
 */
extern int
scontrol_hold(char *op, char *job_id_str)
{
	int rc = SLURM_SUCCESS;
	char *next_str;
	long parsed_id;
	job_desc_msg_t job_msg;
	uint16_t job_state;

	slurm_init_job_desc_msg (&job_msg);

	/* set current user, needed e.g., for AllowGroups checks */
	job_msg.user_id = getuid();

	if (!job_id_str) {
		fprintf(stderr, "Invalid job id specified\n");
		exit_code = 1;
		return 0;
	}

	/*
	 * Validate in a signed type before narrowing to 32 bits; casting
	 * first would let negative or over-range input wrap around to a
	 * different, valid-looking job id.
	 */
	parsed_id = strtol(job_id_str, &next_str, 10);
	if ((parsed_id <= 0) ||
	    ((unsigned long) parsed_id > 0xffffffffUL) ||
	    (next_str[0] != '\0')) {
		fprintf(stderr, "Invalid job id specified\n");
		exit_code = 1;
		return 0;
	}
	job_msg.job_id = (uint32_t) parsed_id;

	job_state = scontrol_get_job_state(job_msg.job_id);
	if (job_state == (uint16_t) NO_VAL)
		return SLURM_ERROR;
	if ((job_state & JOB_STATE_BASE) != JOB_PENDING) {
		slurm_seterrno(ESLURM_JOB_NOT_PENDING);
		return ESLURM_JOB_NOT_PENDING;
	}

	if ((strncasecmp(op, "holdu", 5) == 0) ||
	    (strncasecmp(op, "uhold", 5) == 0)) {
		job_msg.priority = 0;
		job_msg.alloc_sid = ALLOC_SID_USER_HOLD;
	} else if (strncasecmp(op, "hold", 4) == 0) {
		job_msg.priority = 0;
		job_msg.alloc_sid = 0;
	} else
		job_msg.priority = INFINITE;

	if (slurm_update_job(&job_msg))
		return slurm_get_errno();

	return rc;
}
Exemplo n.º 25
0
/*
 * scontrol_requeue_hold - requeue a job with JOB_REQUEUE_HOLD set
 * IN state_flag - requeue flags; JOB_REQUEUE_HOLD is always added
 * IN job_str - a job id specification
 *
 * Each id produced by _next_job_id() is requeued with slurm_requeue2().
 * Failures set exit_code to 1 and, unless quiet_flag is 1, are reported on
 * stderr.
 */
extern void
scontrol_requeue_hold(uint32_t state_flag, char *job_str)
{
	job_array_resp_msg_t *reply = NULL;
	char *one_id;
	int rc, idx;

	state_flag |= JOB_REQUEUE_HOLD;

	if (!_is_job_id(job_str)) {
		exit_code = 1;
		rc = ESLURM_INVALID_JOB_ID;
		slurm_seterrno(rc);
		if (quiet_flag != 1) {
			fprintf(stderr, "%s for job %s\n",
				slurm_strerror(rc), job_str);
		}
		return;
	}

	for (one_id = _next_job_id(); one_id; one_id = _next_job_id()) {
		rc = slurm_requeue2(one_id, state_flag, &reply);
		if (rc != SLURM_SUCCESS) {
			exit_code = 1;
			if (quiet_flag != 1) {
				fprintf(stderr, "%s for job %s\n",
					slurm_strerror(slurm_get_errno()),
					one_id);
			}
			continue;
		}
		if (!reply)
			continue;

		for (idx = 0; idx < reply->job_array_count; idx++) {
			/* a lone successful entry needs no report */
			if ((reply->job_array_count == 1) &&
			    (reply->error_code[idx] == SLURM_SUCCESS))
				continue;
			exit_code = 1;
			if (quiet_flag != 1) {
				fprintf(stderr, "%s: %s\n",
					reply->job_array_id[idx],
					slurm_strerror(reply->error_code[idx]));
			}
		}
		slurm_free_job_array_resp(reply);
		reply = NULL;
	}
}
Exemplo n.º 26
0
/*
 * scontrol_update_res - update the slurm reservation configuration per the
 *     supplied arguments
 * IN argc - count of arguments
 * IN argv - list of arguments
 * RET 0 if no slurm error, errno otherwise. A parsing error or a missing
 *     reservation name returns 0 (the latter prints an error message and
 *     sets exit_code).
 *
 * Strings flagged by scontrol_parse_res_options() as needing release are
 * freed before returning, whatever the outcome.
 */
extern int
scontrol_update_res(int argc, char *argv[])
{
    resv_desc_msg_t resv_msg;
    int ret = 0;
    int free_user_str = 0, free_acct_str = 0, free_tres_license = 0,
        free_tres_bb = 0, free_tres_corecnt = 0, free_tres_nodecnt = 0;

    slurm_init_resv_desc_msg(&resv_msg);

    if (scontrol_parse_res_options(argc, argv, "No reservation update.",
                                   &resv_msg, &free_user_str,
                                   &free_acct_str, &free_tres_license,
                                   &free_tres_bb, &free_tres_corecnt,
                                   &free_tres_nodecnt) == 0) {
        if (resv_msg.name == NULL) {
            exit_code = 1;
            error("Reservation must be given.  No reservation update.");
        } else if (slurm_update_reservation(&resv_msg)) {
            exit_code = 1;
            slurm_perror("Error updating the reservation");
            ret = slurm_get_errno();
        } else {
            printf("Reservation updated.\n");
        }
    }

    /* release whatever the option parser marked as ours to free */
    if (free_user_str)
        xfree(resv_msg.users);
    if (free_acct_str)
        xfree(resv_msg.accounts);
    if (free_tres_license)
        xfree(resv_msg.licenses);
    if (free_tres_bb)
        xfree(resv_msg.burst_buffer);
    if (free_tres_corecnt)
        xfree(resv_msg.core_cnt);
    if (free_tres_nodecnt)
        xfree(resv_msg.node_cnt);

    return ret;
}
Exemplo n.º 27
0
/*
 * get_new_info_resv - refresh the cached reservation table
 *	(g_resv_info_ptr) and hand it to the caller.
 * IN/OUT info_ptr - on entry the table the caller currently holds, on
 *	return the cached table
 * IN force - when non-zero, reload even if the previous load happened less
 *	than working_sview_config.refresh_delay seconds ago
 * RET SLURM_SUCCESS when the caller's table is out of date or the table
 *	changed on the last load, SLURM_NO_CHANGE_IN_DATA when nothing
 *	changed, otherwise the slurm_load_reservations() error
 */
extern int get_new_info_resv(reserve_info_msg_t **info_ptr,
			     int force)
{
	static reserve_info_msg_t *new_resv_ptr = NULL;
	static time_t last;
	static bool changed = 0;
	int rc = SLURM_NO_CHANGE_IN_DATA;
	time_t now = time(NULL);

	/* within the refresh delay: answer from the cache */
	if (g_resv_info_ptr && !force
	    && ((now - last) < working_sview_config.refresh_delay)) {
		if ((*info_ptr != g_resv_info_ptr) || changed)
			rc = SLURM_SUCCESS;
		*info_ptr = g_resv_info_ptr;
		return rc;
	}

	last = now;
	if (!g_resv_info_ptr) {
		new_resv_ptr = NULL;
		rc = slurm_load_reservations((time_t) 0, &new_resv_ptr);
		changed = 1;
	} else {
		rc = slurm_load_reservations(g_resv_info_ptr->last_update,
					     &new_resv_ptr);
		if (rc == SLURM_SUCCESS) {
			slurm_free_reservation_info_msg(g_resv_info_ptr);
			changed = 1;
		} else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) {
			rc = SLURM_NO_CHANGE_IN_DATA;
			new_resv_ptr = g_resv_info_ptr;
			changed = 0;
		}
	}

	g_resv_info_ptr = new_resv_ptr;

	if ((g_resv_info_ptr != NULL) && (g_resv_info_ptr != *info_ptr))
		rc = SLURM_SUCCESS;

	*info_ptr = g_resv_info_ptr;

	return rc;
}
Exemplo n.º 28
0
/*
 * scontrol_top_job - Move the specified job ID to the top of the queue for
 *	a given user ID, partition, account, and QOS.
 * IN job_id_str - a job id, optionally prefixed with "jobid=" or "job="
 *	(case insensitive)
 *
 * On failure exit_code is set to 1 and, unless quiet_flag is 1, the error
 * is reported on stderr.
 */
extern void
scontrol_top_job(char *job_id_str)
{
	if (!xstrncasecmp(job_id_str, "jobid=", 6))
		job_id_str += 6;
	if (!xstrncasecmp(job_id_str, "job=", 4))
		job_id_str += 4;

	if (slurm_top_job(job_id_str) == SLURM_SUCCESS)
		return;

	exit_code = 1;
	if (quiet_flag == 1)
		return;

	fprintf(stderr, "%s for job %s\n",
		slurm_strerror(slurm_get_errno()), job_id_str);
}
Exemplo n.º 29
0
/*
 * slurmdrmaa_get_DRM_system - return the DRM system identification string.
 *
 * The string is built on first use and kept in slurmdrmaa_version: it is
 * "SLURM <version>" when the controller configuration can be loaded and
 * plain "SLURM" otherwise (the load error is logged). The self argument is
 * not used.
 */
static const char *
slurmdrmaa_get_DRM_system( fsd_drmaa_singletone_t *self )
{
	slurm_ctl_conf_t *ctl_conf = NULL;

	/* no locks, as drmaa_get_drm_system is usually called only once */
	if (slurmdrmaa_version[0] != '\0')
		return slurmdrmaa_version;

	if (slurm_load_ctl_conf((time_t) 0, &ctl_conf) != -1) {
		fsd_snprintf(NULL, slurmdrmaa_version,
			     sizeof(slurmdrmaa_version)-1,
			     "SLURM %s", ctl_conf->version);
		slurm_free_ctl_conf(ctl_conf);
	} else {
		fsd_log_error(("slurm_load_ctl_conf error: %s",slurm_strerror(slurm_get_errno())));
		fsd_snprintf(NULL, slurmdrmaa_version,
			     sizeof(slurmdrmaa_version)-1, "SLURM");
	}

	return slurmdrmaa_version;
}
Exemplo n.º 30
0
static void _wait_for_allocation_response(uint32_t job_id,
					  const listen_t *listen,
					  uint16_t msg_type, int timeout,
					  void **resp)
{
	int errnum, rc;

	info("job %u queued and waiting for resources", job_id);
	*resp = NULL;
	if ((rc = _wait_for_alloc_rpc(listen, timeout)) == 1)
		rc = _accept_msg_connection(listen->fd, msg_type, resp);
	if (rc <= 0) {
		errnum = errno;
		/* Maybe the resource allocation response RPC got lost
		 * in the mail; surely it should have arrived by now.
		 * Let's see if the controller thinks that the allocation
		 * has been granted.
		 */
		if (msg_type == RESPONSE_RESOURCE_ALLOCATION) {
			if (slurm_allocation_lookup(job_id,
					(resource_allocation_response_msg_t **)
					resp) >= 0)
				return;
		} else if (msg_type == RESPONSE_JOB_PACK_ALLOCATION) {
			if (slurm_pack_job_lookup(job_id, (List *) resp) >= 0)
				return;
		} else {
			error("%s: Invalid msg_type (%u)", __func__, msg_type);
		}

		if (slurm_get_errno() == ESLURM_JOB_PENDING) {
			debug3("Still waiting for allocation");
			errno = errnum;
			return;
		} else {
			debug3("Unable to confirm allocation for job %u: %m",
			       job_id);
			return;
		}
	}
	info("job %u has been allocated resources", job_id);
	return;
}