Beispiel #1
0
static void _signal_while_allocating(int signo)
{
	allocation_interrupted = true;
	if (pending_job_id != 0) {
		slurm_complete_job(pending_job_id, 0);
	}
}
Beispiel #2
0
static void _signal_while_allocating(int signo)
{
	destroy_job = 1;
	if (pending_job_id != 0) {
		slurm_complete_job(pending_job_id, NO_VAL);
		info("Job allocation %u has been revoked.", pending_job_id);

	}
}
Beispiel #3
0
static void _signal_while_allocating(int signo)
{
	debug("Got signal %d", signo);
	if (signo == SIGCONT)
		return;

	destroy_job = 1;
	if (pending_job_id != 0) {
		slurm_complete_job(pending_job_id, NO_VAL);
		info("Job allocation %u has been revoked.", pending_job_id);

	}
}
Beispiel #4
0
static void *_safe_signal_while_allocating(void *in_data)
{
	int signo = *(int *)in_data;

	debug("Got signal %d", signo);
	if (signo == SIGCONT)
		return NULL;

	destroy_job = 1;
	if (pending_job_id != 0) {
		slurm_complete_job(pending_job_id, NO_VAL);
		info("Job allocation %u has been revoked.", pending_job_id);
	}

	return NULL;
}
Beispiel #5
0
/*
 * slurm_allocate_resources_blocking
 *	allocate resources for a job request.  This call will block until
 *	the allocation is granted, or the specified timeout limit is reached.
 * IN req - description of resource allocation request
 * IN timeout - amount of time, in seconds, to wait for a response before
 * 	giving up.
 *	A timeout of zero will wait indefinitely.
 * IN pending_callback - If the allocation cannot be granted immediately,
 *      the controller will put the job in the PENDING state.  If
 *      pending callback is not NULL, it will be called with the job_id
 *      of the pending job as the sole parameter.
 *
 * RET allocation structure on success, NULL on error set errno to
 *	indicate the error (errno will be ETIMEDOUT if the timeout is reached
 *      with no allocation granted)
 * NOTE: free the response using slurm_free_resource_allocation_response_msg()
 */
resource_allocation_response_msg_t *
slurm_allocate_resources_blocking (const job_desc_msg_t *user_req,
				   time_t timeout,
				   void(*pending_callback)(uint32_t job_id))
{
	int rc;
	slurm_msg_t req_msg;
	slurm_msg_t resp_msg;
	resource_allocation_response_msg_t *resp = NULL;
	char *hostname = NULL;
	uint32_t job_id;
	job_desc_msg_t *req;
	listen_t *listen = NULL;
	int errnum = SLURM_SUCCESS;

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);

	/* make a copy of the user's job description struct so that we
	 * can make changes before contacting the controller */
	req = (job_desc_msg_t *)xmalloc(sizeof(job_desc_msg_t));
	if (req == NULL)
		return NULL;
	memcpy(req, user_req, sizeof(job_desc_msg_t));

	/*
	 * set Node and session id for this request
	 */
	if (req->alloc_sid == NO_VAL)
		req->alloc_sid = getsid(0);

	if (user_req->alloc_node != NULL) {
		req->alloc_node = xstrdup(user_req->alloc_node);
	} else if ((hostname = xshort_hostname()) != NULL) {
		req->alloc_node = hostname;
	} else {
		error("Could not get local hostname,"
		      " forcing immediate allocation mode.");
		req->immediate = 1;
	}

	if (!req->immediate) {
		listen = _create_allocation_response_socket(hostname);
		if (listen == NULL) {
			xfree(req);
			return NULL;
		}
		req->alloc_resp_port = listen->port;
	}

	req_msg.msg_type = REQUEST_RESOURCE_ALLOCATION;
	req_msg.data     = req;

	rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg);

	if (rc == SLURM_SOCKET_ERROR) {
		int errnum = errno;
		destroy_forward(&req_msg.forward);
		destroy_forward(&resp_msg.forward);
		if (!req->immediate)
			_destroy_allocation_response_socket(listen);
		xfree(req);
		errno = errnum;
		return NULL;
	}

	switch (resp_msg.msg_type) {
	case RESPONSE_SLURM_RC:
		if (_handle_rc_msg(&resp_msg) < 0) {
			/* will reach this when the allocation fails */
			errnum = errno;
		} else {
			/* shouldn't get here */
			errnum = -1;
		}
		break;
	case RESPONSE_RESOURCE_ALLOCATION:
		/* Yay, the controller has acknowledged our request!  But did
		   we really get an allocation yet? */
		resp = (resource_allocation_response_msg_t *) resp_msg.data;
		if (resp->node_cnt > 0) {
			/* yes, allocation has been granted */
			errno = SLURM_PROTOCOL_SUCCESS;
		} else if (!req->immediate) {
			if (resp->error_code != SLURM_SUCCESS)
				info("%s", slurm_strerror(resp->error_code));
			/* no, we need to wait for a response */
			job_id = resp->job_id;
			slurm_free_resource_allocation_response_msg(resp);
			if (pending_callback != NULL)
				pending_callback(job_id);
 			resp = _wait_for_allocation_response(job_id, listen,
							     timeout);
			/* If NULL, we didn't get the allocation in
			   the time desired, so just free the job id */
			if ((resp == NULL) && (errno != ESLURM_ALREADY_DONE)) {
				errnum = errno;
				slurm_complete_job(job_id, -1);
			}
		}
		break;
	default:
		errnum = SLURM_UNEXPECTED_MSG_ERROR;
		resp = NULL;
	}

	destroy_forward(&req_msg.forward);
	destroy_forward(&resp_msg.forward);
	if (!req->immediate)
		_destroy_allocation_response_socket(listen);
	xfree(req);
	errno = errnum;
	return resp;
}
Beispiel #6
0
resource_allocation_response_msg_t *
allocate_nodes(void)
{
	resource_allocation_response_msg_t *resp = NULL;
	job_desc_msg_t *j = job_desc_msg_create_from_opts();
	slurm_allocation_callbacks_t callbacks;
	int i;

	if (!j)
		return NULL;

	/* Do not re-use existing job id when submitting new job
	 * from within a running job */
	if ((j->job_id != NO_VAL) && !opt.jobid_set) {
		info("WARNING: Creating SLURM job allocation from within "
		     "another allocation");
		info("WARNING: You are attempting to initiate a second job");
		if (!opt.jobid_set)	/* Let slurmctld set jobid */
			j->job_id = NO_VAL;
	}
	callbacks.ping = _ping_handler;
	callbacks.timeout = _timeout_handler;
	callbacks.job_complete = _job_complete_handler;
	callbacks.user_msg = _user_msg_handler;
	callbacks.node_fail = _node_fail_handler;

	/* create message thread to handle pings and such from slurmctld */
	msg_thr = slurm_allocation_msg_thr_create(&j->other_port, &callbacks);

	/* NOTE: Do not process signals in separate pthread. The signal will
	 * cause slurm_allocate_resources_blocking() to exit immediately. */
	xsignal_unblock(sig_array);
	for (i = 0; sig_array[i]; i++)
		xsignal(sig_array[i], _signal_while_allocating);

	while (!resp) {
		resp = slurm_allocate_resources_blocking(j, opt.immediate,
							 _set_pending_job_id);
		if (destroy_job) {
			/* cancelled by signal */
			break;
		} else if (!resp && !_retry()) {
			break;
		}
	}

	if (resp && !destroy_job) {
		/*
		 * Allocation granted!
		 */
		pending_job_id = resp->job_id;
#ifdef HAVE_BG
		if (!_wait_bluegene_block_ready(resp)) {
			if(!destroy_job)
				error("Something is wrong with the "
				      "boot of the block.");
			goto relinquish;
		}
#else
		if (!_wait_nodes_ready(resp)) {
			if(!destroy_job)
				error("Something is wrong with the "
				      "boot of the nodes.");
			goto relinquish;
		}
#endif
	} else if (destroy_job) {
		goto relinquish;
	}

	xsignal_block(sig_array);

	job_desc_msg_destroy(j);

	return resp;

relinquish:

	slurm_free_resource_allocation_response_msg(resp);
	if (!destroy_job)
		slurm_complete_job(resp->job_id, 1);
	exit(error_exit);
	return NULL;
}
Beispiel #7
0
resource_allocation_response_msg_t *
allocate_nodes(bool handle_signals)
{
	resource_allocation_response_msg_t *resp = NULL;
	job_desc_msg_t *j = job_desc_msg_create_from_opts();
	slurm_allocation_callbacks_t callbacks;
	int i;

	if (!j)
		return NULL;

	/* Do not re-use existing job id when submitting new job
	 * from within a running job */
	if ((j->job_id != NO_VAL) && !opt.jobid_set) {
		info("WARNING: Creating SLURM job allocation from within "
		     "another allocation");
		info("WARNING: You are attempting to initiate a second job");
		if (!opt.jobid_set)	/* Let slurmctld set jobid */
			j->job_id = NO_VAL;
	}
	callbacks.ping = _ping_handler;
	callbacks.timeout = _timeout_handler;
	callbacks.job_complete = _job_complete_handler;
	callbacks.job_suspend = NULL;
	callbacks.user_msg = _user_msg_handler;
	callbacks.node_fail = _node_fail_handler;

	/* create message thread to handle pings and such from slurmctld */
	msg_thr = slurm_allocation_msg_thr_create(&j->other_port, &callbacks);

	/* NOTE: Do not process signals in separate pthread. The signal will
	 * cause slurm_allocate_resources_blocking() to exit immediately. */
	if (handle_signals) {
		xsignal_unblock(sig_array);
		for (i = 0; sig_array[i]; i++)
			xsignal(sig_array[i], _signal_while_allocating);
	}

	while (!resp) {
		resp = slurm_allocate_resources_blocking(j, opt.immediate,
							 _set_pending_job_id);
		if (destroy_job) {
			/* cancelled by signal */
			break;
		} else if (!resp && !_retry()) {
			break;
		}
	}

	if (resp && !destroy_job) {
		/*
		 * Allocation granted!
		 */
		pending_job_id = resp->job_id;

		/*
		 * These values could be changed while the job was
		 * pending so overwrite the request with what was
		 * allocated so we don't have issues when we use them
		 * in the step creation.
		 */
		if (opt.pn_min_memory != NO_VAL)
			opt.pn_min_memory = (resp->pn_min_memory &
					     (~MEM_PER_CPU));
		else if (opt.mem_per_cpu != NO_VAL)
			opt.mem_per_cpu = (resp->pn_min_memory &
					   (~MEM_PER_CPU));
		/*
		 * FIXME: timelimit should probably also be updated
		 * here since it could also change.
		 */

#ifdef HAVE_BG
		uint32_t node_cnt = 0;
		select_g_select_jobinfo_get(resp->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &node_cnt);
		if ((node_cnt == 0) || (node_cnt == NO_VAL)) {
			opt.min_nodes = node_cnt;
			opt.max_nodes = node_cnt;
		} /* else we just use the original request */

		if (!_wait_bluegene_block_ready(resp)) {
			if (!destroy_job)
				error("Something is wrong with the "
				      "boot of the block.");
			goto relinquish;
		}
#else
		opt.min_nodes = resp->node_cnt;
		opt.max_nodes = resp->node_cnt;

		if (!_wait_nodes_ready(resp)) {
			if (!destroy_job)
				error("Something is wrong with the "
				      "boot of the nodes.");
			goto relinquish;
		}
#endif
	} else if (destroy_job) {
		goto relinquish;
	}

	if (handle_signals)
		xsignal_block(sig_array);

	job_desc_msg_destroy(j);

	return resp;

relinquish:
	if (resp) {
		if (!destroy_job)
			slurm_complete_job(resp->job_id, 1);
		slurm_free_resource_allocation_response_msg(resp);
	}
	exit(error_exit);
	return NULL;
}
Beispiel #8
0
/*
 * slurm_allocate_pack_job_blocking
 *	allocate resources for a list of job requests.  This call will block
 *	until the entire allocation is granted, or the specified timeout limit
 *	is reached.
 * IN req - List of resource allocation requests
 * IN timeout - amount of time, in seconds, to wait for a response before
 * 	giving up.
 *	A timeout of zero will wait indefinitely.
 * IN pending_callback - If the allocation cannot be granted immediately,
 *      the controller will put the job in the PENDING state.  If
 *      pending callback is not NULL, it will be called with the job_id
 *      of the pending job as the sole parameter.
 *
 * RET List of allocation structures on success, NULL on error set errno to
 *	indicate the error (errno will be ETIMEDOUT if the timeout is reached
 *      with no allocation granted)
 * NOTE: free the response using list_destroy()
 */
List slurm_allocate_pack_job_blocking(List job_req_list, time_t timeout,
				      void(*pending_callback)(uint32_t job_id))
{
	int rc;
	slurm_msg_t req_msg;
	slurm_msg_t resp_msg;
	List resp = NULL;
	char *local_hostname = NULL;
	job_desc_msg_t *req;
	listen_t *listen = NULL;
	int errnum = SLURM_SUCCESS;
	ListIterator iter;
	bool immediate_flag = false;
	bool immediate_logged = false;
	uint32_t node_cnt = 0, job_id = 0;

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);

	/*
	 * set node name and session ID for this request
	 */

	if (!immediate_flag) {
		listen = _create_allocation_response_socket(local_hostname);
		if (listen == NULL)
			return NULL;
	}

	local_hostname = xshort_hostname();
	iter = list_iterator_create(job_req_list);
	while ((req = (job_desc_msg_t *) list_next(iter))) {
		if (req->alloc_sid == NO_VAL)
			req->alloc_sid = getsid(0);
		if (listen)
			req->alloc_resp_port = listen->port;

		if (!req->alloc_node) {
			if (local_hostname) {
				req->alloc_node = local_hostname;
			} else if (immediate_logged) {
				req->immediate = 1;
			} else {
				req->immediate = 1;
				error("Could not get local hostname, forcing "
				      "immediate allocation mode");
				immediate_logged = true;
			}
		}
		if (req->immediate)
			immediate_flag = true;
	}
	list_iterator_destroy(iter);

	req_msg.msg_type = REQUEST_JOB_PACK_ALLOCATION;
	req_msg.data     = job_req_list;

	rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg,
					    working_cluster_rec);

	if (rc == SLURM_SOCKET_ERROR) {
		int errnum = errno;
		destroy_forward(&req_msg.forward);
		destroy_forward(&resp_msg.forward);
		if (listen)
			_destroy_allocation_response_socket(listen);
		iter = list_iterator_create(job_req_list);
		while ((req = (job_desc_msg_t *)list_next(iter))) {
			if (req->alloc_node == local_hostname)
				req->alloc_node = NULL;
		}
		list_iterator_destroy(iter);
		xfree(local_hostname);
		errno = errnum;
		return NULL;
	}

	switch (resp_msg.msg_type) {
	case RESPONSE_SLURM_RC:
		if (_handle_rc_msg(&resp_msg) < 0) {
			/* will reach this when the allocation fails */
			errnum = errno;
		} else {
			/* shouldn't get here */
			errnum = SLURM_ERROR;
		}
		break;
	case RESPONSE_JOB_PACK_ALLOCATION:
		/* Yay, the controller has acknowledged our request!
		 * Test if we have an allocation yet? */
		resp = (List) resp_msg.data;
		_pack_alloc_test(resp, &node_cnt, &job_id);
		if (node_cnt > 0) {
			/* yes, allocation has been granted */
			errno = SLURM_PROTOCOL_SUCCESS;
		} else if (immediate_flag) {
			debug("Immediate allocation not granted");
		} else {
			/* no, we need to wait for a response */
			FREE_NULL_LIST(resp);
			if (pending_callback != NULL)
				pending_callback(job_id);
			_wait_for_allocation_response(job_id, listen,
						RESPONSE_JOB_PACK_ALLOCATION,
						timeout, (void **) &resp);
			/* If NULL, we didn't get the allocation in
			 * the time desired, so just free the job id */
			if ((resp == NULL) && (errno != ESLURM_ALREADY_DONE)) {
				errnum = errno;
				slurm_complete_job(job_id, -1);
			}
		}
		break;
	default:
		errnum = SLURM_UNEXPECTED_MSG_ERROR;
	}

	destroy_forward(&req_msg.forward);
	destroy_forward(&resp_msg.forward);
	if (listen)
		_destroy_allocation_response_socket(listen);
	iter = list_iterator_create(job_req_list);
	while ((req = (job_desc_msg_t *)list_next(iter))) {
		if (req->alloc_node == local_hostname)
			req->alloc_node = NULL;
	}
	list_iterator_destroy(iter);
	xfree(local_hostname);
	errno = errnum;

	return resp;
}
Beispiel #9
0
/*
 * slurm_allocate_resources_blocking
 *	allocate resources for a job request.  This call will block until
 *	the allocation is granted, or the specified timeout limit is reached.
 * IN req - description of resource allocation request
 * IN timeout - amount of time, in seconds, to wait for a response before
 * 	giving up.
 *	A timeout of zero will wait indefinitely.
 * IN pending_callback - If the allocation cannot be granted immediately,
 *      the controller will put the job in the PENDING state.  If
 *      pending callback is not NULL, it will be called with the job_id
 *      of the pending job as the sole parameter.
 *
 * RET allocation structure on success, NULL on error set errno to
 *	indicate the error (errno will be ETIMEDOUT if the timeout is reached
 *      with no allocation granted)
 * NOTE: free the response using slurm_free_resource_allocation_response_msg()
 */
resource_allocation_response_msg_t *
slurm_allocate_resources_blocking (const job_desc_msg_t *user_req,
				   time_t timeout,
				   void(*pending_callback)(uint32_t job_id))
{
	int rc;
	slurm_msg_t req_msg;
	slurm_msg_t resp_msg;
	resource_allocation_response_msg_t *resp = NULL;
	uint32_t job_id;
	job_desc_msg_t *req;
	listen_t *listen = NULL;
	int errnum = SLURM_SUCCESS;
	bool already_done = false;

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);

	/* make a copy of the user's job description struct so that we
	 * can make changes before contacting the controller */
	req = (job_desc_msg_t *)xmalloc(sizeof(job_desc_msg_t));
	if (req == NULL)
		return NULL;
	memcpy(req, user_req, sizeof(job_desc_msg_t));

	/*
	 * set Node and session id for this request
	 */
	if (req->alloc_sid == NO_VAL)
		req->alloc_sid = getsid(0);

	if (!req->immediate) {
		listen = _create_allocation_response_socket();
		if (listen == NULL) {
			xfree(req);
			return NULL;
		}
		req->alloc_resp_port = listen->port;
	}

	req_msg.msg_type = REQUEST_RESOURCE_ALLOCATION;
	req_msg.data     = req;

	rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg,
					    working_cluster_rec);

	if (rc == SLURM_ERROR) {
		int errnum = errno;
		destroy_forward(&req_msg.forward);
		destroy_forward(&resp_msg.forward);
		if (!req->immediate)
			_destroy_allocation_response_socket(listen);
		xfree(req);
		errno = errnum;
		return NULL;
	}

	switch (resp_msg.msg_type) {
	case RESPONSE_SLURM_RC:
		if (_handle_rc_msg(&resp_msg) < 0) {
			/* will reach this when the allocation fails */
			errnum = errno;
		} else {
			/* shouldn't get here */
			errnum = SLURM_ERROR;
		}
		break;
	case RESPONSE_RESOURCE_ALLOCATION:
		/* Yay, the controller has acknowledged our request!
		 * Test if we have an allocation yet? */
		resp = (resource_allocation_response_msg_t *) resp_msg.data;
		if (resp->node_cnt > 0) {
			/* yes, allocation has been granted */
			errno = SLURM_SUCCESS;
		} else if (!req->immediate) {
			if (resp->error_code != SLURM_SUCCESS)
				info("%s", slurm_strerror(resp->error_code));
			/* no, we need to wait for a response */

			/* print out any user messages before we wait. */
			print_multi_line_string(resp->job_submit_user_msg,
						-1, LOG_LEVEL_INFO);

			job_id = resp->job_id;
			slurm_free_resource_allocation_response_msg(resp);
			if (pending_callback != NULL)
				pending_callback(job_id);
			_wait_for_allocation_response(job_id, listen,
						RESPONSE_RESOURCE_ALLOCATION,
						timeout, (void **) &resp);
			/* If NULL, we didn't get the allocation in
			   the time desired, so just free the job id */
			if ((resp == NULL) && (errno != ESLURM_ALREADY_DONE)) {
				errnum = errno;
				slurm_complete_job(job_id, -1);
			}
			if ((resp == NULL) && (errno == ESLURM_ALREADY_DONE))
				already_done = true;
		}
		break;
	default:
		errnum = SLURM_UNEXPECTED_MSG_ERROR;
		resp = NULL;
	}

	destroy_forward(&req_msg.forward);
	destroy_forward(&resp_msg.forward);
	if (!req->immediate)
		_destroy_allocation_response_socket(listen);
	xfree(req);
	if (!resp && already_done && (errnum == SLURM_SUCCESS))
		errnum = ESLURM_ALREADY_DONE;
	errno = errnum;
	return resp;
}
Beispiel #10
0
int main(int argc, char *argv[])
{
	log_options_t logopt = LOG_OPTS_STDERR_ONLY;
	job_desc_msg_t desc;
	resource_allocation_response_msg_t *alloc;
	time_t before, after;
	allocation_msg_thread_t *msg_thr;
	char **env = NULL;
	int status = 0;
	int retries = 0;
	pid_t pid  = getpid();
	pid_t tpgid = 0;
	pid_t rc_pid = 0;
	int i, rc = 0;
	static char *msg = "Slurm job queue full, sleeping and retrying.";
	slurm_allocation_callbacks_t callbacks;

	log_init(xbasename(argv[0]), logopt, 0, NULL);
	_set_exit_code();

	if (spank_init_allocator() < 0) {
		error("Failed to initialize plugin stack");
		exit(error_exit);
	}

	/* Be sure to call spank_fini when salloc exits
	 */
	if (atexit((void (*) (void)) spank_fini) < 0)
		error("Failed to register atexit handler for plugins: %m");


	if (initialize_and_process_args(argc, argv) < 0) {
		error("salloc parameter parsing");
		exit(error_exit);
	}
	/* reinit log with new verbosity (if changed by command line) */
	if (opt.verbose || opt.quiet) {
		logopt.stderr_level += opt.verbose;
		logopt.stderr_level -= opt.quiet;
		logopt.prefix_level = 1;
		log_alter(logopt, 0, NULL);
	}

	if (spank_init_post_opt() < 0) {
		error("Plugin stack post-option processing failed");
		exit(error_exit);
	}

	_set_spank_env();
	_set_submit_dir_env();
	if (opt.cwd && chdir(opt.cwd)) {
		error("chdir(%s): %m", opt.cwd);
		exit(error_exit);
	}

	if (opt.get_user_env_time >= 0) {
		char *user = uid_to_string(opt.uid);
		if (strcmp(user, "nobody") == 0) {
			error("Invalid user id %u: %m", (uint32_t)opt.uid);
			exit(error_exit);
		}
		env = env_array_user_default(user,
					     opt.get_user_env_time,
					     opt.get_user_env_mode);
		xfree(user);
		if (env == NULL)
			exit(error_exit);    /* error already logged */
		_set_rlimits(env);
	}

	/*
	 * Job control for interactive salloc sessions: only if ...
	 *
	 * a) input is from a terminal (stdin has valid termios attributes),
	 * b) controlling terminal exists (non-negative tpgid),
	 * c) salloc is not run in allocation-only (--no-shell) mode,
	 * d) salloc runs in its own process group (true in interactive
	 *    shells that support job control),
	 * e) salloc has been configured at compile-time to support background
	 *    execution and is not currently in the background process group.
	 */
	if (tcgetattr(STDIN_FILENO, &saved_tty_attributes) < 0) {
		/*
		 * Test existence of controlling terminal (tpgid > 0)
		 * after first making sure stdin is not redirected.
		 */
	} else if ((tpgid = tcgetpgrp(STDIN_FILENO)) < 0) {
		if (!opt.no_shell) {
			error("no controlling terminal: please set --no-shell");
			exit(error_exit);
		}
	} else if ((!opt.no_shell) && (pid == getpgrp())) {
		if (tpgid == pid)
			is_interactive = true;
#ifdef SALLOC_RUN_FOREGROUND
		while (tcgetpgrp(STDIN_FILENO) != pid) {
			if (!is_interactive) {
				error("Waiting for program to be placed in "
				      "the foreground");
				is_interactive = true;
			}
			killpg(pid, SIGTTIN);
		}
#endif
	}
	/*
	 * Reset saved tty attributes at exit, in case a child
	 * process died before properly resetting terminal.
	 */
	if (is_interactive)
		atexit (_reset_input_mode);

	/*
	 * Request a job allocation
	 */
	slurm_init_job_desc_msg(&desc);
	if (_fill_job_desc_from_opts(&desc) == -1) {
		exit(error_exit);
	}
	if (opt.gid != (gid_t) -1) {
		if (setgid(opt.gid) < 0) {
			error("setgid: %m");
			exit(error_exit);
		}
	}

	callbacks.ping = _ping_handler;
	callbacks.timeout = _timeout_handler;
	callbacks.job_complete = _job_complete_handler;
	callbacks.user_msg = _user_msg_handler;
	callbacks.node_fail = _node_fail_handler;
	/* create message thread to handle pings and such from slurmctld */
	msg_thr = slurm_allocation_msg_thr_create(&desc.other_port,
						  &callbacks);

	/* NOTE: Do not process signals in separate pthread. The signal will
	 * cause slurm_allocate_resources_blocking() to exit immediately. */
	for (i = 0; sig_array[i]; i++)
		xsignal(sig_array[i], _signal_while_allocating);

	before = time(NULL);
	while ((alloc = slurm_allocate_resources_blocking(&desc, opt.immediate,
					_pending_callback)) == NULL) {
		if ((errno != ESLURM_ERROR_ON_DESC_TO_RECORD_COPY) ||
		    (retries >= MAX_RETRIES))
			break;
		if (retries == 0)
			error("%s", msg);
		else
			debug("%s", msg);
		sleep (++retries);
	}

	/* become the user after the allocation has been requested. */
	if (opt.uid != (uid_t) -1) {
		if (setuid(opt.uid) < 0) {
			error("setuid: %m");
			exit(error_exit);
		}
	}
	if (alloc == NULL) {
		if (allocation_interrupted) {
			/* cancelled by signal */
			info("Job aborted due to signal");
		} else if (errno == EINTR) {
			error("Interrupted by signal."
			      "  Allocation request rescinded.");
		} else if (opt.immediate &&
			   ((errno == ETIMEDOUT) ||
			    (errno == ESLURM_NOT_TOP_PRIORITY) ||
			    (errno == ESLURM_NODES_BUSY))) {
			error("Unable to allocate resources: %m");
			error_exit = immediate_exit;
		} else {
			error("Failed to allocate resources: %m");
		}
		slurm_allocation_msg_thr_destroy(msg_thr);
		exit(error_exit);
	} else if (!allocation_interrupted) {
		/*
		 * Allocation granted!
		 */
		info("Granted job allocation %u", alloc->job_id);
		pending_job_id = alloc->job_id;
#ifdef HAVE_BG
		if (!_wait_bluegene_block_ready(alloc)) {
			if(!allocation_interrupted)
				error("Something is wrong with the "
				      "boot of the block.");
			goto relinquish;
		}
#else
		if (!_wait_nodes_ready(alloc)) {
			if(!allocation_interrupted)
				error("Something is wrong with the "
				      "boot of the nodes.");
			goto relinquish;
		}
#endif
	}

	after = time(NULL);
	if (opt.bell == BELL_ALWAYS
	    || (opt.bell == BELL_AFTER_DELAY
		&& ((after - before) > DEFAULT_BELL_DELAY))) {
		_ring_terminal_bell();
	}
	if (opt.no_shell)
		exit(0);
	if (allocation_interrupted) {
		/* salloc process received a signal after
		 * slurm_allocate_resources_blocking returned with the
		 * allocation, but before the new signal handlers were
		 * registered.
		 */
		goto relinquish;
	}

	/*
	 * Run the user's command.
	 */
	if (env_array_for_job(&env, alloc, &desc) != SLURM_SUCCESS)
		goto relinquish;

	/* Add default task count for srun, if not already set */
	if (opt.ntasks_set) {
		env_array_append_fmt(&env, "SLURM_NTASKS", "%d", opt.ntasks);
		/* keep around for old scripts */
		env_array_append_fmt(&env, "SLURM_NPROCS", "%d", opt.ntasks);
	}
	if (opt.cpus_per_task > 1) {
		env_array_append_fmt(&env, "SLURM_CPUS_PER_TASK", "%d",
				     opt.cpus_per_task);
	}
	if (opt.overcommit) {
		env_array_append_fmt(&env, "SLURM_OVERCOMMIT", "%d",
			opt.overcommit);
	}
	if (opt.acctg_freq >= 0) {
		env_array_append_fmt(&env, "SLURM_ACCTG_FREQ", "%d",
			opt.acctg_freq);
	}
	if (opt.network)
		env_array_append_fmt(&env, "SLURM_NETWORK", "%s", opt.network);

	env_array_set_environment(env);
	env_array_free(env);
	pthread_mutex_lock(&allocation_state_lock);
	if (allocation_state == REVOKED) {
		error("Allocation was revoked for job %u before command could "
		      "be run", alloc->job_id);
		pthread_mutex_unlock(&allocation_state_lock);
		if (slurm_complete_job(alloc->job_id, status) != 0) {
			error("Unable to clean up allocation for job %u: %m",
			      alloc->job_id);
		}
		return 1;
 	}
	allocation_state = GRANTED;
	pthread_mutex_unlock(&allocation_state_lock);

	/*  Ensure that salloc has initial terminal foreground control.  */
	if (is_interactive) {
		/*
		 * Ignore remaining job-control signals (other than those in
		 * sig_array, which at this state act like SIG_IGN).
		 */
		xsignal(SIGTSTP, SIG_IGN);
		xsignal(SIGTTIN, SIG_IGN);
		xsignal(SIGTTOU, SIG_IGN);

		pid = getpid();
		setpgid(pid, pid);

		tcsetpgrp(STDIN_FILENO, pid);
	}

	command_pid = _fork_command(command_argv);
	/*
	 * Wait for command to exit, OR for waitpid to be interrupted by a
	 * signal.  Either way, we are going to release the allocation next.
	 */
	if (command_pid > 0) {
		setpgid(command_pid, command_pid);
		if (is_interactive)
			tcsetpgrp(STDIN_FILENO, command_pid);

		/* NOTE: Do not process signals in separate pthread.
		 * The signal will cause waitpid() to exit immediately. */
		xsignal(SIGHUP,  _exit_on_signal);

		/* Use WUNTRACED to treat stopped children like terminated ones */
		do {
			rc_pid = waitpid(command_pid, &status, WUNTRACED);
		} while ((rc_pid == -1) && (!exit_flag));

		if ((rc_pid == -1) && (errno != EINTR))
			error("waitpid for %s failed: %m", command_argv[0]);
	}

	if (is_interactive)
		tcsetpgrp(STDIN_FILENO, pid);

	/*
	 * Relinquish the job allocation (if not already revoked).
	 */
relinquish:
	pthread_mutex_lock(&allocation_state_lock);
	if (allocation_state != REVOKED) {
		pthread_mutex_unlock(&allocation_state_lock);

		info("Relinquishing job allocation %d", alloc->job_id);
		if ((slurm_complete_job(alloc->job_id, status) != 0) &&
		    (slurm_get_errno() != ESLURM_ALREADY_DONE))
			error("Unable to clean up job allocation %d: %m",
			      alloc->job_id);
		pthread_mutex_lock(&allocation_state_lock);
		allocation_state = REVOKED;
	}
	pthread_mutex_unlock(&allocation_state_lock);

	slurm_free_resource_allocation_response_msg(alloc);
	slurm_allocation_msg_thr_destroy(msg_thr);

	/*
	 * Figure out what return code we should use.  If the user's command
	 * exited normally, return the user's return code.
	 */
	rc = 1;
	if (rc_pid != -1) {

		if (WIFEXITED(status)) {
			rc = WEXITSTATUS(status);
		} else if (WIFSIGNALED(status)) {
			verbose("Command \"%s\" was terminated by signal %d",
				command_argv[0], WTERMSIG(status));
			/* if we get these signals we return a normal
			 * exit since this was most likely sent from the
			 * user */
			switch(WTERMSIG(status)) {
			case SIGHUP:
			case SIGINT:
			case SIGQUIT:
			case SIGKILL:
				rc = 0;
				break;
			default:
				break;
			}
		}
	}
	return rc;
}
Beispiel #11
0
/*
 * Allocate nodes for heterogeneous/pack job from the slurm controller -- 
 * retrying the attempt if the controller appears to be down, and optionally
 * waiting for resources if none are currently available (see opt.immediate)
 *
 * Returns a pointer to a resource_allocation_response_msg which must
 * be freed with slurm_free_resource_allocation_response_msg()
 */
List allocate_pack_nodes(bool handle_signals)
{
	resource_allocation_response_msg_t *resp = NULL;
	bool jobid_log = true;
	job_desc_msg_t *j, *first_job = NULL;
	slurm_allocation_callbacks_t callbacks;
	ListIterator opt_iter, resp_iter;
	slurm_opt_t *opt_local, *first_opt = NULL;
	List job_req_list = NULL, job_resp_list = NULL;
	uint32_t my_job_id = 0;
	int i, k;

	job_req_list = list_create(NULL);
	opt_iter = list_iterator_create(opt_list);
	while ((opt_local = list_next(opt_iter))) {
		srun_opt_t *srun_opt = opt_local->srun_opt;
		xassert(srun_opt);
		if (!first_opt)
			first_opt = opt_local;
		if (srun_opt->relative_set && srun_opt->relative)
			fatal("--relative option invalid for job allocation request");

		if ((j = _job_desc_msg_create_from_opts(opt_local)) == NULL)
			return NULL;
		if (!first_job)
			first_job = j;

		j->origin_cluster = xstrdup(slurmctld_conf.cluster_name);

		/* Do not re-use existing job id when submitting new job
		 * from within a running job */
		if ((j->job_id != NO_VAL) && !opt_local->jobid_set) {
			if (jobid_log) {
				jobid_log = false;	/* log once */
				info("WARNING: Creating SLURM job allocation from within "
				     "another allocation");
				info("WARNING: You are attempting to initiate a second job");
			}
			if (!opt_local->jobid_set) /* Let slurmctld set jobid */
				j->job_id = NO_VAL;
		}

		list_append(job_req_list, j);
	}
	list_iterator_destroy(opt_iter);

	if (!first_job) {
		error("%s: No job requests found", __func__);
		return NULL;
	}

	if (first_opt && first_opt->clusters &&
	    (slurmdb_get_first_pack_cluster(job_req_list, first_opt->clusters,
					    &working_cluster_rec)
	     != SLURM_SUCCESS)) {
		print_db_notok(first_opt->clusters, 0);
		return NULL;
	}

	callbacks.ping = _ping_handler;
	callbacks.timeout = _timeout_handler;
	callbacks.job_complete = _job_complete_handler;
	callbacks.job_suspend = NULL;
	callbacks.user_msg = _user_msg_handler;
	callbacks.node_fail = _node_fail_handler;

	/* create message thread to handle pings and such from slurmctld */
	msg_thr = slurm_allocation_msg_thr_create(&first_job->other_port,
						  &callbacks);

	/* NOTE: Do not process signals in separate pthread. The signal will
	 * cause slurm_allocate_resources_blocking() to exit immediately. */
	if (handle_signals) {
		xsignal_unblock(sig_array);
		for (i = 0; sig_array[i]; i++)
			xsignal(sig_array[i], _signal_while_allocating);
	}

	while (first_opt && !job_resp_list) {
		job_resp_list = slurm_allocate_pack_job_blocking(job_req_list,
				 first_opt->immediate, _set_pending_job_id);
		if (destroy_job) {
			/* cancelled by signal */
			break;
		} else if (!job_resp_list && !_retry()) {
			break;
		}
	}

	if (job_resp_list && !destroy_job) {
		/*
		 * Allocation granted!
		 */

		opt_iter  = list_iterator_create(opt_list);
		resp_iter = list_iterator_create(job_resp_list);
		while ((opt_local = list_next(opt_iter))) {
			resp = (resource_allocation_response_msg_t *)
			       list_next(resp_iter);
			if (!resp)
				break;

			if (pending_job_id == 0)
				pending_job_id = resp->job_id;
			if (my_job_id == 0) {
				my_job_id = resp->job_id;
				i = list_count(opt_list);
				k = list_count(job_resp_list);
				if (i != k) {
					error("%s: request count != response count (%d != %d)",
					      __func__, i, k);
					goto relinquish;
				}
			}

			/*
			 * These values could be changed while the job was
			 * pending so overwrite the request with what was
			 * allocated so we don't have issues when we use them
			 * in the step creation.
			 *
			 * NOTE: pn_min_memory here is an int64, not uint64.
			 * These operations may have some bizarre side effects
			 */
			if (opt_local->pn_min_memory != NO_VAL64)
				opt_local->pn_min_memory =
					(resp->pn_min_memory & (~MEM_PER_CPU));
			else if (opt_local->mem_per_cpu != NO_VAL64)
				opt_local->mem_per_cpu =
					(resp->pn_min_memory & (~MEM_PER_CPU));

#ifdef HAVE_BG
			uint32_t node_cnt = 0;
			select_g_select_jobinfo_get(resp->select_jobinfo,
						    SELECT_JOBDATA_NODE_CNT,
						    &node_cnt);
			if ((node_cnt == 0) || (node_cnt == NO_VAL)) {
				opt_local->min_nodes = node_cnt;
				opt_local->max_nodes = node_cnt;
			} /* else we just use the original request */

			if (!_wait_bluegene_block_ready(resp)) {
				if (!destroy_job)
					error("Something is wrong with the "
					      "boot of the block.");
				goto relinquish;
			}
#else
			opt_local->min_nodes = resp->node_cnt;
			opt_local->max_nodes = resp->node_cnt;

			if (resp->working_cluster_rec)
				slurm_setup_remote_working_cluster(resp);

			if (!_wait_nodes_ready(resp)) {
				if (!destroy_job)
					error("Something is wrong with the "
					      "boot of the nodes.");
				goto relinquish;
			}
#endif
		}
		list_iterator_destroy(resp_iter);
		list_iterator_destroy(opt_iter);
	} else if (destroy_job) {
		goto relinquish;
	}

	if (handle_signals)
		xsignal_block(sig_array);

	return job_resp_list;

relinquish:
	if (job_resp_list) {
		if (!destroy_job && my_job_id)
			slurm_complete_job(my_job_id, 1);
		list_destroy(job_resp_list);
	}
	exit(error_exit);
	return NULL;
}
Beispiel #12
0
/*
 * Allocate nodes from the slurm controller -- retrying the attempt
 * if the controller appears to be down, and optionally waiting for
 * resources if none are currently available (see opt.immediate)
 *
 * Returns a pointer to a resource_allocation_response_msg which must
 * be freed with slurm_free_resource_allocation_response_msg()
 */
extern resource_allocation_response_msg_t *
	allocate_nodes(bool handle_signals, slurm_opt_t *opt_local)

{
	srun_opt_t *srun_opt = opt_local->srun_opt;
	resource_allocation_response_msg_t *resp = NULL;
	job_desc_msg_t *j;
	slurm_allocation_callbacks_t callbacks;
	int i;

	xassert(srun_opt);

	if (srun_opt->relative_set && srun_opt->relative)
		fatal("--relative option invalid for job allocation request");

	if ((j = _job_desc_msg_create_from_opts(&opt)) == NULL)
		return NULL;

	if (opt_local->clusters &&
	    (slurmdb_get_first_avail_cluster(j, opt_local->clusters,
					     &working_cluster_rec)
	     != SLURM_SUCCESS)) {
		print_db_notok(opt_local->clusters, 0);
		return NULL;
	}

	j->origin_cluster = xstrdup(slurmctld_conf.cluster_name);

	/* Do not re-use existing job id when submitting new job
	 * from within a running job */
	if ((j->job_id != NO_VAL) && !opt_local->jobid_set) {
		info("WARNING: Creating SLURM job allocation from within "
		     "another allocation");
		info("WARNING: You are attempting to initiate a second job");
		if (!opt_local->jobid_set)	/* Let slurmctld set jobid */
			j->job_id = NO_VAL;
	}
	callbacks.ping = _ping_handler;
	callbacks.timeout = _timeout_handler;
	callbacks.job_complete = _job_complete_handler;
	callbacks.job_suspend = NULL;
	callbacks.user_msg = _user_msg_handler;
	callbacks.node_fail = _node_fail_handler;

	/* create message thread to handle pings and such from slurmctld */
	msg_thr = slurm_allocation_msg_thr_create(&j->other_port, &callbacks);

	/* NOTE: Do not process signals in separate pthread. The signal will
	 * cause slurm_allocate_resources_blocking() to exit immediately. */
	if (handle_signals) {
		xsignal_unblock(sig_array);
		for (i = 0; sig_array[i]; i++)
			xsignal(sig_array[i], _signal_while_allocating);
	}

	while (!resp) {
		resp = slurm_allocate_resources_blocking(j,
							 opt_local->immediate,
							 _set_pending_job_id);
		if (destroy_job) {
			/* cancelled by signal */
			break;
		} else if (!resp && !_retry()) {
			break;
		}
	}

	if (resp)
		print_multi_line_string(resp->job_submit_user_msg, -1);

	if (resp && !destroy_job) {
		/*
		 * Allocation granted!
		 */
		pending_job_id = resp->job_id;

		/*
		 * These values could be changed while the job was
		 * pending so overwrite the request with what was
		 * allocated so we don't have issues when we use them
		 * in the step creation.
		 */
		opt_local->pn_min_memory = NO_VAL64;
		opt_local->mem_per_cpu   = NO_VAL64;
		if (resp->pn_min_memory != NO_VAL64) {
			if (resp->pn_min_memory & MEM_PER_CPU) {
				opt_local->mem_per_cpu = (resp->pn_min_memory &
							 (~MEM_PER_CPU));
			} else {
				opt_local->pn_min_memory = resp->pn_min_memory;
			}
		}

#ifdef HAVE_BG
		uint32_t node_cnt = 0;
		select_g_select_jobinfo_get(resp->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &node_cnt);
		if ((node_cnt == 0) || (node_cnt == NO_VAL)) {
			opt_local->min_nodes = node_cnt;
			opt_local->max_nodes = node_cnt;
		} /* else we just use the original request */

		if (!_wait_bluegene_block_ready(resp)) {
			if (!destroy_job)
				error("Something is wrong with the "
				      "boot of the block.");
			goto relinquish;
		}
#else
		opt_local->min_nodes = resp->node_cnt;
		opt_local->max_nodes = resp->node_cnt;

		if (resp->working_cluster_rec)
			slurm_setup_remote_working_cluster(resp);

		if (!_wait_nodes_ready(resp)) {
			if (!destroy_job)
				error("Something is wrong with the boot of the nodes.");
			goto relinquish;
		}
#endif
	} else if (destroy_job) {
		goto relinquish;
	}

	if (handle_signals)
		xsignal_block(sig_array);

	job_desc_msg_destroy(j);

	return resp;

relinquish:
	if (resp) {
		if (!destroy_job)
			slurm_complete_job(resp->job_id, 1);
		slurm_free_resource_allocation_response_msg(resp);
	}
	exit(error_exit);
	return NULL;
}