Пример #1
0
/* Wait up to sleep_time for RPC from slurmctld indicating resource allocation
 * has occured.
 * IN sleep_time: delay in seconds (0 means unbounded wait)
 * OUT resp: resource allocation response message
 * RET 1 if resp is filled in, 0 otherwise */
static int
_wait_for_alloc_rpc(const listen_t *listen, int sleep_time,
		    resource_allocation_response_msg_t **resp)
{
	struct pollfd fds[1];
	int rc;
	int timeout_ms;

	if (listen == NULL) {
		error("Listening port not found");
		sleep(MAX(sleep_time, 1));
		return SLURM_ERROR;
	}

	fds[0].fd = listen->fd;
	fds[0].events = POLLIN;

	if (sleep_time != 0) {
		timeout_ms = sleep_time * 1000;
	} else {
		timeout_ms = -1;
	}
	while ((rc = poll(fds, 1, timeout_ms)) < 0) {
		switch (errno) {
			case EAGAIN:
			case EINTR:
				*resp = NULL;
				return -1;
			case EBADF:
			case ENOMEM:
			case EINVAL:
			case EFAULT:
				error("poll: %m");
				*resp = NULL;
				return -1;
			default:
				error("poll: %m. Continuing...");
		}
	}

	if (rc == 0) { /* poll timed out */
		errno = ETIMEDOUT;
	} else if (fds[0].revents & POLLIN) {
		return (_accept_msg_connection(listen->fd, resp));
	}

	return 0;
}
Пример #2
0
static void _wait_for_allocation_response(uint32_t job_id,
					  const listen_t *listen,
					  uint16_t msg_type, int timeout,
					  void **resp)
{
	int errnum, rc;

	info("job %u queued and waiting for resources", job_id);
	*resp = NULL;
	if ((rc = _wait_for_alloc_rpc(listen, timeout)) == 1)
		rc = _accept_msg_connection(listen->fd, msg_type, resp);
	if (rc <= 0) {
		errnum = errno;
		/* Maybe the resource allocation response RPC got lost
		 * in the mail; surely it should have arrived by now.
		 * Let's see if the controller thinks that the allocation
		 * has been granted.
		 */
		if (msg_type == RESPONSE_RESOURCE_ALLOCATION) {
			if (slurm_allocation_lookup(job_id,
					(resource_allocation_response_msg_t **)
					resp) >= 0)
				return;
		} else if (msg_type == RESPONSE_JOB_PACK_ALLOCATION) {
			if (slurm_pack_job_lookup(job_id, (List *) resp) >= 0)
				return;
		} else {
			error("%s: Invalid msg_type (%u)", __func__, msg_type);
		}

		if (slurm_get_errno() == ESLURM_JOB_PENDING) {
			debug3("Still waiting for allocation");
			errno = errnum;
			return;
		} else {
			debug3("Unable to confirm allocation for job %u: %m",
			       job_id);
			return;
		}
	}
	info("job %u has been allocated resources", job_id);
	return;
}