/*
 * Wait up to sleep_time seconds for the RPC from slurmctld indicating that
 * the resource allocation has occurred.
 * IN listen: listener whose descriptor (listen->fd) is polled for input
 * IN sleep_time: delay in seconds (0 or a negative value means unbounded
 *	wait); delays too large to express in milliseconds as an int are
 *	clamped to the largest representable delay
 * OUT resp: resource allocation response message; set to NULL when poll()
 *	fails, otherwise only written by _accept_msg_connection()
 * RET the result of _accept_msg_connection() when input is pending
 *	(per the original contract, 1 if resp is filled in),
 *	0 on timeout (errno is set to ETIMEDOUT) or when poll() reports an
 *	event other than POLLIN,
 *	SLURM_ERROR if listen is NULL,
 *	-1 if poll() fails
 */
static int _wait_for_alloc_rpc(const listen_t *listen, int sleep_time,
			       resource_allocation_response_msg_t **resp)
{
	/* INT_MAX / 1000, spelled without requiring <limits.h> */
	const int max_sleep_secs = (int) ((~0u >> 1) / 1000u);
	struct pollfd fds[1];
	int rc;
	int timeout_ms;

	if (listen == NULL) {
		error("Listening port not found");
		/* Still consume the delay so callers that retry do not spin */
		sleep(MAX(sleep_time, 1));
		return SLURM_ERROR;
	}

	fds[0].fd = listen->fd;
	fds[0].events = POLLIN;

	if (sleep_time <= 0) {
		/* poll() treats -1 as "wait forever" */
		timeout_ms = -1;
	} else if (sleep_time > max_sleep_secs) {
		/* sleep_time * 1000 would overflow int: clamp instead */
		timeout_ms = max_sleep_secs * 1000;
	} else {
		timeout_ms = sleep_time * 1000;
	}

	while ((rc = poll(fds, 1, timeout_ms)) < 0) {
		switch (errno) {
		case EAGAIN:
		case EINTR:
			/* Interrupted: hand control back to the caller */
			*resp = NULL;
			return -1;
		case EBADF:
		case ENOMEM:
		case EINVAL:
		case EFAULT:
			error("poll: %m");
			*resp = NULL;
			return -1;
		default:
			/* Unexpected errno: log it and retry the poll */
			error("poll: %m. Continuing...");
		}
	}

	if (rc == 0) {
		/* poll timed out */
		errno = ETIMEDOUT;
	} else if (fds[0].revents & POLLIN) {
		return _accept_msg_connection(listen->fd, resp);
	}

	return 0;
}
static void _wait_for_allocation_response(uint32_t job_id, const listen_t *listen, uint16_t msg_type, int timeout, void **resp) { int errnum, rc; info("job %u queued and waiting for resources", job_id); *resp = NULL; if ((rc = _wait_for_alloc_rpc(listen, timeout)) == 1) rc = _accept_msg_connection(listen->fd, msg_type, resp); if (rc <= 0) { errnum = errno; /* Maybe the resource allocation response RPC got lost * in the mail; surely it should have arrived by now. * Let's see if the controller thinks that the allocation * has been granted. */ if (msg_type == RESPONSE_RESOURCE_ALLOCATION) { if (slurm_allocation_lookup(job_id, (resource_allocation_response_msg_t **) resp) >= 0) return; } else if (msg_type == RESPONSE_JOB_PACK_ALLOCATION) { if (slurm_pack_job_lookup(job_id, (List *) resp) >= 0) return; } else { error("%s: Invalid msg_type (%u)", __func__, msg_type); } if (slurm_get_errno() == ESLURM_JOB_PENDING) { debug3("Still waiting for allocation"); errno = errnum; return; } else { debug3("Unable to confirm allocation for job %u: %m", job_id); return; } } info("job %u has been allocated resources", job_id); return; }