Exemplo n.º 1
0
/*
 * Block until a pending job's allocation response arrives, or fall back
 * to asking the controller directly.
 *
 * job_id   - job whose allocation is awaited
 * listen   - listening endpoint the response RPC is expected on
 * msg_type - expected response type: RESPONSE_RESOURCE_ALLOCATION or
 *            RESPONSE_JOB_PACK_ALLOCATION
 * timeout  - passed through unchanged to _wait_for_alloc_rpc()
 * resp     - OUT: reset to NULL on entry, then handed to whichever call
 *            (RPC accept or controller lookup) supplies the response
 *
 * When neither the RPC nor the lookup succeeds and the controller
 * reports ESLURM_JOB_PENDING, errno is restored to the value it had
 * right after the failed wait/accept.
 */
static void _wait_for_allocation_response(uint32_t job_id,
					  const listen_t *listen,
					  uint16_t msg_type, int timeout,
					  void **resp)
{
	int status, saved_errno;
	int confirmed = 0;

	info("job %u queued and waiting for resources", job_id);
	*resp = NULL;

	/* Only try to accept the message once the wait reports 1. */
	status = _wait_for_alloc_rpc(listen, timeout);
	if (status == 1)
		status = _accept_msg_connection(listen->fd, msg_type, resp);

	if (status > 0) {
		info("job %u has been allocated resources", job_id);
		return;
	}

	/* Capture errno before any of the calls below can overwrite it. */
	saved_errno = errno;

	/*
	 * The response RPC may simply have been lost. Query the controller
	 * to find out whether the allocation was granted anyway.
	 */
	if (msg_type == RESPONSE_RESOURCE_ALLOCATION) {
		confirmed = (slurm_allocation_lookup(job_id,
				(resource_allocation_response_msg_t **)
				resp) >= 0);
	} else if (msg_type == RESPONSE_JOB_PACK_ALLOCATION) {
		confirmed = (slurm_pack_job_lookup(job_id,
						   (List *) resp) >= 0);
	} else {
		error("%s: Invalid msg_type (%u)", __func__, msg_type);
	}

	if (confirmed)
		return;

	if (slurm_get_errno() != ESLURM_JOB_PENDING) {
		debug3("Unable to confirm allocation for job %u: %m",
		       job_id);
		return;
	}

	debug3("Still waiting for allocation");
	errno = saved_errno;
}
Exemplo n.º 2
0
extern List existing_allocation(void)
{
	uint32_t old_job_id;
	List job_resp_list = NULL;

	if (opt.jobid == NO_VAL)
		return NULL;

	old_job_id = (uint32_t) opt.jobid;
	if (slurm_pack_job_lookup(old_job_id, &job_resp_list) < 0) {
		if (opt.srun_opt->parallel_debug || opt.jobid_set)
			return NULL;    /* create new allocation as needed */
		if (errno == ESLURM_ALREADY_DONE)
			error("SLURM job %u has expired", old_job_id);
		else
			error("Unable to confirm allocation for job %u: %m",
			      old_job_id);
		info("Check SLURM_JOB_ID environment variable. Expired or invalid job %u",
		     old_job_id);
		exit(error_exit);
	}

	return job_resp_list;
}