Example #1
/* Get total node count and lead job ID from RESPONSE_JOB_PACK_ALLOCATION */
static void _pack_alloc_test(List resp, uint32_t *node_cnt, uint32_t *job_id)
{
	resource_allocation_response_msg_t *alloc;
	uint32_t inx = 0, pack_node_cnt = 0, pack_job_id = 0;
	ListIterator iter;

	xassert(resp);
	iter = list_iterator_create(resp);
	while ((alloc = (resource_allocation_response_msg_t *)list_next(iter))){
		pack_node_cnt += alloc->node_cnt;
		if (pack_job_id == 0)
			pack_job_id = alloc->job_id;
		print_multi_line_string(alloc->job_submit_user_msg, inx);
		inx++;
	}
	list_iterator_destroy(iter);

	*job_id   = pack_job_id;
	*node_cnt = pack_node_cnt;
}
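A minimal caller sketch for the helper above (illustrative only; _report_pack_alloc and its logging are assumptions, not part of the original example). It summarizes a heterogeneous ("pack") allocation response list before launch:

/* Illustrative caller: summarize a pack-job allocation response list.
 * "resp_list" is assumed to hold one resource_allocation_response_msg_t
 * per pack-job component, as in RESPONSE_JOB_PACK_ALLOCATION. */
static void _report_pack_alloc(List resp_list)
{
	uint32_t total_nodes = 0, lead_job_id = 0;

	_pack_alloc_test(resp_list, &total_nodes, &lead_job_id);
	info("Granted pack job allocation %u spanning %u nodes",
	     lead_job_id, total_nodes);
}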
Example #2
/*
 * slurm_pack_job_will_run - determine if a heterogeneous job would execute
 *	immediately if submitted now
 * IN job_req_list - List of job_desc_msg_t structures describing the resource
 *		allocation request
 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set
 */
extern int slurm_pack_job_will_run(List job_req_list)
{
	job_desc_msg_t *req;
	will_run_response_msg_t *will_run_resp;
	char buf[64], *sep = "";
	int rc = SLURM_SUCCESS, inx = 0;
	ListIterator iter, itr;
	time_t first_start = (time_t) 0;
	uint32_t first_job_id = 0, tot_proc_count = 0, *job_id_ptr;
	hostset_t hs = NULL;
	char *job_list = NULL;

	if (!job_req_list || (list_count(job_req_list) == 0)) {
		error("No job descriptors input");
		return SLURM_ERROR;
	}

	iter = list_iterator_create(job_req_list);
	while ((req = (job_desc_msg_t *) list_next(iter))) {
		will_run_resp = NULL;
		rc = slurm_job_will_run2(req, &will_run_resp);

		if (will_run_resp)
			print_multi_line_string(
				will_run_resp->job_submit_user_msg,
				inx, LOG_LEVEL_INFO);

		if ((rc == SLURM_SUCCESS) && will_run_resp) {
			if (first_job_id == 0)
				first_job_id = will_run_resp->job_id;
			if ((first_start == 0) ||
			    (first_start < will_run_resp->start_time))
				first_start = will_run_resp->start_time;
			tot_proc_count += will_run_resp->proc_cnt;
			if (hs)
				hostset_insert(hs, will_run_resp->node_list);
			else
				hs = hostset_create(will_run_resp->node_list);

			if (will_run_resp->preemptee_job_id) {
				itr = list_iterator_create(will_run_resp->
							   preemptee_job_id);
				while ((job_id_ptr = list_next(itr))) {
					if (job_list)
						sep = ",";
					xstrfmtcat(job_list, "%s%u", sep,
						   *job_id_ptr);
				}
				list_iterator_destroy(itr);
			}

			slurm_free_will_run_response_msg(will_run_resp);
		}
		if (rc != SLURM_SUCCESS)
			break;
		inx++;
	}
	list_iterator_destroy(iter);


	if (rc == SLURM_SUCCESS) {
		char node_list[1028] = "";

		if (hs)
			hostset_ranged_string(hs, sizeof(node_list), node_list);
		slurm_make_time_str(&first_start, buf, sizeof(buf));
		info("Job %u to start at %s using %u processors on %s",
		     first_job_id, buf, tot_proc_count, node_list);
		if (job_list)
			info("  Preempts: %s", job_list);
	}

	if (hs)
		hostset_destroy(hs);
	xfree(job_list);

	return rc;
}
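A hedged usage sketch, assuming two placeholder job components; _check_het_job, the field values, and the cleanup path are illustrative rather than taken from the example:

/* Illustrative only: ask whether a two-component heterogeneous job
 * could start now.  The component settings are placeholders. */
static int _check_het_job(void)
{
	job_desc_msg_t comp1, comp2;
	List req_list = list_create(NULL);	/* no delete func: stack data */
	int rc;

	slurm_init_job_desc_msg(&comp1);
	comp1.min_nodes = 1;
	comp1.user_id   = getuid();
	comp1.group_id  = getgid();

	slurm_init_job_desc_msg(&comp2);
	comp2.min_nodes = 2;
	comp2.user_id   = getuid();
	comp2.group_id  = getgid();

	list_append(req_list, &comp1);
	list_append(req_list, &comp2);

	rc = slurm_pack_job_will_run(req_list);	/* logs expected start time */
	list_destroy(req_list);
	return rc;
}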
Example #3
/*
 * slurm_job_will_run - determine if a job would execute immediately if
 *	submitted now
 * IN job_desc_msg - description of resource allocation request
 * RET SLURM_SUCCESS on success, otherwise return SLURM_ERROR with errno set
 */
int slurm_job_will_run(job_desc_msg_t *req)
{
	will_run_response_msg_t *will_run_resp = NULL;
	char buf[64];
	int rc;
	char *cluster_name = NULL;
	void *ptr = NULL;

	if (working_cluster_rec)
		cluster_name = working_cluster_rec->name;
	else
		cluster_name = slurmctld_conf.cluster_name;
	if (!slurm_load_federation(&ptr) &&
	    cluster_in_federation(ptr, cluster_name))
		rc = _fed_job_will_run(req, &will_run_resp, ptr);
	else
		rc = slurm_job_will_run2(req, &will_run_resp);

	if (will_run_resp)
		print_multi_line_string(
			will_run_resp->job_submit_user_msg,
			-1, LOG_LEVEL_INFO);

	if ((rc == 0) && will_run_resp) {
		slurm_make_time_str(&will_run_resp->start_time,
				    buf, sizeof(buf));
		if (will_run_resp->part_name) {
			info("Job %u to start at %s using %u processors on nodes %s in partition %s",
			     will_run_resp->job_id, buf,
			     will_run_resp->proc_cnt,
			     will_run_resp->node_list,
			     will_run_resp->part_name);
		} else {
			/*
			 * Partition name not provided from slurmctld v17.11
			 * or earlier. Remove this in the future.
			 */
			info("Job %u to start at %s using %u processors on nodes %s",
			     will_run_resp->job_id, buf,
			     will_run_resp->proc_cnt,
			     will_run_resp->node_list);
		}
		if (will_run_resp->preemptee_job_id) {
			ListIterator itr;
			uint32_t *job_id_ptr;
			char *job_list = NULL, *sep = "";
			itr = list_iterator_create(will_run_resp->
						   preemptee_job_id);
			while ((job_id_ptr = list_next(itr))) {
				if (job_list)
					sep = ",";
				xstrfmtcat(job_list, "%s%u", sep, *job_id_ptr);
			}
			list_iterator_destroy(itr);
			info("  Preempts: %s", job_list);
			xfree(job_list);
		}

		slurm_free_will_run_response_msg(will_run_resp);
	}

	if (ptr)
		slurm_destroy_federation_rec(ptr);

	return rc;
}
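A sketch of a --test-only style caller, under the assumption that a one-node descriptor is enough for the check; _test_only_submit and its field values are hypothetical:

/* Illustrative --test-only style check; descriptor fields are placeholders. */
static int _test_only_submit(void)
{
	job_desc_msg_t desc;

	slurm_init_job_desc_msg(&desc);
	desc.min_nodes = 1;
	desc.user_id   = getuid();
	desc.group_id  = getgid();

	if (slurm_job_will_run(&desc) != SLURM_SUCCESS) {
		slurm_perror("slurm_job_will_run");	/* errno set on failure */
		return SLURM_ERROR;
	}
	return SLURM_SUCCESS;	/* expected start time was logged via info() */
}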
Example #4
/*
 * slurm_allocate_resources_blocking
 *	allocate resources for a job request.  This call will block until
 *	the allocation is granted, or the specified timeout limit is reached.
 * IN req - description of resource allocation request
 * IN timeout - amount of time, in seconds, to wait for a response before
 * 	giving up.
 *	A timeout of zero will wait indefinitely.
 * IN pending_callback - If the allocation cannot be granted immediately,
 *      the controller will put the job in the PENDING state.  If
 *      pending callback is not NULL, it will be called with the job_id
 *      of the pending job as the sole parameter.
 *
 * RET allocation structure on success, NULL on error with errno set to
 *	indicate the error (errno will be ETIMEDOUT if the timeout is reached
 *      with no allocation granted)
 * NOTE: free the response using slurm_free_resource_allocation_response_msg()
 */
resource_allocation_response_msg_t *
slurm_allocate_resources_blocking (const job_desc_msg_t *user_req,
				   time_t timeout,
				   void(*pending_callback)(uint32_t job_id))
{
	int rc;
	slurm_msg_t req_msg;
	slurm_msg_t resp_msg;
	resource_allocation_response_msg_t *resp = NULL;
	uint32_t job_id;
	job_desc_msg_t *req;
	listen_t *listen = NULL;
	int errnum = SLURM_SUCCESS;
	bool already_done = false;

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);

	/* make a copy of the user's job description struct so that we
	 * can make changes before contacting the controller */
	req = (job_desc_msg_t *)xmalloc(sizeof(job_desc_msg_t));
	if (req == NULL)
		return NULL;
	memcpy(req, user_req, sizeof(job_desc_msg_t));

	/*
	 * set Node and session id for this request
	 */
	if (req->alloc_sid == NO_VAL)
		req->alloc_sid = getsid(0);

	if (!req->immediate) {
		listen = _create_allocation_response_socket();
		if (listen == NULL) {
			xfree(req);
			return NULL;
		}
		req->alloc_resp_port = listen->port;
	}

	req_msg.msg_type = REQUEST_RESOURCE_ALLOCATION;
	req_msg.data     = req;

	rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg,
					    working_cluster_rec);

	if (rc == SLURM_ERROR) {
		int errnum = errno;
		destroy_forward(&req_msg.forward);
		destroy_forward(&resp_msg.forward);
		if (!req->immediate)
			_destroy_allocation_response_socket(listen);
		xfree(req);
		errno = errnum;
		return NULL;
	}

	switch (resp_msg.msg_type) {
	case RESPONSE_SLURM_RC:
		if (_handle_rc_msg(&resp_msg) < 0) {
			/* will reach this when the allocation fails */
			errnum = errno;
		} else {
			/* shouldn't get here */
			errnum = SLURM_ERROR;
		}
		break;
	case RESPONSE_RESOURCE_ALLOCATION:
		/* Yay, the controller has acknowledged our request!
		 * Test if we have an allocation yet? */
		resp = (resource_allocation_response_msg_t *) resp_msg.data;
		if (resp->node_cnt > 0) {
			/* yes, allocation has been granted */
			errno = SLURM_SUCCESS;
		} else if (!req->immediate) {
			if (resp->error_code != SLURM_SUCCESS)
				info("%s", slurm_strerror(resp->error_code));
			/* no, we need to wait for a response */

			/* print out any user messages before we wait. */
			print_multi_line_string(resp->job_submit_user_msg,
						-1, LOG_LEVEL_INFO);

			job_id = resp->job_id;
			slurm_free_resource_allocation_response_msg(resp);
			if (pending_callback != NULL)
				pending_callback(job_id);
			_wait_for_allocation_response(job_id, listen,
						RESPONSE_RESOURCE_ALLOCATION,
						timeout, (void **) &resp);
			/* If NULL, we didn't get the allocation in
			   the time desired, so just free the job id */
			if ((resp == NULL) && (errno != ESLURM_ALREADY_DONE)) {
				errnum = errno;
				slurm_complete_job(job_id, -1);
			}
			if ((resp == NULL) && (errno == ESLURM_ALREADY_DONE))
				already_done = true;
		}
		break;
	default:
		errnum = SLURM_UNEXPECTED_MSG_ERROR;
		resp = NULL;
	}

	destroy_forward(&req_msg.forward);
	destroy_forward(&resp_msg.forward);
	if (!req->immediate)
		_destroy_allocation_response_socket(listen);
	xfree(req);
	if (!resp && already_done && (errnum == SLURM_SUCCESS))
		errnum = ESLURM_ALREADY_DONE;
	errno = errnum;
	return resp;
}
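A hedged caller sketch for the blocking allocation call, assuming a 60 second timeout and a trivial pending callback; _pending_cb, _alloc_blocking, and the descriptor fields are illustrative:

/* Illustrative pending callback: just report that the job is queued. */
static void _pending_cb(uint32_t job_id)
{
	info("Job %u queued and waiting for resources", job_id);
}

/* Illustrative caller: block up to 60 seconds for an allocation. */
static resource_allocation_response_msg_t *_alloc_blocking(void)
{
	job_desc_msg_t desc;
	resource_allocation_response_msg_t *alloc;

	slurm_init_job_desc_msg(&desc);
	desc.min_nodes = 1;
	desc.user_id   = getuid();
	desc.group_id  = getgid();

	alloc = slurm_allocate_resources_blocking(&desc, 60, _pending_cb);
	if (!alloc) {
		if (errno == ETIMEDOUT)
			error("No allocation granted within 60 seconds");
		else
			slurm_perror("slurm_allocate_resources_blocking");
		return NULL;
	}
	info("Allocation %u granted on %s", alloc->job_id, alloc->node_list);
	return alloc;	/* free with slurm_free_resource_allocation_response_msg() */
}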
Example #5
/*
 * Allocate nodes from the slurm controller -- retrying the attempt
 * if the controller appears to be down, and optionally waiting for
 * resources if none are currently available (see opt.immediate)
 *
 * Returns a pointer to a resource_allocation_response_msg which must
 * be freed with slurm_free_resource_allocation_response_msg()
 */
extern resource_allocation_response_msg_t *
	allocate_nodes(bool handle_signals, slurm_opt_t *opt_local)
{
	srun_opt_t *srun_opt = opt_local->srun_opt;
	resource_allocation_response_msg_t *resp = NULL;
	job_desc_msg_t *j;
	slurm_allocation_callbacks_t callbacks;
	int i;

	xassert(srun_opt);

	if (srun_opt->relative_set && srun_opt->relative)
		fatal("--relative option invalid for job allocation request");

	if ((j = _job_desc_msg_create_from_opts(opt_local)) == NULL)
		return NULL;

	if (opt_local->clusters &&
	    (slurmdb_get_first_avail_cluster(j, opt_local->clusters,
					     &working_cluster_rec)
	     != SLURM_SUCCESS)) {
		print_db_notok(opt_local->clusters, 0);
		return NULL;
	}

	j->origin_cluster = xstrdup(slurmctld_conf.cluster_name);

	/* Do not re-use existing job id when submitting new job
	 * from within a running job */
	if ((j->job_id != NO_VAL) && !opt_local->jobid_set) {
		info("WARNING: Creating SLURM job allocation from within "
		     "another allocation");
		info("WARNING: You are attempting to initiate a second job");
		if (!opt_local->jobid_set)	/* Let slurmctld set jobid */
			j->job_id = NO_VAL;
	}
	callbacks.ping = _ping_handler;
	callbacks.timeout = _timeout_handler;
	callbacks.job_complete = _job_complete_handler;
	callbacks.job_suspend = NULL;
	callbacks.user_msg = _user_msg_handler;
	callbacks.node_fail = _node_fail_handler;

	/* create message thread to handle pings and such from slurmctld */
	msg_thr = slurm_allocation_msg_thr_create(&j->other_port, &callbacks);

	/* NOTE: Do not process signals in separate pthread. The signal will
	 * cause slurm_allocate_resources_blocking() to exit immediately. */
	if (handle_signals) {
		xsignal_unblock(sig_array);
		for (i = 0; sig_array[i]; i++)
			xsignal(sig_array[i], _signal_while_allocating);
	}

	while (!resp) {
		resp = slurm_allocate_resources_blocking(j,
							 opt_local->immediate,
							 _set_pending_job_id);
		if (destroy_job) {
			/* cancelled by signal */
			break;
		} else if (!resp && !_retry()) {
			break;
		}
	}

	if (resp)
		print_multi_line_string(resp->job_submit_user_msg, -1);

	if (resp && !destroy_job) {
		/*
		 * Allocation granted!
		 */
		pending_job_id = resp->job_id;

		/*
		 * These values could be changed while the job was
		 * pending so overwrite the request with what was
		 * allocated so we don't have issues when we use them
		 * in the step creation.
		 */
		opt_local->pn_min_memory = NO_VAL64;
		opt_local->mem_per_cpu   = NO_VAL64;
		if (resp->pn_min_memory != NO_VAL64) {
			if (resp->pn_min_memory & MEM_PER_CPU) {
				opt_local->mem_per_cpu = (resp->pn_min_memory &
							 (~MEM_PER_CPU));
			} else {
				opt_local->pn_min_memory = resp->pn_min_memory;
			}
		}

#ifdef HAVE_BG
		uint32_t node_cnt = 0;
		select_g_select_jobinfo_get(resp->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &node_cnt);
		if ((node_cnt == 0) || (node_cnt == NO_VAL)) {
			opt_local->min_nodes = node_cnt;
			opt_local->max_nodes = node_cnt;
		} /* else we just use the original request */

		if (!_wait_bluegene_block_ready(resp)) {
			if (!destroy_job)
				error("Something is wrong with the "
				      "boot of the block.");
			goto relinquish;
		}
#else
		opt_local->min_nodes = resp->node_cnt;
		opt_local->max_nodes = resp->node_cnt;

		if (resp->working_cluster_rec)
			slurm_setup_remote_working_cluster(resp);

		if (!_wait_nodes_ready(resp)) {
			if (!destroy_job)
				error("Something is wrong with the boot of the nodes.");
			goto relinquish;
		}
#endif
	} else if (destroy_job) {
		goto relinquish;
	}

	if (handle_signals)
		xsignal_block(sig_array);

	job_desc_msg_destroy(j);

	return resp;

relinquish:
	if (resp) {
		if (!destroy_job)
			slurm_complete_job(resp->job_id, 1);
		slurm_free_resource_allocation_response_msg(resp);
	}
	exit(error_exit);
	return NULL;
}
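A brief sketch of how srun-side code might drive allocate_nodes(), assuming an already populated slurm_opt_t; _launch and its messages are illustrative, not part of the example:

/* Illustrative srun-side caller; error handling is abbreviated. */
static void _launch(slurm_opt_t *opt_local)
{
	resource_allocation_response_msg_t *resp;

	/* Block until the allocation is granted, retrying as needed. */
	resp = allocate_nodes(true, opt_local);
	if (!resp)
		fatal("Unable to allocate resources");

	info("Allocated %u nodes: %s", resp->node_cnt, resp->node_list);

	/* ... create and launch the job step here ... */

	slurm_free_resource_allocation_response_msg(resp);
}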