Exemplo n.º 1
0
/*
 * Attempt to start a job
 * jobid     (IN) - job id
 * task_cnt  (IN) - total count of tasks to start
 * hostlist  (IN) - SLURM hostlist expression with no repeated hostnames
 * tasklist  (IN/OUT) - comma separated list of hosts with tasks to be started,
 *                  list hostname once per task to start
 * comment_ptr (IN) - new comment field for the job or NULL for no change
 * err_code (OUT) - Moab error code
 * err_msg  (OUT) - Moab error message
 */
static int	_start_job(uint32_t jobid, int task_cnt, char *hostlist,
			char *tasklist, char *comment_ptr,
			int *err_code, char **err_msg)
{
	int rc = 0, old_task_cnt = 1;
	struct job_record *job_ptr;
	/* Write lock on job info, read lock on node info */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK };
	char *new_node_list = NULL;
	static char tmp_msg[128];
	bitstr_t *new_bitmap = (bitstr_t *) NULL;
	bitstr_t *save_req_bitmap = (bitstr_t *) NULL;
	bitoff_t i, bsize;
	int ll; /* layout info index */
	char *node_name, *node_idx, *node_cur, *save_req_nodes = NULL;
	size_t node_name_len;
	static uint32_t cr_test = 0, cr_enabled = 0;

	if (cr_test == 0) {
		select_g_get_info_from_plugin(SELECT_CR_PLUGIN, NULL,
						&cr_enabled);
		cr_test = 1;
	}

	lock_slurmctld(job_write_lock);
	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL) {
		*err_code = -700;
		*err_msg = "No such job";
		error("wiki: Failed to find job %u", jobid);
		rc = -1;
		goto fini;
	}

	if ((job_ptr->details == NULL) || (!IS_JOB_PENDING(job_ptr))) {
		*err_code = -700;
		*err_msg = "Job not pending, can't start";
		error("wiki: Attempt to start job %u in state %s",
			jobid, job_state_string(job_ptr->job_state));
		rc = -1;
		goto fini;
	}

	if (comment_ptr) {
		char *reserved = strstr(comment_ptr, "RESERVED:");
		if (reserved) {
			reserved += 9;
			job_ptr->details->reserved_resources =
				strtol(reserved, NULL, 10);
		}
		xfree(job_ptr->comment);
		job_ptr->comment = xstrdup(comment_ptr);
	}

	if (task_cnt) {
		new_node_list = xstrdup(hostlist);
		if (node_name2bitmap(new_node_list, false, &new_bitmap) != 0) {
			*err_code = -700;
			*err_msg = "Invalid TASKLIST";
			error("wiki: Attempt to set invalid node list for "
				"job %u, %s",
				jobid, hostlist);
			xfree(new_node_list);
			rc = -1;
			goto fini;
		}

		if (!bit_super_set(new_bitmap, avail_node_bitmap)) {
			/* Selected node is UP and not responding
			 * or it just went DOWN */
			*err_code = -700;
			*err_msg = "TASKLIST includes non-responsive node";
			error("wiki: Attempt to use non-responsive nodes for "
				"job %u, %s",
				jobid, hostlist);
			xfree(new_node_list);
			FREE_NULL_BITMAP(new_bitmap);
			rc = -1;
			goto fini;
		}

		/* User excluded node list incompatible with Wiki
		 * Exclude all nodes not explicitly requested */
		FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap);
		job_ptr->details->exc_node_bitmap = bit_copy(new_bitmap);
		bit_not(job_ptr->details->exc_node_bitmap);
	}

	/* Build layout information from tasklist (assuming that Moab
	 * sends a non-bracketed list of nodes, repeated as many times
	 * as cpus should be used per node); at this point, node names
	 * are comma-separated. This is _not_ a fast algorithm as it
	 * performs many string compares. */
	xfree(job_ptr->details->req_node_layout);
	if (task_cnt && cr_enabled) {
		uint16_t cpus_per_task = MAX(1, job_ptr->details->cpus_per_task);
		job_ptr->details->req_node_layout = (uint16_t *)
			xmalloc(bit_set_count(new_bitmap) * sizeof(uint16_t));
		bsize = bit_size(new_bitmap);
		for (i = 0, ll = -1; i < bsize; i++) {
			if (!bit_test(new_bitmap, i))
				continue;
			ll++;
			node_name = node_record_table_ptr[i].name;
			node_name_len  = strlen(node_name);
			if (node_name_len == 0)
				continue;
			node_cur = tasklist;
			while (*node_cur) {
				if ((node_idx = strstr(node_cur, node_name))) {
					if ((node_idx[node_name_len] == ',') ||
				 	    (node_idx[node_name_len] == '\0')) {
						job_ptr->details->
							req_node_layout[ll] +=
							cpus_per_task;
					}
					node_cur = strchr(node_idx, ',');
					if (node_cur)
						continue;
				}
				break;
			}
		}
	}

	/* save and update job state to start now */
	save_req_nodes = job_ptr->details->req_nodes;
	job_ptr->details->req_nodes = new_node_list;
	save_req_bitmap = job_ptr->details->req_node_bitmap;
	job_ptr->details->req_node_bitmap = new_bitmap;
	old_task_cnt = job_ptr->details->min_cpus;
	job_ptr->details->min_cpus = MAX(task_cnt, old_task_cnt);
	job_ptr->priority = 100000000;

 fini:	unlock_slurmctld(job_write_lock);
	if (rc)
		return rc;

	/* No errors so far */
	(void) schedule(INFINITE);	/* provides own locking */

	/* Check to insure the job was actually started */
	lock_slurmctld(job_write_lock);
	if (job_ptr->job_id != jobid)
		job_ptr = find_job_record(jobid);

	if (job_ptr && (job_ptr->job_id == jobid) &&
	    (!IS_JOB_RUNNING(job_ptr))) {
		uint16_t wait_reason = 0;
		char *wait_string;

		if (IS_JOB_FAILED(job_ptr))
			wait_string = "Invalid request, job aborted";
		else {
			wait_reason = job_ptr->state_reason;
			if (wait_reason == WAIT_HELD) {
				/* some job is completing, slurmctld did
				 * not even try to schedule this job */
				wait_reason = WAIT_RESOURCES;
			}
			wait_string = job_reason_string(wait_reason);
			job_ptr->state_reason = WAIT_HELD;
			xfree(job_ptr->state_desc);
		}
		*err_code = -910 - wait_reason;
		snprintf(tmp_msg, sizeof(tmp_msg),
			"Could not start job %u(%s): %s",
			jobid, new_node_list, wait_string);
		*err_msg = tmp_msg;
		error("wiki: %s", tmp_msg);

		/* restore some of job state */
		job_ptr->priority = 0;
		job_ptr->details->min_cpus = old_task_cnt;
		rc = -1;
	}

	if (job_ptr && (job_ptr->job_id == jobid) && job_ptr->details) {
		/* Restore required node list in case job requeued */
		xfree(job_ptr->details->req_nodes);
		job_ptr->details->req_nodes = save_req_nodes;
		FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
		job_ptr->details->req_node_bitmap = save_req_bitmap;
		FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap);
		xfree(job_ptr->details->req_node_layout);
	} else {
		error("wiki: start_job(%u) job missing", jobid);
		xfree(save_req_nodes);
		FREE_NULL_BITMAP(save_req_bitmap);
	}

	unlock_slurmctld(job_write_lock);
	schedule_node_save();	/* provides own locking */
	schedule_job_save();	/* provides own locking */
	return rc;
}
Exemplo n.º 2
0
static void _notify_slurmctld_nodes(agent_info_t *agent_ptr,
				    int no_resp_cnt, int retry_cnt)
{
	ListIterator itr = NULL;
	ret_data_info_t *ret_data_info = NULL;
	state_t state;
	int is_ret_list = 1;
	/* Locks: Read config, write job, write node */
	slurmctld_lock_t node_write_lock =
	    { READ_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };
	thd_t *thread_ptr = agent_ptr->thread_struct;
	int i;

	/* Notify slurmctld of non-responding nodes */
	if (no_resp_cnt) {
		/* Update node table data for non-responding nodes */
		lock_slurmctld(node_write_lock);
		if (agent_ptr->msg_type == REQUEST_BATCH_JOB_LAUNCH) {
			/* Requeue the request */
			batch_job_launch_msg_t *launch_msg_ptr =
					*agent_ptr->msg_args_pptr;
			uint32_t job_id = launch_msg_ptr->job_id;
			job_complete(job_id, 0, true, false, 0);
		}
		unlock_slurmctld(node_write_lock);
	}
	if (retry_cnt && agent_ptr->retry)
		_queue_agent_retry(agent_ptr, retry_cnt);

	/* Update last_response on responding nodes */
	lock_slurmctld(node_write_lock);
	for (i = 0; i < agent_ptr->thread_count; i++) {
		char *down_msg, *node_names;
		if (!thread_ptr[i].ret_list) {
			state = thread_ptr[i].state;
			is_ret_list = 0;
			goto switch_on_state;
		}
		is_ret_list = 1;

		itr = list_iterator_create(thread_ptr[i].ret_list);
		while ((ret_data_info = list_next(itr))) {
			state = ret_data_info->err;
		switch_on_state:
			switch(state) {
			case DSH_NO_RESP:
				if (!is_ret_list) {
					node_not_resp(thread_ptr[i].nodelist,
						      thread_ptr[i].
						      start_time);
				} else {
					node_not_resp(ret_data_info->node_name,
						      thread_ptr[i].start_time);
				}
				break;
			case DSH_FAILED:
				if (is_ret_list)
					node_names = ret_data_info->node_name;
				else
					node_names = thread_ptr[i].nodelist;
#ifdef HAVE_FRONT_END
				down_msg = "";
#else
				set_node_down(node_names,
					      "Prolog/Epilog failure");
				down_msg = ", set to state DOWN";
#endif
				error("Prolog/Epilog failure on nodes %s%s",
				      node_names, down_msg);
				break;
			case DSH_DONE:
				if (!is_ret_list)
					node_did_resp(thread_ptr[i].nodelist);
				else
					node_did_resp(ret_data_info->node_name);
				break;
			default:
				if (!is_ret_list) {
					error("unknown state returned for %s",
					      thread_ptr[i].nodelist);
				} else {
					error("unknown state returned for %s",
					      ret_data_info->node_name);
				}
				break;
			}
			if (!is_ret_list)
				goto finished;
		}
		list_iterator_destroy(itr);
finished:	;
	}
	unlock_slurmctld(node_write_lock);
	if (run_scheduler) {
		run_scheduler = false;
		/* below functions all have their own locking */
		if (schedule(0))	{
			schedule_job_save();
			schedule_node_save();
		}
	}
	if ((agent_ptr->msg_type == REQUEST_PING) ||
	    (agent_ptr->msg_type == REQUEST_HEALTH_CHECK) ||
	    (agent_ptr->msg_type == REQUEST_NODE_REGISTRATION_STATUS))
		ping_end();
}