/* For each burst buffer record, set the use_time to the time at which its
 * use is expected to begin (i.e. each job's expected start time) */
extern void bb_set_use_time(bb_state_t *state_ptr)
{
	struct job_record *job_ptr;
	bb_alloc_t *bb_alloc = NULL;
	time_t now = time(NULL);
	int i;

	state_ptr->next_end_time = now + 60 * 60; /* Start estimate now+1hour */
	for (i = 0; i < BB_HASH_SIZE; i++) {
		bb_alloc = state_ptr->bb_ahash[i];
		while (bb_alloc) {
			if (bb_alloc->job_id &&
			    ((bb_alloc->state == BB_STATE_STAGING_IN) ||
			     (bb_alloc->state == BB_STATE_STAGED_IN))) {
				job_ptr = find_job_record(bb_alloc->job_id);
				if (!job_ptr && !bb_alloc->orphaned) {
					bb_alloc->orphaned = true;
					error("%s: Job %u not found for "
					      "allocated burst buffer",
					      __func__, bb_alloc->job_id);
					bb_alloc->use_time = now + 24 * 60 * 60;
				} else if (!job_ptr) {
					bb_alloc->use_time = now + 24 * 60 * 60;
				} else if (job_ptr->start_time) {
					bb_alloc->end_time = job_ptr->end_time;
					bb_alloc->use_time = job_ptr->start_time;
				} else {
					/* Unknown start time */
					bb_alloc->use_time = now + 60 * 60;
				}
			} else if (bb_alloc->job_id) {
				job_ptr = find_job_record(bb_alloc->job_id);
				if (job_ptr)
					bb_alloc->end_time = job_ptr->end_time;
			} else {
				bb_alloc->use_time = now;
			}
			if (bb_alloc->end_time && bb_alloc->size) {
				if (bb_alloc->end_time <= now)
					state_ptr->next_end_time = now;
				else if (state_ptr->next_end_time >
					 bb_alloc->end_time) {
					state_ptr->next_end_time =
						bb_alloc->end_time;
				}
			}
			bb_alloc = bb_alloc->next;
		}
	}
}
Example #2
/* Purge per-job burst buffer records when the stage-out has completed and
 * the job has been purged from Slurm */
static void _purge_bb_rec(void)
{
	static time_t time_last_purge = 0;
	time_t now = time(NULL);
	bb_alloc_t **bb_pptr, *bb_ptr = NULL;
	int i;

	if (difftime(now, time_last_purge) > 60) {	/* Once per minute */
		time_last_purge = now;
		for (i = 0; i < BB_HASH_SIZE; i++) {
			bb_pptr = &bb_hash[i];
			bb_ptr = bb_hash[i];
			while (bb_ptr) {
				if ((bb_ptr->job_id != 0) &&
				    (bb_ptr->state >= BB_STATE_STAGED_OUT) &&
				    !find_job_record(bb_ptr->job_id)) {
					*bb_pptr = bb_ptr->next;
					xfree(bb_ptr);
					break;
				}
				bb_pptr = &bb_ptr->next;
				bb_ptr = bb_ptr->next;
			}
		}
	}
}
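The purge loop above uses the pointer-to-pointer unlink idiom: bb_pptr always holds the address of the link that points at the current node, so splicing a node out works the same way for the bucket head as for an interior node (the same idiom reappears in _timeout_bb_rec, example #22). A minimal stand-alone sketch of the idiom, with a hypothetical node type and match test:

#include <stdlib.h>

struct node {
	int key;
	struct node *next;
};

/* Remove the first node whose key matches. Rewriting *pptr splices
 * the node out whether it is the list head or an interior node, so
 * no separate "previous" pointer is needed. */
static void remove_key(struct node **head, int key)
{
	struct node **pptr = head;
	struct node *ptr = *head;

	while (ptr) {
		if (ptr->key == key) {
			*pptr = ptr->next;	/* splice out */
			free(ptr);
			return;
		}
		pptr = &ptr->next;
		ptr = ptr->next;
	}
}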
Example #3
static void _requeue_when_finished(uint32_t job_id)
{
	/* Locks: write job */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };
	struct job_record *job_ptr;

	while (1) {
		lock_slurmctld(job_write_lock);
		job_ptr = find_job_record(job_id);
		if (!job_ptr) {	/* job already purged; nothing to requeue */
			unlock_slurmctld(job_write_lock);
			break;
		}
		if (IS_JOB_FINISHED(job_ptr)) {
			job_ptr->job_state = JOB_PENDING;
			job_ptr->details->submit_time = time(NULL);
			job_ptr->restart_cnt++;
			/* The job completion logger removes the submit
			 * count, so we need to add it back again. */
			acct_policy_add_job_submit(job_ptr);
			unlock_slurmctld(job_write_lock);
			break;
		} else {
			unlock_slurmctld(job_write_lock);
			sleep(1);
		}
	}
}
Example #4
/*
 * srun_node_fail - notify srun of a node's failure
 * IN job_id    - id of job to notify
 * IN node_name - name of failed node
 */
extern void srun_node_fail (uint32_t job_id, char *node_name)
{
#ifndef HAVE_FRONT_END
	struct node_record *node_ptr;
#endif
	struct job_record *job_ptr = find_job_record (job_id);
	int bit_position = -1;
	slurm_addr_t * addr;
	srun_node_fail_msg_t *msg_arg;
	ListIterator step_iterator;
	struct step_record *step_ptr;

	xassert(job_ptr);
	xassert(node_name);
	if (!job_ptr || !IS_JOB_RUNNING(job_ptr))
		return;

#ifdef HAVE_FRONT_END
	/* Notify all job steps in front-end mode */
#else
	if (!node_name || (node_ptr = find_node_record(node_name)) == NULL)
		return;
	bit_position = node_ptr - node_record_table_ptr;
#endif

	step_iterator = list_iterator_create(job_ptr->step_list);
	while ((step_ptr = (struct step_record *) list_next(step_iterator))) {
		if ((bit_position >= 0) &&
		    (!bit_test(step_ptr->step_node_bitmap, bit_position)))
			continue;	/* job step not on this node */
		if ( (step_ptr->port    == 0)    ||
		     (step_ptr->host    == NULL) ||
		     (step_ptr->batch_step)      ||
		     (step_ptr->host[0] == '\0') )
			continue;
		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, step_ptr->port, step_ptr->host);
		msg_arg = xmalloc(sizeof(srun_node_fail_msg_t));
		msg_arg->job_id   = job_ptr->job_id;
		msg_arg->step_id  = step_ptr->step_id;
		msg_arg->nodelist = xstrdup(node_name);
		_srun_agent_launch(addr, step_ptr->host, SRUN_NODE_FAIL,
				   msg_arg);
	}
	list_iterator_destroy(step_iterator);

	if (job_ptr->other_port && job_ptr->alloc_node && job_ptr->resp_host) {
		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, job_ptr->other_port, job_ptr->resp_host);
		msg_arg = xmalloc(sizeof(srun_node_fail_msg_t));
		msg_arg->job_id   = job_id;
		msg_arg->step_id  = NO_VAL;
		msg_arg->nodelist = xstrdup(node_name);
		_srun_agent_launch(addr, job_ptr->alloc_node, SRUN_NODE_FAIL,
				   msg_arg);
	}
}
Example #5
/*
 * srun_response - note that srun has responded
 * IN job_id  - id of job responding
 * IN step_id - id of step responding or NO_VAL if not a step
 */
extern void srun_response(uint32_t job_id, uint32_t step_id)
{
	struct job_record  *job_ptr = find_job_record (job_id);
	time_t now = time(NULL);

	if (job_ptr == NULL)
		return;
	job_ptr->time_last_active = now;
}
Example #6
/*
 * srun_allocate - notify srun of a resource allocation
 * IN job_ptr - job allocated resources
 */
extern void srun_allocate(struct job_record *job_ptr)
{
	struct job_record *pack_job, *pack_leader;
	resource_allocation_response_msg_t *msg_arg = NULL;
	slurm_addr_t *addr;
	ListIterator iter;
	List job_resp_list = NULL;

	xassert(job_ptr);
	if (!job_ptr || !job_ptr->alloc_resp_port || !job_ptr->alloc_node ||
	    !job_ptr->resp_host || !job_ptr->job_resrcs ||
	    !job_ptr->job_resrcs->cpu_array_cnt)
		return;

	if (job_ptr->pack_job_id == 0) {
		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, job_ptr->alloc_resp_port,
			job_ptr->resp_host);

		msg_arg = build_alloc_msg(job_ptr, SLURM_SUCCESS, NULL);
		_srun_agent_launch(addr, job_ptr->alloc_node,
				   RESPONSE_RESOURCE_ALLOCATION, msg_arg,
				   job_ptr->start_protocol_ver);
	} else if (_pending_pack_jobs(job_ptr)) {
		return;
	} else if ((pack_leader = find_job_record(job_ptr->pack_job_id))) {
		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, pack_leader->alloc_resp_port,
			       pack_leader->resp_host);
		job_resp_list = list_create(_free_srun_alloc);
		iter = list_iterator_create(pack_leader->pack_job_list);
		while ((pack_job = (struct job_record *) list_next(iter))) {
			if (pack_leader->pack_job_id != pack_job->pack_job_id) {
				error("%s: Bad pack_job_list for %pJ",
				      __func__, pack_leader);
				continue;
			}
			msg_arg = build_alloc_msg(pack_job, SLURM_SUCCESS,
						  NULL);
			list_append(job_resp_list, msg_arg);
			msg_arg = NULL;
		}
		list_iterator_destroy(iter);
		_srun_agent_launch(addr, job_ptr->alloc_node,
				   RESPONSE_JOB_PACK_ALLOCATION, job_resp_list,
				   job_ptr->start_protocol_ver);
	} else {
		error("%s: Can not find pack job leader %pJ",
		      __func__, job_ptr);
	}
}
Example #7
File: job_requeue.c  Project: fafik23/slurm
/* RET 0 on success, -1 on failure */
extern int	job_requeue_wiki(char *cmd_ptr, int *err_code, char **err_msg)
{
	char *arg_ptr, *tmp_char;
	uint32_t jobid;
	struct job_record *job_ptr;
	static char reply_msg[128];
	int slurm_rc;
	/* Write lock on job and node info */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };

	arg_ptr = strstr(cmd_ptr, "ARG=");
	if (arg_ptr == NULL) {
		*err_code = -300;
		*err_msg = "REQUEUEJOB lacks ARG";
		error("wiki: REQUEUEJOB lacks ARG");
		return -1;
	}
	jobid = strtoul(arg_ptr+4, &tmp_char, 10);
	if ((tmp_char[0] != '\0') && (!isspace(tmp_char[0]))) {
		*err_code = -300;
		*err_msg = "Invalid ARG value";
		error("wiki: REQUEUEJOB has invalid jobid");
		return -1;
	}

	lock_slurmctld(job_write_lock);
	slurm_rc = job_requeue(0, jobid, NULL, false, 0);
	if (slurm_rc != SLURM_SUCCESS) {
		unlock_slurmctld(job_write_lock);
		*err_code = -700;
		*err_msg = slurm_strerror(slurm_rc);
		error("wiki: Failed to requeue job %u (%m)", jobid);
		return -1;
	}

	/* We need to clear the required node list here.
	 * If the job was submitted with srun and a
	 * required node list, it gets lost here. */
	job_ptr = find_job_record(jobid);
	if (job_ptr && job_ptr->details) {
		xfree(job_ptr->details->req_nodes);
		FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
	}
	info("wiki: requeued job %u", jobid);
	unlock_slurmctld(job_write_lock);
	snprintf(reply_msg, sizeof(reply_msg),
		"job %u requeued successfully", jobid);
	*err_msg = reply_msg;
	return 0;
}
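The ARG handling at the top of job_requeue_wiki is the parsing idiom used throughout the wiki plugin: locate the "ARG=" token with strstr(), convert the digits with strtoul(), and reject anything after them that is neither end-of-string nor whitespace. A stand-alone sketch of that validation (the function name and test string are illustrative, not part of Slurm):

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Parse "... ARG=<jobid>" and return the job id, or 0 on error. */
static unsigned long parse_wiki_jobid(const char *cmd)
{
	const char *arg = strstr(cmd, "ARG=");
	char *end;
	unsigned long jobid;

	if (arg == NULL)
		return 0;	/* ARG token missing */
	jobid = strtoul(arg + 4, &end, 10);
	if ((end[0] != '\0') && !isspace((unsigned char) end[0]))
		return 0;	/* trailing garbage after the digits */
	return jobid;
}

int main(void)
{
	printf("%lu\n", parse_wiki_jobid("CMD=REQUEUEJOB ARG=1234"));
	return 0;
}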
Example #8
static int	_job_notify(uint32_t jobid, char *msg_ptr)
{
	struct job_record *job_ptr;

	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL) {
		error("wiki: NOTIFYJOB has invalid jobid %u", jobid);
		return ESLURM_INVALID_JOB_ID;
	}
	if (IS_JOB_FINISHED(job_ptr)) {
		error("wiki: NOTIFYJOB jobid %u is finished", jobid);
		return ESLURM_INVALID_JOB_ID;
	}
	srun_user_message(job_ptr, msg_ptr);
	return SLURM_SUCCESS;
}
Example #9
File: job_signal.c  Project: IFCA/slurm
static int	_job_signal(uint32_t jobid, uint16_t sig_num)
{
	struct job_record *job_ptr;
	int rc = SLURM_SUCCESS;

	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL)
		return ESLURM_INVALID_JOB_ID;
	if (IS_JOB_FINISHED(job_ptr))
		return ESLURM_ALREADY_DONE;

	if (job_ptr->batch_flag)
		rc = job_signal(jobid, sig_num, 1, 0, false);
	if (rc == SLURM_SUCCESS)
		rc = job_signal(jobid, sig_num, 0, 0, false);
	return rc;
}
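Note the two-phase delivery in _job_signal: for batch jobs the signal is first sent with the third job_signal() argument set, which (judging by the batch_flag test) targets only the batch script, and only if that succeeds is it sent again with the argument clear for the remaining steps. A failure on the first call therefore short-circuits the second.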
Example #10
/*
 * srun_allocate - notify srun of a resource allocation
 * IN job_id - id of the job allocated resource
 */
extern void srun_allocate (uint32_t job_id)
{
	struct job_record *job_ptr = find_job_record (job_id);

	xassert(job_ptr);
	if (job_ptr && job_ptr->alloc_resp_port && job_ptr->alloc_node &&
	    job_ptr->resp_host && job_ptr->job_resrcs &&
	    job_ptr->job_resrcs->cpu_array_cnt) {
		slurm_addr_t * addr;
		resource_allocation_response_msg_t *msg_arg;
		job_resources_t *job_resrcs_ptr = job_ptr->job_resrcs;

		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, job_ptr->alloc_resp_port,
			job_ptr->resp_host);
		msg_arg = xmalloc(sizeof(resource_allocation_response_msg_t));
		msg_arg->job_id 	= job_ptr->job_id;
		msg_arg->node_list	= xstrdup(job_ptr->nodes);
		msg_arg->alias_list	= xstrdup(job_ptr->alias_list);
		msg_arg->num_cpu_groups	= job_resrcs_ptr->cpu_array_cnt;
		msg_arg->cpus_per_node  = xmalloc(sizeof(uint16_t) *
					  job_resrcs_ptr->cpu_array_cnt);
		if (job_ptr->details) {
			msg_arg->pn_min_memory = job_ptr->details->
						 pn_min_memory;
		}
		memcpy(msg_arg->cpus_per_node,
		       job_resrcs_ptr->cpu_array_value,
		       (sizeof(uint16_t) * job_resrcs_ptr->cpu_array_cnt));
		msg_arg->cpu_count_reps  = xmalloc(sizeof(uint32_t) *
					   job_resrcs_ptr->cpu_array_cnt);
		memcpy(msg_arg->cpu_count_reps,
		       job_resrcs_ptr->cpu_array_reps,
		       (sizeof(uint32_t) * job_resrcs_ptr->cpu_array_cnt));
		msg_arg->node_cnt	= job_ptr->node_cnt;
		msg_arg->select_jobinfo = select_g_select_jobinfo_copy(
				job_ptr->select_jobinfo);
		msg_arg->error_code	= SLURM_SUCCESS;
		_srun_agent_launch(addr, job_ptr->alloc_node,
				   RESPONSE_RESOURCE_ALLOCATION, msg_arg);
	}
}
Example #11
static bool _pending_pack_jobs(struct job_record *job_ptr)
{
	struct job_record *pack_leader, *pack_job;
	ListIterator iter;
	bool pending_job = false;

	if (job_ptr->pack_job_id == 0)
		return false;

	pack_leader = find_job_record(job_ptr->pack_job_id);
	if (!pack_leader) {
		error("Job pack leader %pJ not found", job_ptr);
		return false;
	}
	if (!pack_leader->pack_job_list) {
		error("Job pack leader %pJ lacks pack_job_list",
		      job_ptr);
		return false;
	}

	iter = list_iterator_create(pack_leader->pack_job_list);
	while ((pack_job = (struct job_record *) list_next(iter))) {
		if (pack_leader->pack_job_id != pack_job->pack_job_id) {
			error("%s: Bad pack_job_list for %pJ",
			      __func__, pack_leader);
			continue;
		}
		if (IS_JOB_PENDING(pack_job)) {
			pending_job = true;
			break;
		}
	}
	list_iterator_destroy(iter);

	return pending_job;
}
Example #12
static char *	_will_run_test2(uint32_t jobid, time_t start_time,
				char *node_list,
				uint32_t *preemptee, int preemptee_cnt,
				int *err_code, char **err_msg)
{
	struct job_record *job_ptr = NULL, *pre_ptr;
	struct part_record *part_ptr;
	bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL;
	bitstr_t *exc_core_bitmap = NULL;
	time_t start_res;
	uint32_t min_nodes, max_nodes, req_nodes;
	List preemptee_candidates = NULL, preempted_jobs = NULL;
	time_t orig_start_time;
	char *reply_msg = NULL;
	int i, rc;
	bool resv_overlap = false;

	xassert(node_list);
	debug2("wiki2: will_run2 job_id=%u start_time=%u node_list=%s",
		jobid, (uint32_t)start_time, node_list);

	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL) {
		*err_code = -700;
		*err_msg = "No such job";
		error("wiki: Failed to find job %u", jobid);
		return NULL;
	}
	if ((job_ptr->details == NULL) || (!IS_JOB_PENDING(job_ptr))) {
		*err_code = -700;
		*err_msg = "WillRun not applicable to non-pending job";
		error("wiki: WillRun on non-pending job %u", jobid);
		return NULL;
	}

	part_ptr = job_ptr->part_ptr;
	if (part_ptr == NULL) {
		*err_code = -700;
		*err_msg = "Job lacks a partition";
		error("wiki: Job %u lacks a partition", jobid);
		return NULL;
	}

	if (node_name2bitmap(node_list, false, &avail_bitmap) != 0) {
		*err_code = -700;
		*err_msg = "Invalid available nodes value";
		error("wiki: Attempt to set invalid available node "
		      "list for job %u, %s", jobid, node_list);
		return NULL;
	}

	/* Enforce reservation: access control, time and nodes */
	start_res = start_time;
	rc = job_test_resv(job_ptr, &start_res, true, &resv_bitmap,
			   &exc_core_bitmap, &resv_overlap);
	if (rc != SLURM_SUCCESS) {
		*err_code = -730;
		*err_msg = "Job denied access to reservation";
		error("wiki: reservation access denied for job %u", jobid);
		FREE_NULL_BITMAP(avail_bitmap);
		FREE_NULL_BITMAP(exc_core_bitmap);
		return NULL;
	}
	bit_and(avail_bitmap, resv_bitmap);
	FREE_NULL_BITMAP(resv_bitmap);

	/* Only consider nodes that are not DOWN or DRAINED */
	bit_and(avail_bitmap, avail_node_bitmap);

	/* Consider only nodes in this job's partition */
	if (part_ptr->node_bitmap)
		bit_and(avail_bitmap, part_ptr->node_bitmap);
	else {
		*err_code = -730;
		*err_msg = "Job's partition has no nodes";
		error("wiki: no nodes in partition %s for job %u",
			part_ptr->name, jobid);
		FREE_NULL_BITMAP(avail_bitmap);
		FREE_NULL_BITMAP(exc_core_bitmap);
		return NULL;
	}

	if (job_req_node_filter(job_ptr, avail_bitmap) != SLURM_SUCCESS) {
		/* Job probably has invalid feature list */
		*err_code = -730;
		*err_msg = "Job's required features not available "
			   "on selected nodes";
		error("wiki: job %u not runnable on hosts=%s",
			jobid, node_list);
		FREE_NULL_BITMAP(avail_bitmap);
		FREE_NULL_BITMAP(exc_core_bitmap);
		return NULL;
	}
	if (job_ptr->details->exc_node_bitmap) {
		bit_not(job_ptr->details->exc_node_bitmap);
		bit_and(avail_bitmap, job_ptr->details->exc_node_bitmap);
		bit_not(job_ptr->details->exc_node_bitmap);
	}
	if ((job_ptr->details->req_node_bitmap) &&
	    (!bit_super_set(job_ptr->details->req_node_bitmap,
			    avail_bitmap))) {
		*err_code = -730;
		*err_msg = "Job's required nodes not available";
		error("wiki: job %u not runnable on hosts=%s",
			jobid, node_list);
		FREE_NULL_BITMAP(avail_bitmap);
		FREE_NULL_BITMAP(exc_core_bitmap);
		return NULL;
	}

	min_nodes = MAX(job_ptr->details->min_nodes, part_ptr->min_nodes);
	if (job_ptr->details->max_nodes == 0)
		max_nodes = part_ptr->max_nodes;
	else
		max_nodes = MIN(job_ptr->details->max_nodes,
				part_ptr->max_nodes);
	max_nodes = MIN(max_nodes, 500000); /* prevent overflows */
	if (job_ptr->details->max_nodes)
		req_nodes = max_nodes;
	else
		req_nodes = min_nodes;
	if (min_nodes > max_nodes) {
		/* job's min_nodes exceeds partition's max_nodes */
		*err_code = -730;
		*err_msg = "Job's min_nodes > max_nodes";
		error("wiki: job %u not runnable on hosts=%s",
			jobid, node_list);
		FREE_NULL_BITMAP(avail_bitmap);
		FREE_NULL_BITMAP(exc_core_bitmap);
		return NULL;
	}

	if (preemptee_cnt) {
		preemptee_candidates = list_create(NULL);
		for (i=0; i<preemptee_cnt; i++) {
			if ((pre_ptr = find_job_record(preemptee[i])))
				list_append(preemptee_candidates, pre_ptr);
		}
	}

	orig_start_time = job_ptr->start_time;
	rc = select_g_job_test(job_ptr, avail_bitmap, min_nodes, max_nodes,
			       req_nodes, SELECT_MODE_WILL_RUN,
			       preemptee_candidates, &preempted_jobs,
			       exc_core_bitmap);
	FREE_NULL_LIST(preemptee_candidates);

	if (rc == SLURM_SUCCESS) {
		char *hostlist, *sep, tmp_str[128];
		uint32_t pre_cnt = 0, proc_cnt = 0;

#ifdef HAVE_BG
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
				     SELECT_JOBDATA_NODE_CNT, &proc_cnt);
#else
		proc_cnt = job_ptr->total_cpus;
#endif
		snprintf(tmp_str, sizeof(tmp_str),
			 "STARTINFO=%u TASKS=%u STARTTIME=%u NODES=",
			 job_ptr->job_id, proc_cnt,
			 (uint32_t) job_ptr->start_time);
		xstrcat(reply_msg, tmp_str);
		hostlist = bitmap2node_name(avail_bitmap);
		xstrcat(reply_msg, hostlist);
		xfree(hostlist);

		if (preempted_jobs) {
			while ((pre_ptr = list_pop(preempted_jobs))) {
				if (pre_cnt++)
					sep = ",";
				else
					sep = " PREEMPT=";
				snprintf(tmp_str, sizeof(tmp_str), "%s%u",
					 sep, pre_ptr->job_id);
				xstrcat(reply_msg, tmp_str);
			}
			FREE_NULL_LIST(preempted_jobs);
		}
	} else {
		xstrcat(reply_msg, "Jobs not runable on selected nodes");
		error("wiki: jobs not runnable on nodes");
	}

	/* Restore pending job's expected start time */
	job_ptr->start_time = orig_start_time;

	FREE_NULL_BITMAP(avail_bitmap);
	FREE_NULL_BITMAP(exc_core_bitmap);
	return reply_msg;
}
Example #13
static int	_job_modify(uint32_t jobid, char *bank_ptr,
			char *depend_ptr, char *new_hostlist,
			uint32_t new_node_cnt, char *part_name_ptr,
			uint32_t new_time_limit, char *name_ptr,
			char *start_ptr, char *feature_ptr, char *env_ptr,
			char *comment_ptr, char *gres_ptr, char *wckey_ptr)
{
	struct job_record *job_ptr;
	time_t now = time(NULL);
	bool update_accounting = false;

	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL) {
		error("wiki: MODIFYJOB has invalid jobid %u", jobid);
		return ESLURM_INVALID_JOB_ID;
	}
	if (IS_JOB_FINISHED(job_ptr) || (job_ptr->details == NULL)) {
		info("wiki: MODIFYJOB jobid %u is finished", jobid);
		return ESLURM_DISABLED;
	}

	if (comment_ptr) {
		info("wiki: change job %u comment %s", jobid, comment_ptr);
		xfree(job_ptr->comment);
		job_ptr->comment = xstrdup(comment_ptr);
		last_job_update = now;
	}

	if (depend_ptr) {
		int rc = update_job_dependency(job_ptr, depend_ptr);
		if (rc == SLURM_SUCCESS) {
			info("wiki: changed job %u dependency to %s",
				jobid, depend_ptr);
		} else {
			error("wiki: changing job %u dependency to %s",
				jobid, depend_ptr);
			return EINVAL;
		}
	}

	if (env_ptr) {
		bool have_equal = false;
		char old_sep[1];
		int begin = 0, i;

		if (job_ptr->batch_flag == 0) {
			error("wiki: attempt to set environment variables "
			      "for non-batch job %u", jobid);
			return ESLURM_DISABLED;
		}
		for (i=0; ; i++) {
			if (env_ptr[i] == '=') {
				if (have_equal) {
					error("wiki: setting job %u invalid "
					      "environment variables: %s",
					      jobid, env_ptr);
					return EINVAL;
				}
				have_equal = true;
				if (env_ptr[i+1] == '\"') {
					for (i+=2; ; i++) {
						if (env_ptr[i] == '\0') {
							error("wiki: setting job %u "
							      "invalid environment "
							      "variables: %s",
					 		     jobid, env_ptr);
							return EINVAL;
						}
						if (env_ptr[i] == '\"') {
							i++;
							break;
						}
						if (env_ptr[i] == '\\') {
							i++;
						}
					}
				} else if (env_ptr[i+1] == '\'') {
					for (i+=2; ; i++) {
						if (env_ptr[i] == '\0') {
							error("wiki: setting job %u "
							      "invalid environment "
							      "variables: %s",
					 		     jobid, env_ptr);
							return EINVAL;
						}
						if (env_ptr[i] == '\'') {
							i++;
							break;
						}
						if (env_ptr[i] == '\\') {
							i++;
						}
					}
				}
			}
			if (isspace(env_ptr[i]) || (env_ptr[i] == ',')) {
				if (!have_equal) {
					error("wiki: setting job %u invalid "
					      "environment variables: %s",
					      jobid, env_ptr);
					return EINVAL;
				}
				old_sep[0] = env_ptr[i];
				env_ptr[i] = '\0';
				xrealloc(job_ptr->details->env_sup,
					 sizeof(char *) *
					 (job_ptr->details->env_cnt+1));
				job_ptr->details->env_sup
						[job_ptr->details->env_cnt++] =
						xstrdup(&env_ptr[begin]);
				info("wiki: for job %u add env: %s",
				     jobid, &env_ptr[begin]);
				env_ptr[i] = old_sep[0];
				if (isspace(old_sep[0]))
					break;
				begin = i + 1;
				have_equal = false;
			}
		}
	}
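	/*
	 * A (hypothetical) env_ptr value the scan above accepts, with a
	 * trailing space:
	 *   FOO=1,BAR="a,b c",BAZ=qux
	 * Variables are separated by commas or whitespace; double- or
	 * single-quoted values may contain separators, and a backslash
	 * escapes the next character inside quotes. The scan only breaks
	 * out on a whitespace separator, so it relies on the input ending
	 * in whitespace rather than directly in '\0'.
	 */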

	if (new_time_limit) {
		time_t old_time = job_ptr->time_limit;
		job_ptr->time_limit = new_time_limit;
		info("wiki: change job %u time_limit to %u",
			jobid, new_time_limit);
		/* Update end_time based upon change
		 * to preserve suspend time info */
		job_ptr->end_time = job_ptr->end_time +
				((job_ptr->time_limit -
				  old_time) * 60);
		last_job_update = now;
	}

	if (bank_ptr &&
	    (update_job_account("wiki", job_ptr, bank_ptr) != SLURM_SUCCESS)) {
		return EINVAL;
	}

	if (feature_ptr) {
		if (IS_JOB_PENDING(job_ptr) && (job_ptr->details)) {
			info("wiki: change job %u features to %s",
				jobid, feature_ptr);
			xfree(job_ptr->details->features);
			job_ptr->details->features = xstrdup(feature_ptr);
			last_job_update = now;
		} else {
			error("wiki: MODIFYJOB features of non-pending "
				"job %u", jobid);
			return ESLURM_DISABLED;
		}
	}

	if (start_ptr) {
		char *end_ptr;
		uint32_t begin_time = strtol(start_ptr, &end_ptr, 10);
		if (IS_JOB_PENDING(job_ptr) && (job_ptr->details)) {
			info("wiki: change job %u begin time to %u",
				jobid, begin_time);
			job_ptr->details->begin_time = begin_time;
			last_job_update = now;
			update_accounting = true;
		} else {
			error("wiki: MODIFYJOB begin_time of non-pending "
				"job %u", jobid);
			return ESLURM_DISABLED;
		}
	}

	if (name_ptr) {
		if (IS_JOB_PENDING(job_ptr)) {
			info("wiki: change job %u name %s", jobid, name_ptr);
			xfree(job_ptr->name);
			job_ptr->name = xstrdup(name_ptr);
			last_job_update = now;
			update_accounting = true;
		} else {
			error("wiki: MODIFYJOB name of non-pending job %u",
			      jobid);
			return ESLURM_DISABLED;
		}
	}

	if (new_hostlist) {
		int rc = 0, task_cnt;
		hostlist_t hl;
		char *tasklist;

		if (!IS_JOB_PENDING(job_ptr) || !job_ptr->details) {
			/* Job is done, nothing to reset */
			if (new_hostlist[0] == '\0')
				goto host_fini;
			error("wiki: MODIFYJOB hostlist of non-pending "
				"job %u", jobid);
			return ESLURM_DISABLED;
		}

		xfree(job_ptr->details->req_nodes);
		FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
		if (new_hostlist[0] == '\0')
			goto host_fini;

		tasklist = moab2slurm_task_list(new_hostlist, &task_cnt);
		if (tasklist == NULL) {
			rc = 1;
			goto host_fini;
		}
		hl = hostlist_create(tasklist);
		if (hl == 0) {
			rc = 1;
			goto host_fini;
		}
		hostlist_uniq(hl);
		hostlist_sort(hl);
		job_ptr->details->req_nodes =
			hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);
		if (job_ptr->details->req_nodes == NULL) {
			rc = 1;
			goto host_fini;
		}
		if (node_name2bitmap(job_ptr->details->req_nodes, false,
                                     &job_ptr->details->req_node_bitmap)) {
			rc = 1;
			goto host_fini;
		}

host_fini:	if (rc) {
			info("wiki: change job %u invalid hostlist %s",
				jobid, new_hostlist);
			xfree(job_ptr->details->req_nodes);
			return EINVAL;
		} else {
			info("wiki: change job %u hostlist %s",
				jobid, new_hostlist);
			update_accounting = true;
		}
	}

	if (part_name_ptr) {
		struct part_record *part_ptr;
		if (!IS_JOB_PENDING(job_ptr)) {
			error("wiki: MODIFYJOB partition of non-pending "
			      "job %u", jobid);
			return ESLURM_DISABLED;
		}

		part_ptr = find_part_record(part_name_ptr);
		if (part_ptr == NULL) {
			error("wiki: MODIFYJOB has invalid partition %s",
				part_name_ptr);
			return ESLURM_INVALID_PARTITION_NAME;
		}

		info("wiki: change job %u partition %s",
			jobid, part_name_ptr);
		xfree(job_ptr->partition);
		job_ptr->partition = xstrdup(part_name_ptr);
		job_ptr->part_ptr = part_ptr;
		last_job_update = now;
		update_accounting = true;
	}

	if (new_node_cnt) {
		job_desc_msg_t job_desc;
#ifdef HAVE_BG
		uint16_t geometry[SYSTEM_DIMENSIONS] = {(uint16_t) NO_VAL};
		static uint16_t cpus_per_node = 0;
		if (!cpus_per_node) {
			select_g_alter_node_cnt(SELECT_GET_NODE_CPU_CNT,
						&cpus_per_node);
		}
#endif
		if(!IS_JOB_PENDING(job_ptr) || !job_ptr->details) {
			error("wiki: MODIFYJOB node count of non-pending "
			      "job %u", jobid);
			return ESLURM_DISABLED;
		}
		memset(&job_desc, 0, sizeof(job_desc_msg_t));

		job_desc.min_nodes = new_node_cnt;
		job_desc.max_nodes = NO_VAL;
		job_desc.select_jobinfo = select_g_select_jobinfo_alloc();

		select_g_alter_node_cnt(SELECT_SET_NODE_CNT, &job_desc);

		select_g_select_jobinfo_free(job_desc.select_jobinfo);

		job_ptr->details->min_nodes = job_desc.min_nodes;
		if (job_ptr->details->max_nodes &&
		    (job_ptr->details->max_nodes < job_desc.min_nodes))
			job_ptr->details->max_nodes = job_desc.min_nodes;
		info("wiki: change job %u min_nodes to %u",
		     jobid, new_node_cnt);
#ifdef HAVE_BG
		job_ptr->details->min_cpus = job_desc.min_cpus;
		job_ptr->details->max_cpus = job_desc.max_cpus;
		job_ptr->details->pn_min_cpus = job_desc.pn_min_cpus;

		new_node_cnt = job_ptr->details->min_cpus;
		if (cpus_per_node)
			new_node_cnt /= cpus_per_node;

		/* This is only set up so accounting is set up correctly */
		select_g_select_jobinfo_set(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &new_node_cnt);
		/* reset geo since changing this makes any geo
		   potentially invalid */
		select_g_select_jobinfo_set(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_GEOMETRY,
					    geometry);
#endif
		last_job_update = now;
		update_accounting = true;
	}

	if (gres_ptr) {
		char *orig_gres;

		if (!IS_JOB_PENDING(job_ptr)) {
			error("wiki: MODIFYJOB GRES of non-pending job %u",
			      jobid);
			return ESLURM_DISABLED;
		}

		orig_gres = job_ptr->gres;
		job_ptr->gres = NULL;
		if (gres_ptr[0])
			job_ptr->gres = xstrdup(gres_ptr);
		if (gres_plugin_job_state_validate(job_ptr->gres,
						   &job_ptr->gres_list)) {
			error("wiki: MODIFYJOB Invalid GRES=%s", gres_ptr);
			xfree(job_ptr->gres);
			job_ptr->gres = orig_gres;
			return ESLURM_INVALID_GRES;
		}
		xfree(orig_gres);
	}

	if (wckey_ptr) {
		int rc = update_job_wckey("update_job", job_ptr, wckey_ptr);
		if (rc != SLURM_SUCCESS) {
			error("wiki: MODIFYJOB Invalid WCKEY=%s", wckey_ptr);
			return rc;
		}
	}

	if (update_accounting) {
		if (job_ptr->details && job_ptr->details->begin_time) {
			/* Update job record in accounting to reflect
			 * the changes */
			jobacct_storage_g_job_start(acct_db_conn, job_ptr);
		}
	}

	return SLURM_SUCCESS;
}
Example #14
/*
 * The remainder of this file implements the standard SLURM checkpoint API.
 */
extern int slurm_ckpt_op (uint32_t job_id, uint32_t step_id,
			  struct step_record *step_ptr, uint16_t op,
			  uint16_t data, char *image_dir, time_t * event_time,
			  uint32_t *error_code, char **error_msg )
{
	int rc = SLURM_SUCCESS;
	struct check_job_info *check_ptr;
	uint16_t done_sig = 0;
	struct job_record *job_ptr;
	struct node_record *node_ptr;
	pthread_attr_t attr;
	pthread_t ckpt_agent_tid = 0;
	char *nodelist;
	struct ckpt_req *req_ptr;

	/* job/step checked already */
	job_ptr = find_job_record(job_id);
	if (!job_ptr)
		return ESLURM_INVALID_JOB_ID;
	if (step_id == SLURM_BATCH_SCRIPT) {
		check_ptr = (struct check_job_info *)job_ptr->check_job;
		node_ptr = find_first_node_record(job_ptr->node_bitmap);
		nodelist = node_ptr->name;
	} else {
		step_ptr = find_step_record(job_ptr, step_id);
		if (!step_ptr)
			return ESLURM_INVALID_JOB_ID;
		check_ptr = (struct check_job_info *)step_ptr->check_job;
		nodelist = step_ptr->step_layout->node_list;
	}
	xassert(check_ptr);

	switch (op) {
	case CHECK_ABLE:
		if (check_ptr->disabled)
			rc = ESLURM_DISABLED;
		else {
			*event_time = check_ptr->time_stamp;
			rc = SLURM_SUCCESS;
		}
		break;
	case CHECK_DISABLE:
		check_ptr->disabled++;
		break;
	case CHECK_ENABLE:
		check_ptr->disabled--;
		break;
	case CHECK_REQUEUE:
		if (step_id != SLURM_BATCH_SCRIPT) {
			rc = ESLURM_NOT_SUPPORTED;
			break;
		}
		/* no break */
	case CHECK_VACATE:
		done_sig = SIGTERM;
		/* no break */
	case CHECK_CREATE:
		if (check_ptr->disabled) {
			rc = ESLURM_DISABLED;
			break;
		}
		if (check_ptr->time_stamp != 0) {
			rc = EALREADY;
			break;
		}

		check_ptr->time_stamp = time(NULL);
		check_ptr->error_code = 0;
		xfree(check_ptr->error_msg);

		req_ptr = xmalloc(sizeof(struct ckpt_req));
		if (!req_ptr) {
			rc = ENOMEM;
			break;
		}
		req_ptr->gid = job_ptr->group_id;
		req_ptr->uid = job_ptr->user_id;
		req_ptr->job_id = job_id;
		req_ptr->step_id = step_id;
		req_ptr->begin_time = check_ptr->time_stamp;
		req_ptr->wait = data;
		req_ptr->image_dir = xstrdup(image_dir);
		req_ptr->nodelist = xstrdup(nodelist);
		req_ptr->sig_done = done_sig;
		req_ptr->op = op;

		slurm_attr_init(&attr);
		if (pthread_attr_setdetachstate(&attr,
						PTHREAD_CREATE_DETACHED)) {
			error("pthread_attr_setdetachstate: %m");
			rc = errno;
			break;
		}

		if (pthread_create(&ckpt_agent_tid, &attr, _ckpt_agent_thr,
				   req_ptr)) {
			error("pthread_create: %m");
			rc = errno;
			break;
		}
		slurm_attr_destroy(&attr);

		break;

	case CHECK_RESTART:
		if (step_id != SLURM_BATCH_SCRIPT) {
			rc = ESLURM_NOT_SUPPORTED;
			break;
		}
		/* create a batch job from saved desc */
		rc = ESLURM_NOT_SUPPORTED;
		/* TODO: save job script */
		break;

	case CHECK_ERROR:
		xassert(error_code);
		xassert(error_msg);
		*error_code = check_ptr->error_code;
		xfree(*error_msg);
		*error_msg = xstrdup(check_ptr->error_msg);
		break;
	default:
		error("Invalid checkpoint operation: %d", op);
		rc = EINVAL;
	}

	return rc;
}
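Note the deliberate switch fall-through marked by the /* no break */ comments above: CHECK_REQUEUE falls into CHECK_VACATE, which arms done_sig = SIGTERM, and both fall into CHECK_CREATE. Requeue and vacate are thus implemented as a checkpoint-create whose agent thread signals the job once the image has been written (see req_ptr->sig_done here and _ckpt_agent_thr in example #20).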
Example #15
File: gang.c  Project: corburn/slurm
static void _preempt_job_dequeue(void)
{
	struct job_record *job_ptr;
	uint32_t job_id, *tmp_id;
	uint16_t preempt_mode;

	xassert(preempt_job_list);
	while ((tmp_id = list_pop(preempt_job_list))) {
		int rc = SLURM_ERROR;
		job_id = *tmp_id;
		xfree(tmp_id);

		if ((job_ptr = find_job_record(job_id)) == NULL) {
			error("_preempt_job_dequeue could not find job %u",
			      job_id);
			continue;
		}
		preempt_mode = slurm_job_preempt_mode(job_ptr);

		if (preempt_mode == PREEMPT_MODE_SUSPEND) {
			if ((rc = _suspend_job(job_id)) == ESLURM_DISABLED)
				rc = SLURM_SUCCESS;
		} else if (preempt_mode == PREEMPT_MODE_CANCEL) {
			rc = job_signal(job_ptr->job_id, SIGKILL, 0, 0, true);
			if (rc == SLURM_SUCCESS) {
				info("preempted job %u has been killed",
				     job_ptr->job_id);
			}
		} else if (preempt_mode == PREEMPT_MODE_CHECKPOINT) {
			checkpoint_msg_t ckpt_msg;
			memset(&ckpt_msg, 0, sizeof(checkpoint_msg_t));
			ckpt_msg.op	   = CHECK_REQUEUE;
			ckpt_msg.job_id    = job_ptr->job_id;
			rc = job_checkpoint(&ckpt_msg, 0, -1,
					    (uint16_t)NO_VAL);
			if (rc == ESLURM_NOT_SUPPORTED) {
				memset(&ckpt_msg, 0, sizeof(checkpoint_msg_t));
				ckpt_msg.op	   = CHECK_VACATE;
				ckpt_msg.job_id    = job_ptr->job_id;
				rc = job_checkpoint(&ckpt_msg, 0, -1,
						    (uint16_t)NO_VAL);
			}
			if (rc == SLURM_SUCCESS) {
				info("preempted job %u has been checkpointed",
				     job_ptr->job_id);
			} else
				error("preempted job %u could not be "
				      "checkpointed: %s",
				      job_ptr->job_id, slurm_strerror(rc));
		} else if ((preempt_mode == PREEMPT_MODE_REQUEUE) &&
			   job_ptr->batch_flag && job_ptr->details &&
			   (job_ptr->details->requeue > 0)) {
			rc = job_requeue(0, job_ptr->job_id, -1,
					 (uint16_t)NO_VAL, true, 0);
			if (rc == SLURM_SUCCESS) {
				info("preempted job %u has been requeued",
				     job_ptr->job_id);
			} else
				error("preempted job %u could not be "
				      "requeued: %s",
				      job_ptr->job_id, slurm_strerror(rc));
		}

		if (rc != SLURM_SUCCESS) {
			rc = job_signal(job_ptr->job_id, SIGKILL, 0, 0, true);
			if (rc == SLURM_SUCCESS)
				info("preempted job %u had to be killed",
				     job_ptr->job_id);
			else {
				info("preempted job %u kill failure %s",
				     job_ptr->job_id, slurm_strerror(rc));
			}
		}
	}

	return;
}
Example #16
/*
 * get_jobs - get information on specific job(s) changed since some time
 * cmd_ptr IN   - CMD=GETJOBS ARG=[<UPDATETIME>:<JOBID>[:<JOBID>]...]
 *                              [<UPDATETIME>:ALL]
 * err_code OUT - 0 or an error code
 * err_msg OUT  - response message
 * NOTE: xfree() err_msg if err_code is zero
 * RET 0 on success, -1 on failure
 *
 * Response format
 * ARG=<cnt>#<JOBID>;
 *	STATE=<state>;			Moab equivalent job state
 *	[EXITCODE=<number>;]		Job exit code, if completed
 *	[RFEATURES=<features>;]		required features, if any,
 *					NOTE: OR operator not supported
 *	[HOSTLIST=<node1:node2>;]	list of required nodes, if any
 *	[EXCLUDE_HOSTLIST=<node1:node2>;]	list of excluded nodes, if any
 *	[STARTDATE=<uts>;]		earliest start time, if any
 *	[MAXNODES=<nodes>;]		maximum number of nodes, 0 if no limit
 *	[TASKLIST=<node1:node2>;]	nodes in use, if running or completing
 *	[REJMESSAGE=<str>;]		reason job is not running, if any
 *	[IWD=<directory>;]		Initial Working Directory
 *	[FLAGS=INTERACTIVE;]		set if interactive (not batch) job
 *	[GRES=<name>[:<count>[*cpus]],...;] generic resources required by the
 *					job on a per node basis
 *	[WCKEY=<key>;]			workload characterization key for job
 *	UPDATETIME=<uts>;		time last active
 *	WCLIMIT=<secs>;			wall clock time limit, seconds
 *	TASKS=<cpus>;			CPUs required
 *	NODES=<nodes>;			count of nodes required or allocated
 *	DPROCS=<cpus_per_task>;		count of CPUs required per task
 *	QUEUETIME=<uts>;		submission time
 *	STARTTIME=<uts>;		time execution started
 *	RCLASS=<partition>;		SLURM partition name
 *	RMEM=<MB>;			MB of memory required
 *	RDISK=<MB>;			MB of disk space required
 *	[COMMENT=<whatever>;]		job dependency or account number
 *	[COMPLETETIME=<uts>;]		termination time
 *	[SUSPENDTIME=<secs>;]		seconds that job has been suspended
 *	UNAME=<user_name>;		user name
 *	GNAME=<group_name>;		group name
 *	NAME=<job_name>;		job name
 * [#<JOBID>;...];			additional jobs, if any
 *
 */
extern int	get_jobs(char *cmd_ptr, int *err_code, char **err_msg)
{
	char *arg_ptr = NULL, *tmp_char = NULL, *tmp_buf = NULL, *buf = NULL;
	time_t update_time;
	/* Locks: read job, partition */
	slurmctld_lock_t job_read_lock = {
		NO_LOCK, READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK };
	int job_rec_cnt = 0, buf_size = 0;

	if (cr_test == 0) {
		select_g_get_info_from_plugin(SELECT_CR_PLUGIN, NULL,
					      &cr_enabled);
		cr_test = 1;
	}

	arg_ptr = strstr(cmd_ptr, "ARG=");
	if (arg_ptr == NULL) {
		*err_code = -300;
		*err_msg = "GETJOBS lacks ARG";
		error("wiki: GETJOBS lacks ARG");
		return -1;
	}
	update_time = (time_t) strtoul(arg_ptr+4, &tmp_char, 10);
	if (tmp_char[0] != ':') {
		*err_code = -300;
		*err_msg = "Invalid ARG value";
		error("wiki: GETJOBS has invalid ARG value");
		return -1;
	}
	if (job_list == NULL) {
		*err_code = -140;
		*err_msg = "Still performing initialization";
		error("wiki: job_list not yet initilized");
		return -1;
	}
	tmp_char++;
	lock_slurmctld(job_read_lock);
	if (xstrncmp(tmp_char, "ALL", 3) == 0) {
		/* report all jobs */
		buf = _dump_all_jobs(&job_rec_cnt, update_time);
	} else {
		struct job_record *job_ptr = NULL;
		char *job_name = NULL, *tmp2_char = NULL;
		uint32_t job_id;

		job_name = strtok_r(tmp_char, ":", &tmp2_char);
		while (job_name) {
			job_id = (uint32_t) strtoul(job_name, NULL, 10);
			job_ptr = find_job_record(job_id);
			tmp_buf = _dump_job(job_ptr, update_time);
			if (job_rec_cnt > 0)
				xstrcat(buf, "#");
			xstrcat(buf, tmp_buf);
			xfree(tmp_buf);
			job_rec_cnt++;
			job_name = strtok_r(NULL, ":", &tmp2_char);
		}
	}
	unlock_slurmctld(job_read_lock);

	/* Prepend ("ARG=%d", job_rec_cnt) to reply message */
	if (buf)
		buf_size = strlen(buf);
	tmp_buf = xmalloc(buf_size + 32);
	if (job_rec_cnt)
		sprintf(tmp_buf, "SC=0 ARG=%d#%s", job_rec_cnt, buf);
	else
		sprintf(tmp_buf, "SC=0 ARG=0#");
	xfree(buf);
	*err_code = 0;
	*err_msg = tmp_buf;
	return 0;
}
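For reference, a reply for a single running job, following the response format documented above, might look like this (every value here is hypothetical):

SC=0 ARG=1#1234;STATE=Running;UPDATETIME=1400000000;WCLIMIT=3600;TASKS=16;NODES=2;DPROCS=8;QUEUETIME=1399990000;STARTTIME=1399999000;RCLASS=debug;RMEM=1024;RDISK=0;UNAME=alice;GNAME=users;NAME=test_job;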
Example #17
static int	_job_modify(uint32_t jobid, char *bank_ptr,
			char *depend_ptr, char *new_hostlist,
			uint32_t new_node_cnt, char *part_name_ptr,
			uint32_t new_time_limit)
{
	struct job_record *job_ptr;
	bool update_accounting = false;

	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL) {
		error("wiki: MODIFYJOB has invalid jobid %u", jobid);
		return ESLURM_INVALID_JOB_ID;
	}
	if (IS_JOB_FINISHED(job_ptr)) {
		error("wiki: MODIFYJOB jobid %u is finished", jobid);
		return ESLURM_DISABLED;
	}

	if (depend_ptr) {
		int rc = update_job_dependency(job_ptr, depend_ptr);
		if (rc == SLURM_SUCCESS) {
			info("wiki: changed job %u dependency to %s",
				jobid, depend_ptr);
		} else {
			error("wiki: changing job %u dependency to %s",
				jobid, depend_ptr);
			return EINVAL;
		}
	}

	if (new_time_limit) {
		time_t old_time = job_ptr->time_limit;
		job_ptr->time_limit = new_time_limit;
		info("wiki: change job %u time_limit to %u",
			jobid, new_time_limit);
		/* Update end_time based upon change
		 * to preserve suspend time info */
		job_ptr->end_time = job_ptr->end_time +
				((job_ptr->time_limit -
				  old_time) * 60);
		last_job_update = time(NULL);
	}

	if (bank_ptr) {
		if (update_job_account("wiki", job_ptr, bank_ptr)
		   != SLURM_SUCCESS)
			return EINVAL;
		else
			update_accounting = true;
	}

	if (new_hostlist) {
		int rc = 0, task_cnt;
		hostlist_t hl;
		char *tasklist;

		if (!IS_JOB_PENDING(job_ptr) || !job_ptr->details) {
			/* Job is done, nothing to reset */
			if (new_hostlist[0] == '\0')
				goto host_fini;
			error("wiki: MODIFYJOB tasklist of non-pending "
				"job %u", jobid);
			return ESLURM_DISABLED;
		}

		xfree(job_ptr->details->req_nodes);
		FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
		if (new_hostlist[0] == '\0')
			goto host_fini;

		tasklist = moab2slurm_task_list(new_hostlist, &task_cnt);
		if (tasklist == NULL) {
			rc = 1;
			goto host_fini;
		}
		hl = hostlist_create(tasklist);
		if (hl == 0) {
			rc = 1;
			goto host_fini;
		}
		hostlist_uniq(hl);
		hostlist_sort(hl);
		job_ptr->details->req_nodes =
			hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);
		if (job_ptr->details->req_nodes == NULL) {
			rc = 1;
			goto host_fini;
		}
		if (node_name2bitmap(job_ptr->details->req_nodes, false,
                                     &job_ptr->details->req_node_bitmap)) {
			rc = 1;
			goto host_fini;
		}

host_fini:	if (rc) {
			info("wiki: change job %u invalid hostlist %s",
			     jobid, new_hostlist);
			xfree(job_ptr->details->req_nodes);
			return EINVAL;
		} else {
			info("wiki: change job %u hostlist %s",
			     jobid, new_hostlist);
			update_accounting = true;
		}
	}

	if (part_name_ptr) {
		struct part_record *part_ptr;
		if (!IS_JOB_PENDING(job_ptr)) {
			error("wiki: MODIFYJOB partition of non-pending "
			      "job %u", jobid);
			return ESLURM_DISABLED;
		}

		part_ptr = find_part_record(part_name_ptr);
		if (part_ptr == NULL) {
			error("wiki: MODIFYJOB has invalid partition %s",
				part_name_ptr);
			return ESLURM_INVALID_PARTITION_NAME;
		}
		info("wiki: change job %u partition %s",
			jobid, part_name_ptr);
		xfree(job_ptr->partition);
		job_ptr->partition = xstrdup(part_name_ptr);
		job_ptr->part_ptr = part_ptr;
		last_job_update = time(NULL);
		update_accounting = true;
	}
	if (new_node_cnt) {
		if (IS_JOB_PENDING(job_ptr) && job_ptr->details) {
			job_ptr->details->min_nodes = new_node_cnt;
			if (job_ptr->details->max_nodes
			&&  (job_ptr->details->max_nodes < new_node_cnt))
				job_ptr->details->max_nodes = new_node_cnt;
			info("wiki: change job %u min_nodes to %u",
				jobid, new_node_cnt);
			last_job_update = time(NULL);
			update_accounting = true;
		} else {
			error("wiki: MODIFYJOB node count of non-pending "
				"job %u", jobid);
			return ESLURM_DISABLED;
		}
	}

	if (update_accounting) {
		/* Update job record in accounting to reflect changes */
		jobacct_storage_job_start_direct(acct_db_conn, job_ptr);
	}

	return SLURM_SUCCESS;
}
Example #18
File: gang.c  Project: corburn/slurm
/* rebuild data structures from scratch
 *
 * A reconfigure can affect this plugin in these ways:
 * - partitions can be added or removed
 *   - this affects the gs_part_list
 * - nodes can be removed from a partition, or added to a partition
 *   - this affects the size of the active resmap
 *
 * Here's the plan:
 * 1. save a copy of the global structures, and then construct
 *    new ones.
 * 2. load the new partition structures with existing jobs,
 *    confirming the job exists and resizing their resmaps
 *    (if necessary).
 * 3. make sure all partitions are accounted for. If a partition
 *    was removed, make sure any jobs that were in the queue and
 *    that were suspended are resumed. Conversely, if a partition
 *    was added, check for existing jobs that may be contending
 *    for resources that we could begin timeslicing.
 * 4. delete the old global structures and return.
 */
extern int gs_reconfig(void)
{
	int i;
	ListIterator part_iterator;
	struct gs_part *p_ptr, *newp_ptr;
	List old_part_list;
	struct job_record *job_ptr;
	struct gs_job *j_ptr;

	if (!timeslicer_thread_id) {
		/* gs_init() will be called later from read_slurm_conf()
		 * if we are enabling gang scheduling via reconfiguration */
		return SLURM_SUCCESS;
	}

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: entering gs_reconfig");
	pthread_mutex_lock(&data_mutex);

	old_part_list = gs_part_list;
	gs_part_list = NULL;

	/* reset global data */
	gs_fast_schedule = slurm_get_fast_schedule();
	gr_type = _get_gr_type();
	_load_phys_res_cnt();
	_build_parts();

	/* scan the old part list and add existing jobs to the new list */
	part_iterator = list_iterator_create(old_part_list);
	while ((p_ptr = (struct gs_part *) list_next(part_iterator))) {
		newp_ptr = (struct gs_part *) list_find_first(gs_part_list,
							      _find_gs_part,
							      p_ptr->part_name);
		if (!newp_ptr) {
			/* this partition was removed, so resume
			 * any jobs suspended by gang and continue */
			for (i = 0; i < p_ptr->num_jobs; i++) {
				j_ptr = p_ptr->job_list[i];
				if ((j_ptr->sig_state == GS_SUSPEND) &&
				    (j_ptr->job_ptr->priority != 0)) {
					info("resuming job in missing part %s",
					     p_ptr->part_name);
					_resume_job(j_ptr->job_id);
					j_ptr->sig_state = GS_RESUME;
				}
			}
			continue;
		}
		if (p_ptr->num_jobs == 0)
			/* no jobs to transfer */
			continue;
		/* we need to transfer the jobs from p_ptr to new_ptr and
		 * adjust their resmaps (if necessary). then we need to create
		 * the active resmap and adjust the state of each job (if
		 * necessary). NOTE: there could be jobs that only overlap
		 * on nodes that are no longer in the partition, but we're
		 * not going to worry about those cases.
		 *
		 * add the jobs from p_ptr into new_ptr in their current order
		 * to preserve the state of timeslicing.
		 */
		for (i = 0; i < p_ptr->num_jobs; i++) {
			job_ptr = find_job_record(p_ptr->job_list[i]->job_id);
			if (job_ptr == NULL) {
				/* job no longer exists in SLURM, so drop it */
				continue;
			}
			/* resume any job that is suspended by us */
			if (IS_JOB_SUSPENDED(job_ptr) && job_ptr->priority) {
				if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG){
					info("resuming job %u apparently "
					     "suspended by gang",
					     job_ptr->job_id);
				}
				_resume_job(job_ptr->job_id);
			}

			/* transfer the job as long as it is still active */
			if (IS_JOB_SUSPENDED(job_ptr) ||
			    IS_JOB_RUNNING(job_ptr)) {
				_add_job_to_part(newp_ptr, job_ptr);
			}
		}
	}
	list_iterator_destroy(part_iterator);

	/* confirm all jobs. Scan the master job_list and confirm that we
	 * are tracking all jobs */
	_scan_slurm_job_list();

	FREE_NULL_LIST(old_part_list);
	pthread_mutex_unlock(&data_mutex);

	_preempt_job_dequeue();	/* MUST BE OUTSIDE OF data_mutex lock */
	if (slurmctld_conf.debug_flags & DEBUG_FLAG_GANG)
		info("gang: leaving gs_reconfig");

	return SLURM_SUCCESS;
}
Example #19
static int _parse_job_info(void **dest, slurm_parser_enum_t type,
			   const char *key, const char *value,
			   const char *line, char **leftover)
{
	s_p_hashtbl_t *job_tbl;
	char *name = NULL, *tmp = NULL, tmp_name[64];
	uint32_t job_id = 0, size = 0, user_id = 0;
	uint16_t state = 0;
	bb_alloc_t *bb_ptr;
	struct job_record *job_ptr = NULL;
	static s_p_options_t _job_options[] = {
		{"JobID",S_P_STRING},
		{"Name", S_P_STRING},
		{"Size", S_P_STRING},
		{"State", S_P_STRING},
		{NULL}
	};

	*dest = NULL;
	user_id = atoi(value);
	job_tbl = s_p_hashtbl_create(_job_options);
	s_p_parse_line(job_tbl, *leftover, leftover);
	if (s_p_get_string(&tmp, "JobID", job_tbl))
		job_id = atoi(tmp);
	s_p_get_string(&name, "Name", job_tbl);
	if (s_p_get_string(&tmp, "Size", job_tbl))
		size =  _get_size_num(tmp);
	if (s_p_get_string(&tmp, "State", job_tbl))
		state = bb_state_num(tmp);

#if 1
	info("%s: JobID:%u Name:%s Size:%u State:%u UserID:%u",
	     __func__, job_id, name, size, state, user_id);
#endif
	if (job_id) {
		job_ptr = find_job_record(job_id);
		if (!job_ptr) {
			error("%s: Vestigial buffer for job ID %u. "
			      "Clear manually",
			      plugin_type, job_id);
		}
		snprintf(tmp_name, sizeof(tmp_name), "VestigialJob%u", job_id);
		job_id = 0;
		name = tmp_name;
	}
	if (job_ptr) {
		if ((bb_ptr = _find_bb_job_rec(job_ptr)) == NULL) {
			bb_ptr = _alloc_bb_job_rec(job_ptr);
			bb_ptr->state = state;
		}
	} else {
		if ((bb_ptr = _find_bb_name_rec(name, user_id)) == NULL) {
			bb_ptr = _alloc_bb_name_rec(name, user_id);
			bb_ptr->size = size;
			bb_ptr->state = state;
			return SLURM_SUCCESS;
		}
	}

	if (bb_ptr->user_id != user_id) {
		error("%s: User ID mismatch (%u != %u). "
		      "BB UserID=%u JobID=%u Name=%s",
		      plugin_type, bb_ptr->user_id, user_id,
		      bb_ptr->user_id, bb_ptr->job_id, bb_ptr->name);
	}
	if (bb_ptr->size != size) {
		error("%s: Size mismatch (%u != %u). "
		      "BB UserID=%u JobID=%u Name=%s",
		      plugin_type, bb_ptr->size, size,
		      bb_ptr->user_id, bb_ptr->job_id, bb_ptr->name);
		bb_ptr->size = MAX(bb_ptr->size, size);
	}
	if (bb_ptr->state != state) {
		/* State is subject to real-time changes */
		debug("%s: State mismatch (%s != %s). "
		      "BB UserID=%u JobID=%u Name=%s",
		      plugin_type, bb_state_string(bb_ptr->state),
		      bb_state_string(state),
		      bb_ptr->user_id, bb_ptr->job_id, bb_ptr->name);
	}

	return SLURM_SUCCESS;
}
Example #20
/* Checkpoint processing pthread, run detached;
 * cancelled on plugin termination */
static void *_ckpt_agent_thr(void *arg)
{
	struct ckpt_req *req = (struct ckpt_req *)arg;
	int rc;
	/* Locks: write job */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK };
	struct job_record *job_ptr;
	struct step_record *step_ptr;
	struct check_job_info *check_ptr;

	/* only perform ckpt operation of ONE JOB */
	slurm_mutex_lock(&ckpt_agent_mutex);
	while (ckpt_agent_jobid && ckpt_agent_jobid != req->job_id) {
		pthread_cond_wait(&ckpt_agent_cond, &ckpt_agent_mutex);
	}
	ckpt_agent_jobid = req->job_id;
	ckpt_agent_count ++;
	slurm_mutex_unlock(&ckpt_agent_mutex);

	debug3("checkpoint/blcr: sending checkpoint tasks request %u to %u.%u",
	       req->op, req->job_id, req->step_id);

	rc = checkpoint_tasks(req->job_id, req->step_id, req->begin_time,
			      req->image_dir, req->wait, req->nodelist);
	if (rc != SLURM_SUCCESS) {
		error("checkpoint/blcr: error on checkpoint request %u to "
		      "%u.%u: %s", req->op, req->job_id, req->step_id,
		      slurm_strerror(rc));
	}
	if (req->op == CHECK_REQUEUE)
		_requeue_when_finished(req->job_id);

	lock_slurmctld(job_write_lock);
	job_ptr = find_job_record(req->job_id);
	if (!job_ptr) {
		error("_ckpt_agent_thr: job finished");
		goto out;
	}
	if (req->step_id == SLURM_BATCH_SCRIPT) {	/* batch job */
		check_ptr = (struct check_job_info *)job_ptr->check_job;
	} else {
		step_ptr = find_step_record(job_ptr, req->step_id);
		if (! step_ptr) {
			error("_ckpt_agent_thr: step finished");
			goto out;
		}
		check_ptr = (struct check_job_info *)step_ptr->check_job;
	}
	check_ptr->time_stamp = 0;
	check_ptr->error_code = rc;
	if (check_ptr->error_code != SLURM_SUCCESS)
		check_ptr->error_msg = xstrdup(slurm_strerror(rc));

 out:
	unlock_slurmctld(job_write_lock);

	if (req->sig_done) {
		_send_sig(req->job_id, req->step_id, req->sig_done,
			  req->nodelist);
	}

	_on_ckpt_complete(req->gid, req->uid, req->job_id, req->step_id,
			  req->image_dir, rc);

	slurm_mutex_lock(&ckpt_agent_mutex);
	ckpt_agent_count --;
	if (ckpt_agent_count == 0) {
		ckpt_agent_jobid = 0;
		pthread_cond_broadcast(&ckpt_agent_cond);
	}
	slurm_mutex_unlock(&ckpt_agent_mutex);
	_ckpt_req_free(req);
	return NULL;
}
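The ckpt_agent_mutex/ckpt_agent_cond pair above implements a single-owner gate: threads checkpointing the same job may run concurrently (ckpt_agent_count tracks them), while threads for a different job wait until the count drains to zero and the gate is released. A minimal stand-alone sketch of the same gate, with illustrative names:

#include <pthread.h>

static pthread_mutex_t gate_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  gate_cond = PTHREAD_COND_INITIALIZER;
static unsigned int gate_owner;	/* 0 means the gate is free */
static int gate_count;

/* Block until the gate is free or already owned by this id. */
static void gate_enter(unsigned int id)
{
	pthread_mutex_lock(&gate_lock);
	while (gate_owner && (gate_owner != id))
		pthread_cond_wait(&gate_cond, &gate_lock);
	gate_owner = id;
	gate_count++;
	pthread_mutex_unlock(&gate_lock);
}

/* Drop one reference; wake all waiters when the last holder leaves. */
static void gate_leave(void)
{
	pthread_mutex_lock(&gate_lock);
	if (--gate_count == 0) {
		gate_owner = 0;
		pthread_cond_broadcast(&gate_cond);
	}
	pthread_mutex_unlock(&gate_lock);
}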
Example #21
File: agent.c  Project: alepharchives/slurm
/* Test if a batch launch request should be deferred
 * RET -1: abort the request, pending job cancelled
 *      0: execute the request now
 *      1: defer the request
 */
static int _batch_launch_defer(queued_request_t *queued_req_ptr)
{
	agent_arg_t *agent_arg_ptr;
	batch_job_launch_msg_t *launch_msg_ptr;
	time_t now = time(NULL);
	struct job_record  *job_ptr;
	int delay_time, nodes_ready = 0;

	agent_arg_ptr = queued_req_ptr->agent_arg_ptr;
	if (agent_arg_ptr->msg_type != REQUEST_BATCH_JOB_LAUNCH)
		return 0;

	if (difftime(now, queued_req_ptr->last_attempt) < 10) {
		/* Reduce overhead by only testing once every 10 secs */
		return 1;
	}

	launch_msg_ptr = (batch_job_launch_msg_t *)agent_arg_ptr->msg_args;
	job_ptr = find_job_record(launch_msg_ptr->job_id);
	if ((job_ptr == NULL) ||
	    (!IS_JOB_RUNNING(job_ptr) && !IS_JOB_SUSPENDED(job_ptr))) {
		info("agent(batch_launch): removed pending request for "
		     "cancelled job %u",
		     launch_msg_ptr->job_id);
		return -1;	/* job cancelled while waiting */
	}

	if (job_ptr->wait_all_nodes) {
		(void) job_node_ready(launch_msg_ptr->job_id, &nodes_ready);
	} else {
#ifdef HAVE_FRONT_END
		nodes_ready = 1;
#else
		struct node_record *node_ptr;
		char *hostname;

		hostname = hostlist_deranged_string_xmalloc(
					agent_arg_ptr->hostlist);
		node_ptr = find_node_record(hostname);
		if (node_ptr == NULL) {
			error("agent(batch_launch) removed pending request for "
			      "job %u, missing node %s",
			      launch_msg_ptr->job_id, hostname);
			xfree(hostname);
			return -1;	/* invalid request?? */
		}
		xfree(hostname);
		if (!IS_NODE_POWER_SAVE(node_ptr) &&
		    !IS_NODE_NO_RESPOND(node_ptr)) {
			nodes_ready = 1;
		}
#endif
	}

	delay_time = difftime(now, job_ptr->start_time);
	if (nodes_ready) {
		/* ready to launch, adjust time limit for boot time */
		if (delay_time && (job_ptr->time_limit != INFINITE) &&
		    (!wiki2_sched)) {
			info("Job %u launch delayed by %d secs, "
			     "updating end_time",
			     launch_msg_ptr->job_id, delay_time);
			job_ptr->end_time += delay_time;
		}
		queued_req_ptr->last_attempt = (time_t) 0;
		return 0;
	}

	if (queued_req_ptr->last_attempt == 0) {
		queued_req_ptr->first_attempt = now;
		queued_req_ptr->last_attempt  = now;
	} else if (difftime(now, queued_req_ptr->first_attempt) >=
				 slurm_get_resume_timeout()) {
		error("agent waited too long for nodes to respond, "
		      "sending batch request anyway...");
		if (delay_time && (job_ptr->time_limit != INFINITE) &&
		    (!wiki2_sched)) {
			info("Job %u launch delayed by %d secs, "
			     "updating end_time",
			     launch_msg_ptr->job_id, delay_time);
			job_ptr->end_time += delay_time;
		}
		queued_req_ptr->last_attempt = (time_t) 0;
		return 0;
	}

	queued_req_ptr->last_attempt  = now;
	return 1;
}
Example #22
/* Handle timeout of burst buffer events:
 * 1. Purge per-job burst buffer records when the stage-out has completed and
 *    the job has been purged from Slurm
 * 2. Test for StageInTimeout events
 * 3. Test for StageOutTimeout events
 */
static void _timeout_bb_rec(void)
{
	struct job_record *job_ptr;
	bb_alloc_t **bb_pptr, *bb_ptr = NULL;
	uint32_t age;
	time_t now = time(NULL);
	int i;

	for (i = 0; i < BB_HASH_SIZE; i++) {
		bb_pptr = &bb_state.bb_ahash[i];
		bb_ptr = bb_state.bb_ahash[i];
		while (bb_ptr) {
			if (bb_ptr->seen_time < bb_state.last_load_time) {
				if (bb_ptr->job_id == 0) {
					info("%s: Persistent burst buffer %s "
					     "purged",
					     __func__, bb_ptr->name);
				} else if (bb_state.bb_config.debug_flag) {
					info("%s: burst buffer for job %u "
					     "purged",
					     __func__, bb_ptr->job_id);
				}
//FIXME: VESTIGIAL: Use bb_limit_rem
//				bb_remove_user_load(bb_ptr, &bb_state);
				*bb_pptr = bb_ptr->next;
				bb_free_alloc_buf(bb_ptr);
				break;
			}
			if ((bb_ptr->job_id != 0) &&
			    (bb_ptr->state >= BB_STATE_STAGED_OUT) &&
			    !find_job_record(bb_ptr->job_id)) {
				_stop_stage_out(bb_ptr->job_id);
				bb_ptr->cancelled = true;
				bb_ptr->end_time = 0;
				*bb_pptr = bb_ptr->next;
				bb_free_alloc_buf(bb_ptr);
				break;
			}
			age = difftime(now, bb_ptr->state_time);
			if ((bb_ptr->job_id != 0) &&
			    bb_state.bb_config.stop_stage_in &&
			    (bb_ptr->state == BB_STATE_STAGING_IN) &&
			    (bb_state.bb_config.stage_in_timeout != 0) &&
			    (!bb_ptr->cancelled) &&
			    (age >= bb_state.bb_config.stage_in_timeout)) {
				_stop_stage_in(bb_ptr->job_id);
				bb_ptr->cancelled = true;
				bb_ptr->end_time = 0;
				job_ptr = find_job_record(bb_ptr->job_id);
				if (job_ptr) {
					error("%s: StageIn timed out, holding "
					      "job %u",
					      __func__, bb_ptr->job_id);
					job_ptr->priority = 0;
					job_ptr->direct_set_prio = 1;
					job_ptr->state_reason = WAIT_HELD;
					xfree(job_ptr->state_desc);
					job_ptr->state_desc = xstrdup(
						"Burst buffer stage-in timeout");
					last_job_update = now;
				} else {
					error("%s: StageIn timed out for "
					      "vestigial job %u ",
					      __func__, bb_ptr->job_id);
				}
			}
			if ((bb_ptr->job_id != 0) &&
			    bb_state.bb_config.stop_stage_out &&
			    (bb_ptr->state == BB_STATE_STAGING_OUT) &&
			    (bb_state.bb_config.stage_out_timeout != 0) &&
			    (!bb_ptr->cancelled) &&
			    (age >= bb_state.bb_config.stage_out_timeout)) {
				error("%s: StageOut for job %u timed out",
				      __func__, bb_ptr->job_id);
				_stop_stage_out(bb_ptr->job_id);
				bb_ptr->cancelled = true;
				bb_ptr->end_time = 0;
			}
			bb_pptr = &bb_ptr->next;
			bb_ptr = bb_ptr->next;
		}
	}
}
Example #23
static char *	_will_run_test(uint32_t jobid, time_t start_time,
			       char *node_list, int *err_code, char **err_msg)
{
	struct job_record *job_ptr = NULL;
	struct part_record *part_ptr;
	bitstr_t *avail_bitmap = NULL, *resv_bitmap = NULL;
	bitstr_t *exc_core_bitmap = NULL;
	char *hostlist, *reply_msg = NULL;
	uint32_t min_nodes, max_nodes, req_nodes;
	int rc;
	time_t start_res, orig_start_time;
	List preemptee_candidates;

	debug2("wiki2: will_run job_id=%u start_time=%u node_list=%s",
		jobid, (uint32_t)start_time, node_list);

	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL) {
		*err_code = -700;
		*err_msg = "No such job";
		error("wiki: Failed to find job %u", jobid);
		return NULL;
	}
	if ((job_ptr->details == NULL) || (!IS_JOB_PENDING(job_ptr))) {
		*err_code = -700;
		*err_msg = "WillRun not applicable to non-pending job";
		error("wiki: WillRun on non-pending job %u", jobid);
		return NULL;
	}

	part_ptr = job_ptr->part_ptr;
	if (part_ptr == NULL) {
		*err_code = -700;
		*err_msg = "Job lacks a partition";
		error("wiki: Job %u lacks a partition", jobid);
		return NULL;
	}

	if ((node_list == NULL) || (node_list[0] == '\0')) {
		/* assume all nodes available to job for testing */
		avail_bitmap = bit_copy(avail_node_bitmap);
	} else if (node_name2bitmap(node_list, false, &avail_bitmap) != 0) {
		*err_code = -700;
		*err_msg = "Invalid available nodes value";
		error("wiki: Attempt to set invalid available node "
		      "list for job %u, %s", jobid, node_list);
		return NULL;
	}

	/* Enforce reservation: access control, time and nodes */
	start_res = start_time;
	rc = job_test_resv(job_ptr, &start_res, true, &resv_bitmap,
			   &exc_core_bitmap);
	if (rc != SLURM_SUCCESS) {
		*err_code = -730;
		*err_msg = "Job denied access to reservation";
		error("wiki: reservation access denied for job %u", jobid);
		FREE_NULL_BITMAP(avail_bitmap);
		return NULL;
	}
	start_time = MAX(start_time, start_res);
	bit_and(avail_bitmap, resv_bitmap);
	FREE_NULL_BITMAP(resv_bitmap);

	/* Only consider nodes that are not DOWN or DRAINED */
	bit_and(avail_bitmap, avail_node_bitmap);

	/* Consider only nodes in this job's partition */
	if (part_ptr->node_bitmap)
		bit_and(avail_bitmap, part_ptr->node_bitmap);
	else {
		*err_code = -730;
		*err_msg = "Job's partition has no nodes";
		error("wiki: no nodes in partition %s for job %u",
			part_ptr->name, jobid);
		FREE_NULL_BITMAP(avail_bitmap);
		return NULL;
	}

	if (job_req_node_filter(job_ptr, avail_bitmap) != SLURM_SUCCESS) {
		/* Job probably has invalid feature list */
		*err_code = -730;
		*err_msg = "Job's required features not available "
			   "on selected nodes";
		error("wiki: job %u not runnable on hosts=%s",
		      jobid, node_list);
		FREE_NULL_BITMAP(avail_bitmap);
		return NULL;
	}
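	/* Invert the excluded-node bitmap so the AND removes the excluded
	 * nodes from avail_bitmap, then restore the original bitmap */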
	if (job_ptr->details->exc_node_bitmap) {
		bit_not(job_ptr->details->exc_node_bitmap);
		bit_and(avail_bitmap, job_ptr->details->exc_node_bitmap);
		bit_not(job_ptr->details->exc_node_bitmap);
	}
	if ((job_ptr->details->req_node_bitmap) &&
	    (!bit_super_set(job_ptr->details->req_node_bitmap,
			    avail_bitmap))) {
		*err_code = -730;
		*err_msg = "Job's required nodes not available";
		error("wiki: job %u not runnable on hosts=%s",
		      jobid, node_list);
		FREE_NULL_BITMAP(avail_bitmap);
		return NULL;
	}

	min_nodes = MAX(job_ptr->details->min_nodes, part_ptr->min_nodes);
	if (job_ptr->details->max_nodes == 0)
		max_nodes = part_ptr->max_nodes;
	else
		max_nodes = MIN(job_ptr->details->max_nodes,
				part_ptr->max_nodes);
	max_nodes = MIN(max_nodes, 500000); /* prevent overflows */
	if (job_ptr->details->max_nodes)
		req_nodes = max_nodes;
	else
		req_nodes = min_nodes;
	if (min_nodes > max_nodes) {
		/* job's min_nodes exceeds the partition's max_nodes */
		*err_code = -730;
		*err_msg = "Job's min_nodes > max_nodes";
		error("wiki: job %u not runnable on hosts=%s",
		      jobid, node_list);
		FREE_NULL_BITMAP(avail_bitmap);
		return NULL;
	}

	preemptee_candidates = slurm_find_preemptable_jobs(job_ptr);

	orig_start_time = job_ptr->start_time;
	rc = select_g_job_test(job_ptr, avail_bitmap,
			       min_nodes, max_nodes, req_nodes,
			       SELECT_MODE_WILL_RUN,
			       preemptee_candidates, NULL, exc_core_bitmap);
	if (preemptee_candidates)
		list_destroy(preemptee_candidates);

	if (rc == SLURM_SUCCESS) {
		char tmp_str[128];
		uint32_t proc_cnt = 0;

		*err_code = 0;

		xstrcat(reply_msg, "STARTINFO=");
#ifdef HAVE_BG
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &proc_cnt);
#else
		proc_cnt = job_ptr->total_cpus;
#endif
		snprintf(tmp_str, sizeof(tmp_str), "%u:%u@%u,",
			 jobid, proc_cnt, (uint32_t) job_ptr->start_time);
		xstrcat(reply_msg, tmp_str);
		hostlist = bitmap2node_name(avail_bitmap);
		xstrcat(reply_msg, hostlist);
		xfree(hostlist);
	} else {
		xstrcat(reply_msg, "Jobs not runable on selected nodes");
		error("wiki: jobs not runnable on nodes");
	}

	/* Restore pending job's expected start time */
	job_ptr->start_time = orig_start_time;
	FREE_NULL_BITMAP(avail_bitmap);
	return reply_msg;
}
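
The reply assembled above has the shape
"STARTINFO=<jobid>:<cpu_cnt>@<start_time>,<hostlist>". A minimal
consumer-side sketch using only standard C (parse_startinfo() is a
hypothetical helper, not part of the wiki2 plugin):

#include <stdio.h>

/* Parse the reply built by _will_run_test(); the hostlist is capped at
 * 127 characters purely for the sake of the example */
static int parse_startinfo(const char *reply, unsigned *jobid,
			   unsigned *cpu_cnt, unsigned *start_time,
			   char hosts[128])
{
	if (sscanf(reply, "STARTINFO=%u:%u@%u,%127s",
		   jobid, cpu_cnt, start_time, hosts) != 4)
		return -1;	/* malformed or error reply */
	return 0;
}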
Example #24
File: start_job.c Project: VURM/slurm
/*
 * Attempt to start a job
 * jobid     (IN) - job id
 * task_cnt  (IN) - total count of tasks to start
 * hostlist  (IN) - SLURM hostlist expression with no repeated hostnames
 * tasklist  (IN/OUT) - comma separated list of hosts with tasks to be started,
 *                  list hostname once per task to start
 * comment_ptr (IN) - new comment field for the job or NULL for no change
 * err_code (OUT) - Moab error code
 * err_msg  (OUT) - Moab error message
 */
static int	_start_job(uint32_t jobid, int task_cnt, char *hostlist,
			char *tasklist, char *comment_ptr,
			int *err_code, char **err_msg)
{
	int rc = 0, old_task_cnt = 1;
	struct job_record *job_ptr;
	/* Write lock on job info, read lock on node info */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, READ_LOCK, NO_LOCK };
	char *new_node_list = NULL;
	static char tmp_msg[128];
	bitstr_t *new_bitmap = (bitstr_t *) NULL;
	bitstr_t *save_req_bitmap = (bitstr_t *) NULL;
	bitoff_t i, bsize;
	int ll; /* layout info index */
	char *node_name, *node_idx, *node_cur, *save_req_nodes = NULL;
	size_t node_name_len;
	static uint32_t cr_test = 0, cr_enabled = 0;

	if (cr_test == 0) {
		select_g_get_info_from_plugin(SELECT_CR_PLUGIN, NULL,
						&cr_enabled);
		cr_test = 1;
	}

	lock_slurmctld(job_write_lock);
	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL) {
		*err_code = -700;
		*err_msg = "No such job";
		error("wiki: Failed to find job %u", jobid);
		rc = -1;
		goto fini;
	}

	if ((job_ptr->details == NULL) || (!IS_JOB_PENDING(job_ptr))) {
		*err_code = -700;
		*err_msg = "Job not pending, can't start";
		error("wiki: Attempt to start job %u in state %s",
			jobid, job_state_string(job_ptr->job_state));
		rc = -1;
		goto fini;
	}

	if (comment_ptr) {
		char *reserved = strstr(comment_ptr, "RESERVED:");
		if (reserved) {
			reserved += 9;
			job_ptr->details->reserved_resources =
				strtol(reserved, NULL, 10);
		}
		xfree(job_ptr->comment);
		job_ptr->comment = xstrdup(comment_ptr);
	}

	if (task_cnt) {
		new_node_list = xstrdup(hostlist);
		if (node_name2bitmap(new_node_list, false, &new_bitmap) != 0) {
			*err_code = -700;
			*err_msg = "Invalid TASKLIST";
			error("wiki: Attempt to set invalid node list for "
				"job %u, %s",
				jobid, hostlist);
			xfree(new_node_list);
			rc = -1;
			goto fini;
		}

		if (!bit_super_set(new_bitmap, avail_node_bitmap)) {
			/* Selected node is UP and not responding
			 * or it just went DOWN */
			*err_code = -700;
			*err_msg = "TASKLIST includes non-responsive node";
			error("wiki: Attempt to use non-responsive nodes for "
				"job %u, %s",
				jobid, hostlist);
			xfree(new_node_list);
			FREE_NULL_BITMAP(new_bitmap);
			rc = -1;
			goto fini;
		}

		/* User excluded node list incompatible with Wiki
		 * Exclude all nodes not explicitly requested */
		FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap);
		job_ptr->details->exc_node_bitmap = bit_copy(new_bitmap);
		bit_not(job_ptr->details->exc_node_bitmap);
	}

	/* Build layout information from tasklist (assuming that Moab
	 * sends a non-bracketed list of nodes, repeated as many times
	 * as cpus should be used per node); at this point, node names
	 * are comma-separated. This is _not_ a fast algorithm as it
	 * performs many string compares. */
	xfree(job_ptr->details->req_node_layout);
	if (task_cnt && cr_enabled) {
		uint16_t cpus_per_task = MAX(1, job_ptr->details->cpus_per_task);
		job_ptr->details->req_node_layout = (uint16_t *)
			xmalloc(bit_set_count(new_bitmap) * sizeof(uint16_t));
		bsize = bit_size(new_bitmap);
		for (i = 0, ll = -1; i < bsize; i++) {
			if (!bit_test(new_bitmap, i))
				continue;
			ll++;
			node_name = node_record_table_ptr[i].name;
			node_name_len  = strlen(node_name);
			if (node_name_len == 0)
				continue;
			node_cur = tasklist;
			while (*node_cur) {
				if ((node_idx = strstr(node_cur, node_name))) {
				if ((node_idx[node_name_len] == ',') ||
				    (node_idx[node_name_len] == '\0')) {
						job_ptr->details->
							req_node_layout[ll] +=
							cpus_per_task;
					}
					node_cur = strchr(node_idx, ',');
					if (node_cur)
						continue;
				}
				break;
			}
		}
	}

	/* save and update job state to start now */
	save_req_nodes = job_ptr->details->req_nodes;
	job_ptr->details->req_nodes = new_node_list;
	save_req_bitmap = job_ptr->details->req_node_bitmap;
	job_ptr->details->req_node_bitmap = new_bitmap;
	old_task_cnt = job_ptr->details->min_cpus;
	job_ptr->details->min_cpus = MAX(task_cnt, old_task_cnt);
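	/* very high priority so the schedule() call below starts this
	 * job first */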
	job_ptr->priority = 100000000;

 fini:	unlock_slurmctld(job_write_lock);
	if (rc)
		return rc;

	/* No errors so far */
	(void) schedule(INFINITE);	/* provides own locking */

	/* Check to ensure the job was actually started */
	lock_slurmctld(job_write_lock);
	if (job_ptr->job_id != jobid)
		job_ptr = find_job_record(jobid);

	if (job_ptr && (job_ptr->job_id == jobid) &&
	    (!IS_JOB_RUNNING(job_ptr))) {
		uint16_t wait_reason = 0;
		char *wait_string;

		if (IS_JOB_FAILED(job_ptr))
			wait_string = "Invalid request, job aborted";
		else {
			wait_reason = job_ptr->state_reason;
			if (wait_reason == WAIT_HELD) {
				/* some job is completing, slurmctld did
				 * not even try to schedule this job */
				wait_reason = WAIT_RESOURCES;
			}
			wait_string = job_reason_string(wait_reason);
			job_ptr->state_reason = WAIT_HELD;
			xfree(job_ptr->state_desc);
		}
		*err_code = -910 - wait_reason;
		snprintf(tmp_msg, sizeof(tmp_msg),
			"Could not start job %u(%s): %s",
			jobid, new_node_list, wait_string);
		*err_msg = tmp_msg;
		error("wiki: %s", tmp_msg);

		/* restore some of job state */
		job_ptr->priority = 0;
		job_ptr->details->min_cpus = old_task_cnt;
		rc = -1;
	}

	if (job_ptr && (job_ptr->job_id == jobid) && job_ptr->details) {
		/* Restore required node list in case job requeued */
		xfree(job_ptr->details->req_nodes);
		job_ptr->details->req_nodes = save_req_nodes;
		FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
		job_ptr->details->req_node_bitmap = save_req_bitmap;
		FREE_NULL_BITMAP(job_ptr->details->exc_node_bitmap);
		xfree(job_ptr->details->req_node_layout);
	} else {
		error("wiki: start_job(%u) job missing", jobid);
		xfree(save_req_nodes);
		FREE_NULL_BITMAP(save_req_bitmap);
	}

	unlock_slurmctld(job_write_lock);
	schedule_node_save();	/* provides own locking */
	schedule_job_save();	/* provides own locking */
	return rc;
}
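
Per the layout comment inside _start_job(), Moab repeats each hostname in
the tasklist once per task to be started on that node. A standalone sketch
of that counting scan (count_node_tasks() is a hypothetical helper and,
unlike the code above, checks both ends of the match for a full token):

#include <string.h>

/* Count how many times "node" appears as a complete comma-separated
 * token in "tasklist", e.g. count_node_tasks("tux0,tux0,tux1", "tux0")
 * returns 2 */
static int count_node_tasks(const char *tasklist, const char *node)
{
	size_t len = strlen(node);
	const char *cur = tasklist;
	int cnt = 0;

	while ((cur = strstr(cur, node))) {
		int head_ok = (cur == tasklist) || (cur[-1] == ',');
		int tail_ok = (cur[len] == ',') || (cur[len] == '\0');
		if (head_ok && tail_ok)
			cnt++;
		cur += len;
	}
	return cnt;
}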
Example #25
static void _xlate_before(char *depend, uint32_t submit_uid, uint32_t my_job_id)
{
	uint32_t job_id;
	char *last_ptr = NULL, *new_dep = NULL, *tok, *type;
	struct job_record *job_ptr;
	pthread_attr_t attr;
	pthread_t dep_thread;

	tok = strtok_r(depend, ":", &last_ptr);
	if (!xstrcmp(tok, "before"))
		type = "after";
	else if (!xstrcmp(tok, "beforeany"))
		type = "afterany";
	else if (!xstrcmp(tok, "beforenotok"))
		type = "afternotok";
	else if (!xstrcmp(tok, "beforeok"))
		type = "afterok";
	else {
		info("%s: discarding invalid job dependency option %s",
		     plugin_type, tok);
		return;
	}

	/* NOTE: We are updating a job record here in order to implement
	 * the depend=before option. We are doing so without the write lock
	 * on the job record, but using a local mutex to prevent multiple
	 * updates on the same job when multiple jobs satisfying the dependency
	 * are being processed at the same time (all with read locks). The
	 * job read lock will prevent anyone else from getting a job write
	 * lock and using a job write lock causes serious performance problems
	 * for slow job_submit plugins. Not an ideal solution, but the best
	 * option that we see. */
	slurm_mutex_lock(&depend_mutex);
	tok = strtok_r(NULL, ":", &last_ptr);
	while (tok) {
		job_id = atoi(tok);
		job_ptr = find_job_record(job_id);
		if (!job_ptr) {
			info("%s: discarding invalid job dependency before %s",
			     plugin_type, tok);
		} else if ((submit_uid != job_ptr->user_id) &&
			   !validate_super_user(submit_uid)) {
			error("%s: Security violation: uid %u trying to alter "
			      "job %u belonging to uid %u", 
			      plugin_type, submit_uid, job_ptr->job_id,
			      job_ptr->user_id);
		} else if ((!IS_JOB_PENDING(job_ptr)) ||
			   (job_ptr->details == NULL)) {
			info("%s: discarding job before dependency on "
			     "non-pending job %u",
			     plugin_type, job_ptr->job_id);
		} else {
			if (job_ptr->details->dependency) {
				xstrcat(new_dep, job_ptr->details->dependency);
				xstrcat(new_dep, ",");
			}
			xstrfmtcat(new_dep, "%s:%u", type, my_job_id);
			xfree(job_ptr->details->dependency);
			job_ptr->details->dependency = new_dep;
			new_dep = NULL;
			_decr_depend_cnt(job_ptr);

			slurm_attr_init(&attr);
			pthread_attr_setdetachstate(&attr,
						    PTHREAD_CREATE_DETACHED);
			pthread_create(&dep_thread, &attr, _dep_agent, job_ptr);
			slurm_attr_destroy(&attr);
		}
		tok = strtok_r(NULL, ":", &last_ptr);
	}
	slurm_mutex_unlock(&depend_mutex);
}
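
Stripped of the slurmctld structures, the translation performed by
_xlate_before() looks like the sketch below (a demo only: it prints what
the plugin would append to each listed job's dependency string):

#include <stdio.h>
#include <string.h>

static void xlate_before_demo(char *depend, unsigned my_job_id)
{
	char *save = NULL, *tok = strtok_r(depend, ":", &save);
	const char *type;

	if (!tok)
		return;
	if (!strcmp(tok, "before"))
		type = "after";
	else if (!strcmp(tok, "beforeany"))
		type = "afterany";
	else if (!strcmp(tok, "beforenotok"))
		type = "afternotok";
	else if (!strcmp(tok, "beforeok"))
		type = "afterok";
	else
		return;

	while ((tok = strtok_r(NULL, ":", &save))) {
		/* the plugin appends this to job tok's dependency list */
		printf("job %s gains dependency %s:%u\n",
		       tok, type, my_job_id);
	}
}

/* Usage: char buf[] = "beforeok:123:124"; xlate_before_demo(buf, 50);
 * prints "job 123 gains dependency afterok:50" and
 *        "job 124 gains dependency afterok:50" */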
Example #26
static int _parse_job_info(void **dest, slurm_parser_enum_t type,
			   const char *key, const char *value,
			   const char *line, char **leftover)
{
	s_p_hashtbl_t *job_tbl;
	char *name = NULL, *tmp = NULL, local_name[64] = "";
	uint64_t size = 0;
	uint32_t job_id = 0, user_id = 0;
	uint16_t state = 0;
	bb_alloc_t *bb_ptr;
	struct job_record *job_ptr = NULL;
	bb_job_t *bb_spec;
	static s_p_options_t _job_options[] = {
		{"JobID",S_P_STRING},
		{"Name", S_P_STRING},
		{"Size", S_P_STRING},
		{"State", S_P_STRING},
		{NULL}
	};

	*dest = NULL;
	user_id = strtol(value, NULL, 10);
	job_tbl = s_p_hashtbl_create(_job_options);
	s_p_parse_line(job_tbl, *leftover, leftover);
	if (s_p_get_string(&tmp, "JobID", job_tbl)) {
		job_id = strtol(tmp, NULL, 10);
		xfree(tmp);
	}
	if (s_p_get_string(&name, "Name", job_tbl)) {
		snprintf(local_name, sizeof(local_name), "%s", name);
		xfree(name);
	}
	if (s_p_get_string(&tmp, "Size", job_tbl)) {
		size = bb_get_size_num(tmp, bb_state.bb_config.granularity);
		xfree(tmp);
	}
	if (s_p_get_string(&tmp, "State", job_tbl)) {
		state = bb_state_num(tmp);
		xfree(tmp);
	}
	s_p_hashtbl_destroy(job_tbl);

#if 0
	info("%s: JobID:%u Name:%s Size:%"PRIu64" State:%u UserID:%u",
	     __func__, job_id, local_name, size, state, user_id);
#endif
	if (job_id) {
		job_ptr = find_job_record(job_id);
		if (!job_ptr && (state == BB_STATE_STAGED_OUT)) {
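			/* build a minimal job record on the stack so that
			 * bb_find_alloc_rec() can match on job/user ID */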
			struct job_record job_rec;
			job_rec.job_id  = job_id;
			job_rec.user_id = user_id;
			bb_ptr = bb_find_alloc_rec(&bb_state, &job_rec);
			_stop_stage_out(job_id);	/* Purge buffer */
			if (bb_ptr) {
				bb_ptr->cancelled = true;
				bb_ptr->end_time = 0;
			} else {
				/* Slurm knows nothing about this job,
				 * may be result of slurmctld cold start */
				error("%s: Vestigial buffer for purged job %u",
				      plugin_type, job_id);
			}
			return SLURM_SUCCESS;
		} else if (!job_ptr &&
			   ((state == BB_STATE_STAGING_IN) ||
			    (state == BB_STATE_STAGED_IN))) {
			struct job_record job_rec;
			job_rec.job_id  = job_id;
			job_rec.user_id = user_id;
			bb_ptr = bb_find_alloc_rec(&bb_state, &job_rec);
			_stop_stage_in(job_id);		/* Purge buffer */
			if (bb_ptr) {
				bb_ptr->cancelled = true;
				bb_ptr->end_time = 0;
			} else {
				/* Slurm knows nothing about this job,
				 * may be result of slurmctld cold start */
				error("%s: Vestigial buffer for purged job %u",
				      plugin_type, job_id);
			}
			return SLURM_SUCCESS;
		} else if (!job_ptr) {
			error("%s: Vestigial buffer for job ID %u. "
			      "Clear manually",
			      plugin_type, job_id);
			snprintf(local_name, sizeof(local_name),
				 "VestigialJob%u", job_id);
		}
	}
	if (job_ptr) {
		bb_ptr = bb_find_alloc_rec(&bb_state, job_ptr);
		if (bb_ptr == NULL) {
			bb_spec = xmalloc(sizeof(bb_job_t));
			bb_spec->total_size = _get_bb_size(job_ptr);
			bb_ptr = bb_alloc_job_rec(&bb_state, job_ptr, bb_spec);
			xfree(bb_spec);
			bb_ptr->state = state;
			/* bb_ptr->state_time set in bb_alloc_job_rec() */
		}
	} else {
		if ((bb_ptr = _find_bb_name_rec(local_name, user_id)) == NULL) {
			bb_ptr = bb_alloc_name_rec(&bb_state, local_name,
						   user_id);
			bb_ptr->size = size;
			bb_ptr->state = state;
//FIXME: VESTIGIAL: Use bb_limit_add
//			bb_add_user_load(bb_ptr, &bb_state);
			return SLURM_SUCCESS;
		}
	}
	bb_ptr->seen_time = time(NULL); /* used to purge defunct recs */

	/* UserID set to 0 on some failure modes */
	if ((bb_ptr->user_id != user_id) && (user_id != 0)) {
		error("%s: User ID mismatch (%u != %u). "
		      "BB UserID=%u JobID=%u Name=%s",
		      plugin_type, bb_ptr->user_id, user_id,
		      bb_ptr->user_id, bb_ptr->job_id, bb_ptr->name);
	}
	if ((bb_ptr->state == BB_STATE_RUNNING) &&
	    (state == BB_STATE_STAGED_IN))
		state = BB_STATE_RUNNING;	/* More precise state info */
	if (bb_ptr->state != state) {
		/* State is subject to real-time changes */
		debug("%s: State changed (%s to %s). "
		      "BB UserID=%u JobID=%u Name=%s",
		      plugin_type, bb_state_string(bb_ptr->state),
		      bb_state_string(state),
		      bb_ptr->user_id, bb_ptr->job_id, bb_ptr->name);
		bb_ptr->state = state;
		bb_ptr->state_time = time(NULL);
		if (bb_ptr->state == BB_STATE_STAGED_OUT) {
			if (bb_ptr->size != 0) {
//FIXME: VESTIGIAL: Use bb_limit_rem
//				bb_remove_user_load(bb_ptr, &bb_state);
				bb_ptr->size = 0;
			}
		}
		if (bb_ptr->state == BB_STATE_STAGED_IN)
			queue_job_scheduler();
	}
	if ((bb_ptr->state != BB_STATE_STAGED_OUT) && (bb_ptr->size != size)) {
//FIXME: VESTIGIAL: Use bb_limit_rem
//		bb_remove_user_load(bb_ptr, &bb_state);
		if (size != 0) {
			error("%s: Size mismatch (%"PRIu64" != %"PRIu64"). "
			      "BB UserID=%u JobID=%u Name=%s",
			      plugin_type, bb_ptr->size, size,
			      bb_ptr->user_id, bb_ptr->job_id, bb_ptr->name);
		}
		bb_ptr->size = MAX(bb_ptr->size, size);
//FIXME: VESTIGIAL: Use bb_limit_add
//		bb_add_user_load(bb_ptr, &bb_state);
	}

	return SLURM_SUCCESS;
}
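
For context, the record handled above is roughly of the form
"UserID=1001 JobID=42 Name=buf1 Size=1GB State=staged-in" (the key that
routes to this handler is not shown here, so treat the exact layout as an
assumption). A crude fixed-order approximation of what the s_p parser
extracts, in standard C:

#include <stdio.h>

/* Hypothetical stand-in for the s_p_hashtbl parsing above. The real
 * parser is order-independent; this sscanf is not. Size and State are
 * left as strings because the plugin converts them itself, via
 * bb_get_size_num() and bb_state_num() */
static int parse_bb_record(const char *line, unsigned *user_id,
			   unsigned *job_id, char name[64],
			   char size[32], char state[32])
{
	if (sscanf(line,
		   "UserID=%u JobID=%u Name=%63s Size=%31s State=%31s",
		   user_id, job_id, name, size, state) != 5)
		return -1;
	return 0;
}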
Example #27
static void _xlate_before(char *depend, uint32_t submit_uid, uint32_t my_job_id)
{
	uint32_t job_id;
	char *last_ptr = NULL, *new_dep = NULL, *tok, *type;
	struct job_record *job_ptr;
	pthread_attr_t attr;
	pthread_t dep_thread;

	tok = strtok_r(depend, ":", &last_ptr);
	if (!strcmp(tok, "before"))
		type = "after";
	else if (!strcmp(tok, "beforeany"))
		type = "afterany";
	else if (!strcmp(tok, "beforenotok"))
		type = "afternotok";
	else if (!strcmp(tok, "beforeok"))
		type = "afterok";
	else {
		info("%s: discarding invalid job dependency option %s",
		     plugin_type, tok);
		return;
	}

	tok = strtok_r(NULL, ":", &last_ptr);
	while (tok) {
		job_id = atoi(tok);
		job_ptr = find_job_record(job_id);
		if (!job_ptr) {
			info("%s: discarding invalid job dependency before %s",
			     plugin_type, tok);
		} else if ((submit_uid != job_ptr->user_id) &&
			   !validate_super_user(submit_uid)) {
			error("%s: Security violation: uid %u trying to alter "
			      "job %u belonging to uid %u", 
			      plugin_type, submit_uid, job_ptr->job_id,
			      job_ptr->user_id);
		} else if ((!IS_JOB_PENDING(job_ptr)) ||
			   (job_ptr->details == NULL)) {
			info("%s: discarding job before dependency on "
			     "non-pending job %u",
			     plugin_type, job_ptr->job_id);
		} else {
			if (job_ptr->details->dependency) {
				xstrcat(new_dep, job_ptr->details->dependency);
				xstrcat(new_dep, ",");
			}
			xstrfmtcat(new_dep, "%s:%u", type, my_job_id);
			xfree(job_ptr->details->dependency);
			job_ptr->details->dependency = new_dep;
			new_dep = NULL;
			_decr_depend_cnt(job_ptr);

			slurm_attr_init(&attr);
			pthread_attr_setdetachstate(&attr,
						    PTHREAD_CREATE_DETACHED);
			pthread_create(&dep_thread, &attr, _dep_agent, job_ptr);
			slurm_attr_destroy(&attr);
		}
		tok = strtok_r(NULL, ":", &last_ptr);
	}
}