Example #1
/*
 * Create job description structure based off srun options
 * (see opt.h)
 */
job_desc_msg_t *
job_desc_msg_create_from_opts (void)
{
	job_desc_msg_t *j = xmalloc(sizeof(*j));
	hostlist_t hl = NULL;

	slurm_init_job_desc_msg(j);
#if defined HAVE_ALPS_CRAY && defined HAVE_REAL_CRAY
	uint64_t pagg_id = job_getjid(getpid());
	/*
	 * Interactive sessions require pam_job.so in /etc/pam.d/common-session
	 * since creating sgi_job containers requires root permissions. This is
	 * the only exception where we allow the fallback of using the SID to
	 * confirm the reservation (caught later, in do_basil_confirm).
	 */
	if (pagg_id == (uint64_t)-1) {
		error("No SGI job container ID detected - please enable the "
		      "Cray job service via /etc/init.d/job");
	} else {
		if (!j->select_jobinfo)
			j->select_jobinfo = select_g_select_jobinfo_alloc();

		select_g_select_jobinfo_set(j->select_jobinfo,
					    SELECT_JOBDATA_PAGG_ID, &pagg_id);
	}
#endif

	j->contiguous     = opt.contiguous;
	if (opt.core_spec)
		j->core_spec = opt.core_spec;
	j->features       = opt.constraints;
	j->gres           = opt.gres;
	if (opt.immediate == 1)
		j->immediate = opt.immediate;
	if (opt.job_name)
		j->name   = opt.job_name;
	else
		j->name   = opt.cmd_name;
	if (opt.argc > 0) {
		j->argc    = 1;
		j->argv    = (char **) xmalloc(sizeof(char *) * 2);
		j->argv[0] = xstrdup(opt.argv[0]);
	}
	if (opt.acctg_freq)
		j->acctg_freq     = xstrdup(opt.acctg_freq);
	j->reservation    = opt.reservation;
	j->wckey          = opt.wckey;

	j->req_nodes      = xstrdup(opt.nodelist);

	/* simplify the job allocation nodelist,
	 * not laying out tasks until step */
	if (j->req_nodes) {
		hl = hostlist_create(j->req_nodes);
		xfree(opt.nodelist);
		opt.nodelist = hostlist_ranged_string_xmalloc(hl);
		hostlist_uniq(hl);
		xfree(j->req_nodes);
		j->req_nodes = hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);

	}

	if (opt.distribution == SLURM_DIST_ARBITRARY
	   && !j->req_nodes) {
		error("With Arbitrary distribution you need to "
		      "specify a nodelist or hostfile with the -w option");
		return NULL;
	}
	j->exc_nodes      = opt.exc_nodes;
	j->partition      = opt.partition;
	j->min_nodes      = opt.min_nodes;
	if (opt.sockets_per_node != NO_VAL)
		j->sockets_per_node    = opt.sockets_per_node;
	if (opt.cores_per_socket != NO_VAL)
		j->cores_per_socket      = opt.cores_per_socket;
	if (opt.threads_per_core != NO_VAL)
		j->threads_per_core    = opt.threads_per_core;
	j->user_id        = opt.uid;
	j->dependency     = opt.dependency;
	if (opt.nice)
		j->nice   = NICE_OFFSET + opt.nice;
	if (opt.priority)
		j->priority = opt.priority;

	if (opt.cpu_bind)
		j->cpu_bind       = opt.cpu_bind;
	if (opt.cpu_bind_type)
		j->cpu_bind_type  = opt.cpu_bind_type;
	if (opt.mem_bind)
		j->mem_bind       = opt.mem_bind;
	if (opt.mem_bind_type)
		j->mem_bind_type  = opt.mem_bind_type;
	if (opt.plane_size != NO_VAL)
		j->plane_size     = opt.plane_size;
	j->task_dist      = opt.distribution;

	j->group_id       = opt.gid;
	j->mail_type      = opt.mail_type;

	if (opt.ntasks_per_node != NO_VAL)
		j->ntasks_per_node   = opt.ntasks_per_node;
	if (opt.ntasks_per_socket != NO_VAL)
		j->ntasks_per_socket = opt.ntasks_per_socket;
	if (opt.ntasks_per_core != NO_VAL)
		j->ntasks_per_core   = opt.ntasks_per_core;

	if (opt.mail_user)
		j->mail_user = opt.mail_user;
	if (opt.begin)
		j->begin_time = opt.begin;
	if (opt.licenses)
		j->licenses = opt.licenses;
	if (opt.network)
		j->network = opt.network;
	if (opt.profile)
		j->profile = opt.profile;
	if (opt.account)
		j->account = opt.account;
	if (opt.comment)
		j->comment = opt.comment;
	if (opt.qos)
		j->qos = opt.qos;
	if (opt.cwd)
		j->work_dir = opt.cwd;

	if (opt.hold)
		j->priority     = 0;
	if (opt.jobid != NO_VAL)
		j->job_id	= opt.jobid;
#ifdef HAVE_BG
	if (opt.geometry[0] > 0) {
		int i;
		for (i = 0; i < SYSTEM_DIMENSIONS; i++)
			j->geometry[i] = opt.geometry[i];
	}
#endif

	memcpy(j->conn_type, opt.conn_type, sizeof(j->conn_type));

	if (opt.reboot)
		j->reboot = 1;
	if (opt.no_rotate)
		j->rotate = 0;

	if (opt.blrtsimage)
		j->blrtsimage = opt.blrtsimage;
	if (opt.linuximage)
		j->linuximage = opt.linuximage;
	if (opt.mloaderimage)
		j->mloaderimage = opt.mloaderimage;
	if (opt.ramdiskimage)
		j->ramdiskimage = opt.ramdiskimage;

	if (opt.max_nodes)
		j->max_nodes    = opt.max_nodes;
	else if (opt.nodes_set) {
		/* If max_nodes isn't set for the allocation, default it
		 * to min_nodes to match the behavior of salloc and sbatch.
		 */
		j->max_nodes    = opt.min_nodes;
	}
	if (opt.pn_min_cpus != NO_VAL)
		j->pn_min_cpus    = opt.pn_min_cpus;
	if (opt.pn_min_memory != NO_VAL)
		j->pn_min_memory = opt.pn_min_memory;
	else if (opt.mem_per_cpu != NO_VAL)
		j->pn_min_memory = opt.mem_per_cpu | MEM_PER_CPU;
	if (opt.pn_min_tmp_disk != NO_VAL)
		j->pn_min_tmp_disk = opt.pn_min_tmp_disk;
	if (opt.overcommit) {
		j->min_cpus    = opt.min_nodes;
		j->overcommit  = opt.overcommit;
	} else if (opt.cpus_set)
		j->min_cpus    = opt.ntasks * opt.cpus_per_task;
	else
		j->min_cpus    = opt.ntasks;
	if (opt.ntasks_set)
		j->num_tasks   = opt.ntasks;

	if (opt.cpus_set)
		j->cpus_per_task = opt.cpus_per_task;

	if (opt.no_kill)
		j->kill_on_node_fail   = 0;
	if (opt.time_limit != NO_VAL)
		j->time_limit          = opt.time_limit;
	if (opt.time_min != NO_VAL)
		j->time_min            = opt.time_min;
	j->shared = opt.shared;

	if (opt.warn_signal)
		j->warn_signal = opt.warn_signal;
	if (opt.warn_time)
		j->warn_time = opt.warn_time;

	if (opt.req_switch >= 0)
		j->req_switch = opt.req_switch;
	if (opt.wait4switch >= 0)
		j->wait4switch = opt.wait4switch;

	/* srun uses the same listening port for the allocation response
	 * message as all other messages */
	j->alloc_resp_port = slurmctld_comm_addr.port;
	j->other_port = slurmctld_comm_addr.port;

	if (opt.spank_job_env_size) {
		j->spank_job_env      = opt.spank_job_env;
		j->spank_job_env_size = opt.spank_job_env_size;
	}

	return (j);
}
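
A minimal usage sketch (not part of the original source) of how the descriptor built above might be handed to the controller, assuming the public slurm_allocate_resources() API; the helper name example_allocate() and the simplified error handling are illustrative only:

/*
 * Hypothetical usage sketch: submit the descriptor built by the function
 * above and report the resulting allocation.  The retry logic and signal
 * handling that srun actually performs are omitted for brevity.
 */
static void example_allocate(void)
{
	job_desc_msg_t *j = job_desc_msg_create_from_opts();
	resource_allocation_response_msg_t *resp = NULL;

	if (j && (slurm_allocate_resources(j, &resp) == SLURM_SUCCESS)) {
		info("granted allocation %u on nodes %s",
		     resp->job_id, resp->node_list);
		slurm_free_resource_allocation_response_msg(resp);
	} else {
		error("allocation request failed: %m");
	}
	xfree(j);	/* most string fields point into opt, so only the
			 * struct itself is freed in this sketch */
}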
Example #2
static int	_job_modify(uint32_t jobid, char *bank_ptr,
			char *depend_ptr, char *new_hostlist,
			uint32_t new_node_cnt, char *part_name_ptr,
			uint32_t new_time_limit, char *name_ptr,
			char *start_ptr, char *feature_ptr, char *env_ptr,
			char *comment_ptr, char *gres_ptr, char *wckey_ptr)
{
	struct job_record *job_ptr;
	time_t now = time(NULL);
	bool update_accounting = false;

	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL) {
		error("wiki: MODIFYJOB has invalid jobid %u", jobid);
		return ESLURM_INVALID_JOB_ID;
	}
	if (IS_JOB_FINISHED(job_ptr) || (job_ptr->details == NULL)) {
		info("wiki: MODIFYJOB jobid %u is finished", jobid);
		return ESLURM_DISABLED;
	}

	if (comment_ptr) {
		info("wiki: change job %u comment %s", jobid, comment_ptr);
		xfree(job_ptr->comment);
		job_ptr->comment = xstrdup(comment_ptr);
		last_job_update = now;
	}

	if (depend_ptr) {
		int rc = update_job_dependency(job_ptr, depend_ptr);
		if (rc == SLURM_SUCCESS) {
			info("wiki: changed job %u dependency to %s",
				jobid, depend_ptr);
		} else {
			error("wiki: changing job %u dependency to %s",
				jobid, depend_ptr);
			return EINVAL;
		}
	}

	if (env_ptr) {
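		/*
		 * env_ptr appears to hold a comma-separated list of
		 * NAME=VALUE pairs terminated by whitespace; a value may be
		 * wrapped in single or double quotes so that it can itself
		 * contain commas, e.g. (illustrative only):
		 *   FOO=bar,PATH="/a,/b",MSG='x, y'
		 * The loop below validates that format and appends each pair
		 * to job_ptr->details->env_sup in place.
		 */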
		bool have_equal = false;
		char old_sep[1];
		int begin = 0, i;

		if (job_ptr->batch_flag == 0) {
			error("wiki: attempt to set environment variables "
			      "for non-batch job %u", jobid);
			return ESLURM_DISABLED;
		}
		for (i=0; ; i++) {
			if (env_ptr[i] == '=') {
				if (have_equal) {
					error("wiki: setting job %u invalid "
					      "environment variables: %s",
					      jobid, env_ptr);
					return EINVAL;
				}
				have_equal = true;
				if (env_ptr[i+1] == '\"') {
					for (i+=2; ; i++) {
						if (env_ptr[i] == '\0') {
							error("wiki: setting job %u "
							      "invalid environment "
							      "variables: %s",
							      jobid, env_ptr);
							return EINVAL;
						}
						if (env_ptr[i] == '\"') {
							i++;
							break;
						}
						if (env_ptr[i] == '\\') {
							i++;
						}
					}
				} else if (env_ptr[i+1] == '\'') {
					for (i+=2; ; i++) {
						if (env_ptr[i] == '\0') {
							error("wiki: setting job %u "
							      "invalid environment "
							      "variables: %s",
							      jobid, env_ptr);
							return EINVAL;
						}
						if (env_ptr[i] == '\'') {
							i++;
							break;
						}
						if (env_ptr[i] == '\\') {
							i++;
						}
					}
				}
			}
			if (isspace(env_ptr[i]) || (env_ptr[i] == ',')) {
				if (!have_equal) {
					error("wiki: setting job %u invalid "
					      "environment variables: %s",
					      jobid, env_ptr);
					return EINVAL;
				}
				old_sep[0] = env_ptr[i];
				env_ptr[i] = '\0';
				xrealloc(job_ptr->details->env_sup,
					 sizeof(char *) *
					 (job_ptr->details->env_cnt+1));
				job_ptr->details->env_sup
						[job_ptr->details->env_cnt++] =
						xstrdup(&env_ptr[begin]);
				info("wiki: for job %u add env: %s",
				     jobid, &env_ptr[begin]);
				env_ptr[i] = old_sep[0];
				if (isspace(old_sep[0]))
					break;
				begin = i + 1;
				have_equal = false;
			}
		}
	}

	if (new_time_limit) {
		time_t old_time = job_ptr->time_limit;
		job_ptr->time_limit = new_time_limit;
		info("wiki: change job %u time_limit to %u",
			jobid, new_time_limit);
		/* Update end_time based upon change
		 * to preserve suspend time info */
		job_ptr->end_time = job_ptr->end_time +
				((job_ptr->time_limit -
				  old_time) * 60);
		last_job_update = now;
	}

	if (bank_ptr &&
	    (update_job_account("wiki", job_ptr, bank_ptr) != SLURM_SUCCESS)) {
		return EINVAL;
	}

	if (feature_ptr) {
		if (IS_JOB_PENDING(job_ptr) && (job_ptr->details)) {
			info("wiki: change job %u features to %s",
				jobid, feature_ptr);
			xfree(job_ptr->details->features);
			job_ptr->details->features = xstrdup(feature_ptr);
			last_job_update = now;
		} else {
			error("wiki: MODIFYJOB features of non-pending "
				"job %u", jobid);
			return ESLURM_DISABLED;
		}
	}

	if (start_ptr) {
		char *end_ptr;
		uint32_t begin_time = strtol(start_ptr, &end_ptr, 10);
		if (IS_JOB_PENDING(job_ptr) && (job_ptr->details)) {
			info("wiki: change job %u begin time to %u",
				jobid, begin_time);
			job_ptr->details->begin_time = begin_time;
			last_job_update = now;
			update_accounting = true;
		} else {
			error("wiki: MODIFYJOB begin_time of non-pending "
				"job %u", jobid);
			return ESLURM_DISABLED;
		}
	}

	if (name_ptr) {
		if (IS_JOB_PENDING(job_ptr)) {
			info("wiki: change job %u name %s", jobid, name_ptr);
			xfree(job_ptr->name);
			job_ptr->name = xstrdup(name_ptr);
			last_job_update = now;
			update_accounting = true;
		} else {
			error("wiki: MODIFYJOB name of non-pending job %u",
			      jobid);
			return ESLURM_DISABLED;
		}
	}

	if (new_hostlist) {
		int rc = 0, task_cnt;
		hostlist_t hl;
		char *tasklist;

		if (!IS_JOB_PENDING(job_ptr) || !job_ptr->details) {
			/* Job is done, nothing to reset */
			if (new_hostlist[0] == '\0')
				goto host_fini;
			error("wiki: MODIFYJOB hostlist of non-pending "
				"job %u", jobid);
			return ESLURM_DISABLED;
		}

		xfree(job_ptr->details->req_nodes);
		FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
		if (new_hostlist[0] == '\0')
			goto host_fini;

		tasklist = moab2slurm_task_list(new_hostlist, &task_cnt);
		if (tasklist == NULL) {
			rc = 1;
			goto host_fini;
		}
		hl = hostlist_create(tasklist);
		if (hl == 0) {
			rc = 1;
			goto host_fini;
		}
		hostlist_uniq(hl);
		hostlist_sort(hl);
		job_ptr->details->req_nodes =
			hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);
		if (job_ptr->details->req_nodes == NULL) {
			rc = 1;
			goto host_fini;
		}
		if (node_name2bitmap(job_ptr->details->req_nodes, false,
                                     &job_ptr->details->req_node_bitmap)) {
			rc = 1;
			goto host_fini;
		}

host_fini:	if (rc) {
			info("wiki: change job %u invalid hostlist %s",
				jobid, new_hostlist);
			xfree(job_ptr->details->req_nodes);
			return EINVAL;
		} else {
			info("wiki: change job %u hostlist %s",
				jobid, new_hostlist);
			update_accounting = true;
		}
	}

	if (part_name_ptr) {
		struct part_record *part_ptr;
		if (!IS_JOB_PENDING(job_ptr)) {
			error("wiki: MODIFYJOB partition of non-pending "
			      "job %u", jobid);
			return ESLURM_DISABLED;
		}

		part_ptr = find_part_record(part_name_ptr);
		if (part_ptr == NULL) {
			error("wiki: MODIFYJOB has invalid partition %s",
				part_name_ptr);
			return ESLURM_INVALID_PARTITION_NAME;
		}

		info("wiki: change job %u partition %s",
			jobid, part_name_ptr);
		xfree(job_ptr->partition);
		job_ptr->partition = xstrdup(part_name_ptr);
		job_ptr->part_ptr = part_ptr;
		last_job_update = now;
		update_accounting = true;
	}

	if (new_node_cnt) {
		job_desc_msg_t job_desc;
#ifdef HAVE_BG
		uint16_t geometry[SYSTEM_DIMENSIONS] = {(uint16_t) NO_VAL};
		static uint16_t cpus_per_node = 0;
		if (!cpus_per_node) {
			select_g_alter_node_cnt(SELECT_GET_NODE_CPU_CNT,
						&cpus_per_node);
		}
#endif
		if(!IS_JOB_PENDING(job_ptr) || !job_ptr->details) {
			error("wiki: MODIFYJOB node count of non-pending "
			      "job %u", jobid);
			return ESLURM_DISABLED;
		}
		memset(&job_desc, 0, sizeof(job_desc_msg_t));

		job_desc.min_nodes = new_node_cnt;
		job_desc.max_nodes = NO_VAL;
		job_desc.select_jobinfo = select_g_select_jobinfo_alloc();

		select_g_alter_node_cnt(SELECT_SET_NODE_CNT, &job_desc);

		select_g_select_jobinfo_free(job_desc.select_jobinfo);

		job_ptr->details->min_nodes = job_desc.min_nodes;
		if (job_ptr->details->max_nodes &&
		    (job_ptr->details->max_nodes < job_desc.min_nodes))
			job_ptr->details->max_nodes = job_desc.min_nodes;
		info("wiki: change job %u min_nodes to %u",
		     jobid, new_node_cnt);
#ifdef HAVE_BG
		job_ptr->details->min_cpus = job_desc.min_cpus;
		job_ptr->details->max_cpus = job_desc.max_cpus;
		job_ptr->details->pn_min_cpus = job_desc.pn_min_cpus;

		new_node_cnt = job_ptr->details->min_cpus;
		if (cpus_per_node)
			new_node_cnt /= cpus_per_node;

		/* This is only done so that accounting is set up correctly */
		select_g_select_jobinfo_set(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &new_node_cnt);
		/* reset geo since changing this makes any geo
		   potentially invalid */
		select_g_select_jobinfo_set(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_GEOMETRY,
					    geometry);
#endif
		last_job_update = now;
		update_accounting = true;
	}

	if (gres_ptr) {
		char *orig_gres;

		if (!IS_JOB_PENDING(job_ptr)) {
			error("wiki: MODIFYJOB GRES of non-pending job %u",
			      jobid);
			return ESLURM_DISABLED;
		}

		orig_gres = job_ptr->gres;
		job_ptr->gres = NULL;
		if (gres_ptr[0])
			job_ptr->gres = xstrdup(gres_ptr);
		if (gres_plugin_job_state_validate(job_ptr->gres,
						   &job_ptr->gres_list)) {
			error("wiki: MODIFYJOB Invalid GRES=%s", gres_ptr);
			xfree(job_ptr->gres);
			job_ptr->gres = orig_gres;
			return ESLURM_INVALID_GRES;
		}
		xfree(orig_gres);
	}

	if (wckey_ptr) {
		int rc = update_job_wckey("update_job", job_ptr, wckey_ptr);
		if (rc != SLURM_SUCCESS) {
			error("wiki: MODIFYJOB Invalid WCKEY=%s", wckey_ptr);
			return rc;
		}
	}

	if (update_accounting) {
		if (job_ptr->details && job_ptr->details->begin_time) {
			/* Update job record in accounting to reflect
			 * the changes */
			jobacct_storage_g_job_start(acct_db_conn, job_ptr);
		}
	}

	return SLURM_SUCCESS;
}
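
For illustration, a hypothetical call of _job_modify(); in the real wiki plugin these arguments are parsed out of the MODIFYJOB command string, and the helper name example_modify() below is invented for this sketch:

/*
 * Hypothetical example: move pending job 1234 to partition "debug" and
 * raise its time limit to 120 minutes, leaving every other attribute
 * untouched by passing NULL/0.
 */
static int example_modify(void)
{
	int rc = _job_modify(1234, NULL, NULL, NULL, 0, "debug", 120,
			     NULL, NULL, NULL, NULL, NULL, NULL);
	if (rc != SLURM_SUCCESS)
		error("wiki: MODIFYJOB failed: %s", slurm_strerror(rc));
	return rc;
}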
Example #3
/* create a slurmd job structure from a launch tasks message */
extern stepd_step_rec_t *
stepd_step_rec_create(launch_tasks_request_msg_t *msg)
{
	stepd_step_rec_t  *job = NULL;
	srun_info_t   *srun = NULL;
	slurm_addr_t     resp_addr;
	slurm_addr_t     io_addr;
	int            i, nodeid = NO_VAL;

	xassert(msg != NULL);
	xassert(msg->complete_nodelist != NULL);
	debug3("entering stepd_step_rec_create");

	if (!_valid_uid_gid((uid_t)msg->uid, &(msg->gid), &(msg->user_name)))
		return NULL;

	if (_check_acct_freq_task(msg->job_mem_lim, msg->acctg_freq))
		return NULL;

	job = xmalloc(sizeof(stepd_step_rec_t));
#ifndef HAVE_FRONT_END
	nodeid = nodelist_find(msg->complete_nodelist, conf->node_name);
	job->node_name = xstrdup(conf->node_name);
#else
	nodeid = 0;
	job->node_name = xstrdup(msg->complete_nodelist);
#endif
	if (nodeid < 0) {
		error("couldn't find node %s in %s",
		      job->node_name, msg->complete_nodelist);
		stepd_step_rec_destroy(job);
		return NULL;
	}

	job->state	= SLURMSTEPD_STEP_STARTING;
	job->node_tasks	= msg->tasks_to_launch[nodeid];
	job->ntasks	= msg->ntasks;
	job->jobid	= msg->job_id;
	job->stepid	= msg->job_step_id;

	job->uid	= (uid_t) msg->uid;
	job->user_name  = xstrdup(msg->user_name);
	job->gid	= (gid_t) msg->gid;
	job->cwd	= xstrdup(msg->cwd);
	job->task_dist	= msg->task_dist;

	job->cpu_bind_type = msg->cpu_bind_type;
	job->cpu_bind = xstrdup(msg->cpu_bind);
	job->mem_bind_type = msg->mem_bind_type;
	job->mem_bind = xstrdup(msg->mem_bind);
	job->cpu_freq = msg->cpu_freq;
	job->ckpt_dir = xstrdup(msg->ckpt_dir);
	job->restart_dir = xstrdup(msg->restart_dir);
	job->cpus_per_task = msg->cpus_per_task;

	job->env     = _array_copy(msg->envc, msg->env);
	job->array_job_id  = msg->job_id;
	job->array_task_id = (uint16_t) NO_VAL;
	for (i = 0; i < msg->envc; i++) {
		/*                         1234567890123456789 */
		if (!strncmp(msg->env[i], "SLURM_ARRAY_JOB_ID=", 19))
			job->array_job_id = atoi(msg->env[i] + 19);
		/*                         12345678901234567890 */
		if (!strncmp(msg->env[i], "SLURM_ARRAY_TASK_ID=", 20))
			job->array_task_id = atoi(msg->env[i] + 20);
	}

	job->eio     = eio_handle_create();
	job->sruns   = list_create((ListDelF) _srun_info_destructor);
	job->clients = list_create(NULL); /* FIXME! Needs destructor */
	job->stdout_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->stderr_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->free_incoming = list_create(NULL); /* FIXME! Needs destructor */
	job->incoming_count = 0;
	job->free_outgoing = list_create(NULL); /* FIXME! Needs destructor */
	job->outgoing_count = 0;
	job->outgoing_cache = list_create(NULL); /* FIXME! Needs destructor */

	job->envtp   = xmalloc(sizeof(env_t));
	job->envtp->jobid = -1;
	job->envtp->stepid = -1;
	job->envtp->procid = -1;
	job->envtp->localid = -1;
	job->envtp->nodeid = -1;

	job->envtp->distribution = 0;
	job->envtp->cpu_bind_type = 0;
	job->envtp->cpu_bind = NULL;
	job->envtp->mem_bind_type = 0;
	job->envtp->mem_bind = NULL;
	job->envtp->ckpt_dir = NULL;
	job->envtp->comm_port = msg->resp_port[nodeid % msg->num_resp_port];

	memcpy(&resp_addr, &msg->orig_addr, sizeof(slurm_addr_t));
	slurm_set_addr(&resp_addr,
		       msg->resp_port[nodeid % msg->num_resp_port],
		       NULL);
	job->user_managed_io = msg->user_managed_io;
	if (!msg->user_managed_io) {
		memcpy(&io_addr,   &msg->orig_addr, sizeof(slurm_addr_t));
		slurm_set_addr(&io_addr,
			       msg->io_port[nodeid % msg->num_io_port],
			       NULL);
	}

	srun = srun_info_create(msg->cred, &resp_addr, &io_addr);

	job->buffered_stdio = msg->buffered_stdio;
	job->labelio = msg->labelio;

	job->profile     = msg->profile;
	job->task_prolog = xstrdup(msg->task_prolog);
	job->task_epilog = xstrdup(msg->task_epilog);

	job->argc    = msg->argc;
	job->argv    = _array_copy(job->argc, msg->argv);

	job->nnodes  = msg->nnodes;
	job->nodeid  = nodeid;
	job->debug   = msg->slurmd_debug;
	job->cpus    = msg->cpus_allocated[nodeid];

	/* This needs to happen before acct_gather_profile_startpoll
	   and only really looks at the profile in the job.
	*/
	acct_gather_profile_g_node_step_start(job);

	acct_gather_profile_startpoll(msg->acctg_freq,
				      conf->job_acct_gather_freq);

	job->multi_prog  = msg->multi_prog;
	job->timelimit   = (time_t) -1;
	job->task_flags  = msg->task_flags;
	job->switch_job  = msg->switch_job;
	job->pty         = msg->pty;
	job->open_mode   = msg->open_mode;
	job->options     = msg->options;
	format_core_allocs(msg->cred, conf->node_name, conf->cpus,
			   &job->job_alloc_cores, &job->step_alloc_cores,
			   &job->job_mem, &job->step_mem);
	if (job->step_mem) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->step_mem);
	} else if (job->job_mem) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->job_mem);
	}

#ifdef HAVE_ALPS_CRAY
	/* This is only used for Cray emulation mode where slurmd is used to
	 * launch job steps. On a real Cray system, ALPS is used to launch
	 * the tasks instead of SLURM. SLURM's task launch RPC does NOT
	 * contain the reservation ID, so just use some non-zero value here
	 * for testing purposes. */
	job->resv_id = 1;
	select_g_select_jobinfo_set(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
				    &job->resv_id);
#endif

	get_cred_gres(msg->cred, conf->node_name,
		      &job->job_gres_list, &job->step_gres_list);

	list_append(job->sruns, (void *) srun);

	_job_init_task_info(job, msg->global_task_ids[nodeid],
			    msg->ifname, msg->ofname, msg->efname);

	return job;
}
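
Examples #3, #4, and #7 rely on a local helper _array_copy() that is not shown here. A minimal sketch of what such a helper presumably does (deep-copy an argv-style string array into a NULL-terminated copy), for reference only:

/*
 * Presumed behavior of _array_copy(): duplicate n strings into a new
 * NULL-terminated array.  The actual slurmd helper may differ.
 */
static char **_array_copy(int n, char **src)
{
	char **dst = xmalloc((n + 1) * sizeof(char *));
	int i;

	for (i = 0; i < n; i++)
		dst[i] = xstrdup(src[i]);
	dst[n] = NULL;
	return dst;
}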
Example #4
/* create a slurmd job structure from a launch tasks message */
extern stepd_step_rec_t *stepd_step_rec_create(launch_tasks_request_msg_t *msg,
					       uint16_t protocol_version)
{
	stepd_step_rec_t  *job = NULL;
	srun_info_t   *srun = NULL;
	slurm_addr_t     resp_addr;
	slurm_addr_t     io_addr;
	int            i, nodeid = NO_VAL;

	xassert(msg != NULL);
	xassert(msg->complete_nodelist != NULL);
	debug3("entering stepd_step_rec_create");

	if (acct_gather_check_acct_freq_task(msg->job_mem_lim, msg->acctg_freq))
		return NULL;

	job = xmalloc(sizeof(stepd_step_rec_t));
	job->msg = msg;
#ifndef HAVE_FRONT_END
	nodeid = nodelist_find(msg->complete_nodelist, conf->node_name);
	job->node_name = xstrdup(conf->node_name);
#else
	nodeid = 0;
	job->node_name = xstrdup(msg->complete_nodelist);
#endif
	if (nodeid < 0) {
		error("couldn't find node %s in %s",
		      job->node_name, msg->complete_nodelist);
		stepd_step_rec_destroy(job);
		return NULL;
	}

	job->state = SLURMSTEPD_STEP_STARTING;
	slurm_cond_init(&job->state_cond, NULL);
	slurm_mutex_init(&job->state_mutex);
	job->node_tasks	= msg->tasks_to_launch[nodeid];
	i = sizeof(uint16_t) * msg->nnodes;
	job->task_cnts  = xmalloc(i);
	memcpy(job->task_cnts, msg->tasks_to_launch, i);
	job->ntasks	= msg->ntasks;
	job->jobid	= msg->job_id;
	job->stepid	= msg->job_step_id;

	job->uid	= (uid_t) msg->uid;
	job->gid	= (gid_t) msg->gid;
	job->user_name	= xstrdup(msg->user_name);
	job->ngids = (int) msg->ngids;
	job->gids = copy_gids(msg->ngids, msg->gids);

	job->cwd	= xstrdup(msg->cwd);
	job->task_dist	= msg->task_dist;

	job->cpu_bind_type = msg->cpu_bind_type;
	job->cpu_bind = xstrdup(msg->cpu_bind);
	job->mem_bind_type = msg->mem_bind_type;
	job->mem_bind = xstrdup(msg->mem_bind);
	job->cpu_freq_min = msg->cpu_freq_min;
	job->cpu_freq_max = msg->cpu_freq_max;
	job->cpu_freq_gov = msg->cpu_freq_gov;
	job->ckpt_dir = xstrdup(msg->ckpt_dir);
	job->restart_dir = xstrdup(msg->restart_dir);
	job->cpus_per_task = msg->cpus_per_task;

	job->env     = _array_copy(msg->envc, msg->env);
	job->array_job_id  = msg->job_id;
	job->array_task_id = NO_VAL;
	job->node_offset = msg->node_offset;	/* Used for env vars */
	job->pack_jobid  = msg->pack_jobid;	/* Used for env vars */
	job->pack_nnodes = msg->pack_nnodes;	/* Used for env vars */
	if (msg->pack_nnodes && msg->pack_ntasks && msg->pack_task_cnts) {
		job->pack_ntasks = msg->pack_ntasks;	/* Used for env vars */
		i = sizeof(uint16_t) * msg->pack_nnodes;
		job->pack_task_cnts = xmalloc(i);
		memcpy(job->pack_task_cnts, msg->pack_task_cnts, i);
	}
	job->pack_offset = msg->pack_offset;	/* Used for env vars & labels */
	job->pack_task_offset = msg->pack_task_offset;	/* Used for env vars & labels */
	job->pack_node_list = xstrdup(msg->pack_node_list);
	for (i = 0; i < msg->envc; i++) {
		/*                         1234567890123456789 */
		if (!xstrncmp(msg->env[i], "SLURM_ARRAY_JOB_ID=", 19))
			job->array_job_id = atoi(msg->env[i] + 19);
		/*                         12345678901234567890 */
		if (!xstrncmp(msg->env[i], "SLURM_ARRAY_TASK_ID=", 20))
			job->array_task_id = atoi(msg->env[i] + 20);
	}

	job->eio     = eio_handle_create(0);
	job->sruns   = list_create((ListDelF) _srun_info_destructor);

	/*
	 * Based on my testing, the next three lists here could use
	 * eio_obj_destroy as their destructor, but doing so can trigger an
	 * invalid read.  Since they persist until the end of the job, it
	 * isn't that big of a deal.
	 */
	job->clients = list_create(NULL); /* FIXME! Needs destructor */
	job->stdout_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->stderr_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->free_incoming = list_create(NULL); /* FIXME! Needs destructor */
	job->incoming_count = 0;
	job->free_outgoing = list_create(NULL); /* FIXME! Needs destructor */
	job->outgoing_count = 0;
	job->outgoing_cache = list_create(NULL); /* FIXME! Needs destructor */

	job->envtp   = xmalloc(sizeof(env_t));
	job->envtp->jobid = -1;
	job->envtp->stepid = -1;
	job->envtp->procid = -1;
	job->envtp->localid = -1;
	job->envtp->nodeid = -1;

	job->envtp->distribution = 0;
	job->envtp->cpu_bind_type = 0;
	job->envtp->cpu_bind = NULL;
	job->envtp->mem_bind_type = 0;
	job->envtp->mem_bind = NULL;
	job->envtp->ckpt_dir = NULL;
	if (!msg->resp_port)
		msg->num_resp_port = 0;
	if (msg->num_resp_port) {
		job->envtp->comm_port =
			msg->resp_port[nodeid % msg->num_resp_port];
		memcpy(&resp_addr, &msg->orig_addr, sizeof(slurm_addr_t));
		slurm_set_addr(&resp_addr,
			       msg->resp_port[nodeid % msg->num_resp_port],
			       NULL);
	} else {
		memset(&resp_addr, 0, sizeof(slurm_addr_t));
	}
	if (!msg->io_port)
		msg->flags |= LAUNCH_USER_MANAGED_IO;
	if ((msg->flags & LAUNCH_USER_MANAGED_IO) == 0) {
		memcpy(&io_addr,   &msg->orig_addr, sizeof(slurm_addr_t));
		slurm_set_addr(&io_addr,
			       msg->io_port[nodeid % msg->num_io_port],
			       NULL);
	} else {
		memset(&io_addr, 0, sizeof(slurm_addr_t));
	}

	srun = srun_info_create(msg->cred, &resp_addr, &io_addr,
				protocol_version);

	job->profile     = msg->profile;
	job->task_prolog = xstrdup(msg->task_prolog);
	job->task_epilog = xstrdup(msg->task_epilog);

	job->argc    = msg->argc;
	job->argv    = _array_copy(job->argc, msg->argv);

	job->nnodes  = msg->nnodes;
	job->nodeid  = nodeid;
	job->debug   = msg->slurmd_debug;
	job->cpus    = msg->node_cpus;
	job->job_core_spec = msg->job_core_spec;

	/* This needs to happen before acct_gather_profile_startpoll
	   and only really looks at the profile in the job.
	*/
	acct_gather_profile_g_node_step_start(job);

	acct_gather_profile_startpoll(msg->acctg_freq,
				      conf->job_acct_gather_freq);

	job->timelimit   = (time_t) -1;
	job->flags       = msg->flags;
	job->switch_job  = msg->switch_job;
	job->open_mode   = msg->open_mode;
	job->options     = msg->options;
	format_core_allocs(msg->cred, conf->node_name, conf->cpus,
			   &job->job_alloc_cores, &job->step_alloc_cores,
			   &job->job_mem, &job->step_mem);

	/* If users have configured MemLimitEnforce=no in their slurm.conf,
	 * skip setting a memory limit and keep going.
	 */
	if (job->step_mem
	    && conf->mem_limit_enforce) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->step_mem);
	} else if (job->job_mem
		   && conf->mem_limit_enforce) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->job_mem);
	}

#ifdef HAVE_ALPS_CRAY
	/* This is only used for Cray emulation mode where slurmd is used to
	 * launch job steps. On a real Cray system, ALPS is used to launch
	 * the tasks instead of SLURM. SLURM's task launch RPC does NOT
	 * contain the reservation ID, so just use some non-zero value here
	 * for testing purposes. */
	job->resv_id = 1;
	select_g_select_jobinfo_set(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
				    &job->resv_id);
#endif

	/* only need these values on the extern step, don't copy otherwise */
	if ((msg->job_step_id == SLURM_EXTERN_CONT) && msg->x11) {
		job->x11 = msg->x11;
		job->x11_magic_cookie = xstrdup(msg->x11_magic_cookie);
		job->x11_target_host = xstrdup(msg->x11_target_host);
		job->x11_target_port = msg->x11_target_port;
	}

	get_cred_gres(msg->cred, conf->node_name,
		      &job->job_gres_list, &job->step_gres_list);

	list_append(job->sruns, (void *) srun);

	_job_init_task_info(job, msg->global_task_ids,
			    msg->ifname, msg->ofname, msg->efname);

	return job;
}
Example #5
/* Returns 0 on success, -1 on failure */
static int _fill_job_desc_from_opts(job_desc_msg_t *desc)
{
#ifdef HAVE_REAL_CRAY
	uint64_t pagg_id = job_getjid(getpid());
	/*
	 * Interactive sessions require pam_job.so in /etc/pam.d/common-session
	 * since creating sgi_job containers requires root permissions. This is
	 * the only exception where we allow the fallback of using the SID to
	 * confirm the reservation (caught later, in do_basil_confirm).
	 */
	if (pagg_id == (uint64_t)-1) {
		error("No SGI job container ID detected - please enable the "
		      "Cray job service via /etc/init.d/job");
	} else {
		if (!desc->select_jobinfo)
			desc->select_jobinfo = select_g_select_jobinfo_alloc();

		select_g_select_jobinfo_set(desc->select_jobinfo,
					    SELECT_JOBDATA_PAGG_ID, &pagg_id);
	}
#endif
	desc->contiguous = opt.contiguous ? 1 : 0;
	desc->features = opt.constraints;
	desc->gres = opt.gres;
	if (opt.immediate == 1)
		desc->immediate = 1;
	desc->name = xstrdup(opt.job_name);
	desc->reservation = xstrdup(opt.reservation);
	desc->wckey  = xstrdup(opt.wckey);

	desc->req_nodes = opt.nodelist;
	desc->exc_nodes = opt.exc_nodes;
	desc->partition = opt.partition;
	desc->min_nodes = opt.min_nodes;
	if (opt.max_nodes)
		desc->max_nodes = opt.max_nodes;
	desc->user_id = opt.uid;
	desc->group_id = opt.gid;
	if (opt.dependency)
		desc->dependency = xstrdup(opt.dependency);

	if (opt.cpu_bind)
		desc->cpu_bind       = opt.cpu_bind;
	if (opt.cpu_bind_type)
		desc->cpu_bind_type  = opt.cpu_bind_type;
	if (opt.mem_bind)
		desc->mem_bind       = opt.mem_bind;
	if (opt.mem_bind_type)
		desc->mem_bind_type  = opt.mem_bind_type;
	if (opt.plane_size != NO_VAL)
		desc->plane_size     = opt.plane_size;
	desc->task_dist  = opt.distribution;

	if (opt.licenses)
		desc->licenses = xstrdup(opt.licenses);
	desc->network = opt.network;
	if (opt.nice)
		desc->nice = NICE_OFFSET + opt.nice;
	desc->mail_type = opt.mail_type;
	if (opt.mail_user)
		desc->mail_user = xstrdup(opt.mail_user);
	if (opt.begin)
		desc->begin_time = opt.begin;
	if (opt.account)
		desc->account = xstrdup(opt.account);
	if (opt.acctg_freq >= 0)
		desc->acctg_freq = opt.acctg_freq;
	if (opt.comment)
		desc->comment = xstrdup(opt.comment);
	if (opt.qos)
		desc->qos = xstrdup(opt.qos);

	if (opt.cwd)
		desc->work_dir = xstrdup(opt.cwd);
	else if (work_dir)
		desc->work_dir = xstrdup(work_dir);

	if (opt.hold)
		desc->priority     = 0;
#ifdef HAVE_BG
	if (opt.geometry[0] > 0) {
		int i;
		for (i=0; i<SYSTEM_DIMENSIONS; i++)
			desc->geometry[i] = opt.geometry[i];
	}
#endif
	if (opt.conn_type != (uint16_t)NO_VAL)
		desc->conn_type[0] = opt.conn_type;
	if (opt.reboot)
		desc->reboot = 1;
	if (opt.no_rotate)
		desc->rotate = 0;
	if (opt.blrtsimage)
		desc->blrtsimage = xstrdup(opt.blrtsimage);
	if (opt.linuximage)
		desc->linuximage = xstrdup(opt.linuximage);
	if (opt.mloaderimage)
		desc->mloaderimage = xstrdup(opt.mloaderimage);
	if (opt.ramdiskimage)
		desc->ramdiskimage = xstrdup(opt.ramdiskimage);

	/* job constraints */
	if (opt.mincpus > -1)
		desc->pn_min_cpus = opt.mincpus;
	if (opt.realmem > -1)
		desc->pn_min_memory = opt.realmem;
	else if (opt.mem_per_cpu > -1)
		desc->pn_min_memory = opt.mem_per_cpu | MEM_PER_CPU;
	if (opt.tmpdisk > -1)
		desc->pn_min_tmp_disk = opt.tmpdisk;
	if (opt.overcommit) {
		desc->min_cpus = opt.min_nodes;
		desc->overcommit = opt.overcommit;
	} else
		desc->min_cpus = opt.ntasks * opt.cpus_per_task;
	if (opt.ntasks_set)
		desc->num_tasks = opt.ntasks;
	if (opt.cpus_set)
		desc->cpus_per_task = opt.cpus_per_task;
	if (opt.ntasks_per_node)
		desc->ntasks_per_node = opt.ntasks_per_node;
	if (opt.ntasks_per_socket > -1)
		desc->ntasks_per_socket = opt.ntasks_per_socket;
	if (opt.ntasks_per_core > -1)
		desc->ntasks_per_core = opt.ntasks_per_core;

	/* node constraints */
	if (opt.sockets_per_node != NO_VAL)
		desc->sockets_per_node = opt.sockets_per_node;
	if (opt.cores_per_socket != NO_VAL)
		desc->cores_per_socket = opt.cores_per_socket;
	if (opt.threads_per_core != NO_VAL)
		desc->threads_per_core = opt.threads_per_core;

	if (opt.no_kill)
		desc->kill_on_node_fail = 0;
	if (opt.time_limit  != NO_VAL)
		desc->time_limit = opt.time_limit;
	if (opt.time_min  != NO_VAL)
		desc->time_min = opt.time_min;
	desc->shared = opt.shared;
	desc->job_id = opt.jobid;

	desc->wait_all_nodes = opt.wait_all_nodes;
	if (opt.warn_signal)
		desc->warn_signal = opt.warn_signal;
	if (opt.warn_time)
		desc->warn_time = opt.warn_time;

	if (opt.spank_job_env_size) {
		desc->spank_job_env      = opt.spank_job_env;
		desc->spank_job_env_size = opt.spank_job_env_size;
	}

	return 0;
}
Example #6
/*
 * Create job description structure based off srun options
 * (see opt.h)
 */
static job_desc_msg_t *_job_desc_msg_create_from_opts(slurm_opt_t *opt_local)
{
	srun_opt_t *srun_opt = opt_local->srun_opt;
	job_desc_msg_t *j = xmalloc(sizeof(*j));
	hostlist_t hl = NULL;
	xassert(srun_opt);

	slurm_init_job_desc_msg(j);
#if defined HAVE_ALPS_CRAY && defined HAVE_REAL_CRAY
	static bool sgi_err_logged = false;
	uint64_t pagg_id = job_getjid(getpid());
	/*
	 * Interactive sessions require pam_job.so in /etc/pam.d/common-session
	 * since creating sgi_job containers requires root permissions. This is
	 * the only exception where we allow the fallback of using the SID to
	 * confirm the reservation (caught later, in do_basil_confirm).
	 */
	if (pagg_id != (uint64_t) -1) {
		if (!j->select_jobinfo)
			j->select_jobinfo = select_g_select_jobinfo_alloc();

		select_g_select_jobinfo_set(j->select_jobinfo,
					    SELECT_JOBDATA_PAGG_ID, &pagg_id);
	} else if (!sgi_err_logged) {
		error("No SGI job container ID detected - please enable the "
		      "Cray job service via /etc/init.d/job");
		sgi_err_logged = true;
	}
#endif

	j->contiguous     = opt_local->contiguous;
	if (opt_local->core_spec != NO_VAL16)
		j->core_spec      = opt_local->core_spec;
	j->features       = opt_local->constraints;
	j->cluster_features = opt_local->c_constraints;
	if (opt_local->gres && xstrcasecmp(opt_local->gres, "NONE"))
		j->gres   = opt_local->gres;
	if (opt_local->immediate == 1)
		j->immediate = opt_local->immediate;
	if (opt_local->job_name)
		j->name   = opt_local->job_name;
	else
		j->name = srun_opt->cmd_name;
	if (srun_opt->argc > 0) {
		j->argc    = 1;
		j->argv    = (char **) xmalloc(sizeof(char *) * 2);
		j->argv[0] = xstrdup(srun_opt->argv[0]);
	}
	if (opt_local->acctg_freq)
		j->acctg_freq     = xstrdup(opt_local->acctg_freq);
	j->reservation    = opt_local->reservation;
	j->wckey          = opt_local->wckey;
	j->x11 = opt.x11;
	if (j->x11) {
		j->x11_magic_cookie = xstrdup(opt.x11_magic_cookie);
		j->x11_target_port = opt.x11_target_port;
	}

	j->req_nodes      = xstrdup(opt_local->nodelist);

	/* simplify the job allocation nodelist,
	 * not laying out tasks until step */
	if (j->req_nodes) {
		hl = hostlist_create(j->req_nodes);
		xfree(opt_local->nodelist);
		opt_local->nodelist = hostlist_ranged_string_xmalloc(hl);
		hostlist_uniq(hl);
		xfree(j->req_nodes);
		j->req_nodes = hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);

	}

	if (((opt_local->distribution & SLURM_DIST_STATE_BASE) ==
	     SLURM_DIST_ARBITRARY) && !j->req_nodes) {
		error("With Arbitrary distribution you need to "
		      "specify a nodelist or hostfile with the -w option");
		return NULL;
	}
	j->extra = opt_local->extra;
	j->exc_nodes      = opt_local->exc_nodes;
	j->partition      = opt_local->partition;
	j->min_nodes      = opt_local->min_nodes;
	if (opt_local->sockets_per_node != NO_VAL)
		j->sockets_per_node    = opt_local->sockets_per_node;
	if (opt_local->cores_per_socket != NO_VAL)
		j->cores_per_socket      = opt_local->cores_per_socket;
	if (opt_local->threads_per_core != NO_VAL) {
		j->threads_per_core    = opt_local->threads_per_core;
		/* if 1 always make sure affinity knows about it */
		if (j->threads_per_core == 1)
			srun_opt->cpu_bind_type |= CPU_BIND_ONE_THREAD_PER_CORE;
	}
	j->user_id        = opt_local->uid;
	j->dependency     = opt_local->dependency;
	if (opt_local->nice != NO_VAL)
		j->nice   = NICE_OFFSET + opt_local->nice;
	if (opt_local->priority)
		j->priority = opt_local->priority;
	if (srun_opt->cpu_bind)
		j->cpu_bind = srun_opt->cpu_bind;
	if (srun_opt->cpu_bind_type)
		j->cpu_bind_type = srun_opt->cpu_bind_type;
	if (opt_local->delay_boot != NO_VAL)
		j->delay_boot = opt_local->delay_boot;
	if (opt_local->mem_bind)
		j->mem_bind       = opt_local->mem_bind;
	if (opt_local->mem_bind_type)
		j->mem_bind_type  = opt_local->mem_bind_type;
	if (opt_local->plane_size != NO_VAL)
		j->plane_size     = opt_local->plane_size;
	j->task_dist      = opt_local->distribution;

	j->group_id       = opt_local->gid;
	j->mail_type      = opt_local->mail_type;

	if (opt_local->ntasks_per_node != NO_VAL)
		j->ntasks_per_node   = opt_local->ntasks_per_node;
	if (opt_local->ntasks_per_socket != NO_VAL)
		j->ntasks_per_socket = opt_local->ntasks_per_socket;
	if (opt_local->ntasks_per_core != NO_VAL)
		j->ntasks_per_core   = opt_local->ntasks_per_core;

	if (opt_local->mail_user)
		j->mail_user = opt_local->mail_user;
	if (opt_local->burst_buffer)
		j->burst_buffer = opt_local->burst_buffer;
	if (opt_local->begin)
		j->begin_time = opt_local->begin;
	if (opt_local->deadline)
		j->deadline = opt_local->deadline;
	if (opt_local->licenses)
		j->licenses = opt_local->licenses;
	if (opt_local->network)
		j->network = opt_local->network;
	if (opt_local->profile)
		j->profile = opt_local->profile;
	if (opt_local->account)
		j->account = opt_local->account;
	if (opt_local->comment)
		j->comment = opt_local->comment;
	if (opt_local->qos)
		j->qos = opt_local->qos;
	if (opt_local->cwd)
		j->work_dir = opt_local->cwd;

	if (opt_local->hold)
		j->priority     = 0;
	if (opt_local->jobid != NO_VAL)
		j->job_id	= opt_local->jobid;
#ifdef HAVE_BG
	if (opt_local->geometry[0] > 0) {
		int i;
		for (i = 0; i < SYSTEM_DIMENSIONS; i++)
			j->geometry[i] = opt_local->geometry[i];
	}
#endif

	memcpy(j->conn_type, opt_local->conn_type, sizeof(j->conn_type));

	if (opt_local->reboot)
		j->reboot = 1;
	if (opt_local->no_rotate)
		j->rotate = 0;

	if (opt_local->blrtsimage)
		j->blrtsimage = opt_local->blrtsimage;
	if (opt_local->linuximage)
		j->linuximage = opt_local->linuximage;
	if (opt_local->mloaderimage)
		j->mloaderimage = opt_local->mloaderimage;
	if (opt_local->ramdiskimage)
		j->ramdiskimage = opt_local->ramdiskimage;

	if (opt_local->max_nodes)
		j->max_nodes    = opt_local->max_nodes;
	else if (opt_local->nodes_set) {
		/* If max_nodes isn't set for the allocation, default it
		 * to min_nodes to match the behavior of salloc and sbatch.
		 */
		j->max_nodes    = opt_local->min_nodes;
	}
	if (opt_local->pn_min_cpus != NO_VAL)
		j->pn_min_cpus = opt_local->pn_min_cpus;
	if (opt_local->pn_min_memory != NO_VAL64)
		j->pn_min_memory = opt_local->pn_min_memory;
	else if (opt_local->mem_per_cpu != NO_VAL64)
		j->pn_min_memory = opt_local->mem_per_cpu | MEM_PER_CPU;
	if (opt_local->pn_min_tmp_disk != NO_VAL)
		j->pn_min_tmp_disk = opt_local->pn_min_tmp_disk;
	if (opt_local->overcommit) {
		j->min_cpus    = opt_local->min_nodes;
		j->overcommit  = opt_local->overcommit;
	} else if (opt_local->cpus_set)
		j->min_cpus    = opt_local->ntasks * opt_local->cpus_per_task;
	else
		j->min_cpus    = opt_local->ntasks;
	if (opt_local->ntasks_set)
		j->num_tasks   = opt_local->ntasks;

	if (opt_local->cpus_set)
		j->cpus_per_task = opt_local->cpus_per_task;

	if (opt_local->no_kill)
		j->kill_on_node_fail   = 0;
	if (opt_local->time_limit != NO_VAL)
		j->time_limit          = opt_local->time_limit;
	if (opt_local->time_min != NO_VAL)
		j->time_min            = opt_local->time_min;
	if (opt_local->shared != NO_VAL16)
		j->shared = opt_local->shared;

	if (opt_local->warn_signal)
		j->warn_signal = opt_local->warn_signal;
	if (opt_local->warn_time)
		j->warn_time = opt_local->warn_time;
	if (opt_local->job_flags)
		j->bitflags = opt_local->job_flags;

	if (opt_local->cpu_freq_min != NO_VAL)
		j->cpu_freq_min = opt_local->cpu_freq_min;
	if (opt_local->cpu_freq_max != NO_VAL)
		j->cpu_freq_max = opt_local->cpu_freq_max;
	if (opt_local->cpu_freq_gov != NO_VAL)
		j->cpu_freq_gov = opt_local->cpu_freq_gov;

	if (opt_local->req_switch >= 0)
		j->req_switch = opt_local->req_switch;
	if (opt_local->wait4switch >= 0)
		j->wait4switch = opt_local->wait4switch;

	/* srun uses the same listening port for the allocation response
	 * message as all other messages */
	j->alloc_resp_port = slurmctld_comm_addr.port;
	j->other_port = slurmctld_comm_addr.port;

	if (opt_local->spank_job_env_size) {
		j->spank_job_env      = opt_local->spank_job_env;
		j->spank_job_env_size = opt_local->spank_job_env_size;
	}

	if (opt_local->power_flags)
		j->power_flags = opt_local->power_flags;
	if (opt_local->mcs_label)
		j->mcs_label = opt_local->mcs_label;
	j->wait_all_nodes = 1;

	/* If the job can run on multiple clusters, find the cluster offering
	 * the earliest start time and run it there */
	j->clusters = xstrdup(opt_local->clusters);

	return j;
}
Example #7
/* create a slurmd job structure from a launch tasks message */
slurmd_job_t *
job_create(launch_tasks_request_msg_t *msg)
{
    struct passwd *pwd = NULL;
    slurmd_job_t  *job = NULL;
    srun_info_t   *srun = NULL;
    slurm_addr_t     resp_addr;
    slurm_addr_t     io_addr;
    int            nodeid = NO_VAL;

    xassert(msg != NULL);
    xassert(msg->complete_nodelist != NULL);
    debug3("entering job_create");
    if ((pwd = _pwd_create((uid_t)msg->uid)) == NULL) {
        error("uid %ld not found on system", (long) msg->uid);
        slurm_seterrno (ESLURMD_UID_NOT_FOUND);
        return NULL;
    }
    if (!_valid_gid(pwd, &(msg->gid))) {
        slurm_seterrno (ESLURMD_GID_NOT_FOUND);
        _pwd_destroy(pwd);
        return NULL;
    }

    if (msg->job_mem_lim && (msg->acctg_freq != (uint16_t) NO_VAL)
            && (msg->acctg_freq > conf->job_acct_gather_freq)) {
        error("Can't set frequency to %u, it is higher than %u.  "
              "We need it to be at least at this level to "
              "monitor memory usage.",
              msg->acctg_freq, conf->job_acct_gather_freq);
        slurm_seterrno (ESLURMD_INVALID_ACCT_FREQ);
        _pwd_destroy(pwd);
        return NULL;
    }

    job = xmalloc(sizeof(slurmd_job_t));
#ifndef HAVE_FRONT_END
    nodeid = nodelist_find(msg->complete_nodelist, conf->node_name);
    job->node_name = xstrdup(conf->node_name);
#else
    nodeid = 0;
    job->node_name = xstrdup(msg->complete_nodelist);
#endif
    if(nodeid < 0) {
        error("couldn't find node %s in %s",
              job->node_name, msg->complete_nodelist);
        job_destroy(job);
        return NULL;
    }

    job->state	= SLURMSTEPD_STEP_STARTING;
    job->pwd	= pwd;
    job->node_tasks	= msg->tasks_to_launch[nodeid];
    job->ntasks	= msg->ntasks;
    job->jobid	= msg->job_id;
    job->stepid	= msg->job_step_id;

    job->uid	= (uid_t) msg->uid;
    job->gid	= (gid_t) msg->gid;
    job->cwd	= xstrdup(msg->cwd);
    job->task_dist	= msg->task_dist;

    job->cpu_bind_type = msg->cpu_bind_type;
    job->cpu_bind = xstrdup(msg->cpu_bind);
    job->mem_bind_type = msg->mem_bind_type;
    job->mem_bind = xstrdup(msg->mem_bind);
    job->cpu_freq = msg->cpu_freq;
    job->ckpt_dir = xstrdup(msg->ckpt_dir);
    job->restart_dir = xstrdup(msg->restart_dir);
    job->cpus_per_task = msg->cpus_per_task;

    job->env     = _array_copy(msg->envc, msg->env);
    job->eio     = eio_handle_create();
    job->sruns   = list_create((ListDelF) _srun_info_destructor);
    job->clients = list_create(NULL); /* FIXME! Needs destructor */
    job->stdout_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
    job->stderr_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
    job->free_incoming = list_create(NULL); /* FIXME! Needs destructor */
    job->incoming_count = 0;
    job->free_outgoing = list_create(NULL); /* FIXME! Needs destructor */
    job->outgoing_count = 0;
    job->outgoing_cache = list_create(NULL); /* FIXME! Needs destructor */

    job->envtp   = xmalloc(sizeof(env_t));
    job->envtp->jobid = -1;
    job->envtp->stepid = -1;
    job->envtp->procid = -1;
    job->envtp->localid = -1;
    job->envtp->nodeid = -1;

    job->envtp->distribution = 0;
    job->envtp->cpu_bind_type = 0;
    job->envtp->cpu_bind = NULL;
    job->envtp->mem_bind_type = 0;
    job->envtp->mem_bind = NULL;
    job->envtp->ckpt_dir = NULL;
    //job->envtp->comm_port = msg->resp_port[nodeid % msg->num_resp_port];

    /*memcpy(&resp_addr, &msg->orig_addr, sizeof(slurm_addr_t));
    slurm_set_addr(&resp_addr,
    	       msg->resp_port[nodeid % msg->num_resp_port],
    	       NULL);
    job->user_managed_io = msg->user_managed_io;
    if (!msg->user_managed_io) {
    	memcpy(&io_addr,   &msg->orig_addr, sizeof(slurm_addr_t));
    	slurm_set_addr(&io_addr,
    		       msg->io_port[nodeid % msg->num_io_port],
    		       NULL);
    }*/
    //srun = srun_info_create(msg->cred, &resp_addr, &io_addr);
    srun = srun_info_create(NULL, NULL, NULL);
    job->buffered_stdio = msg->buffered_stdio;
    job->labelio = msg->labelio;

    job->task_prolog = xstrdup(msg->task_prolog);
    job->task_epilog = xstrdup(msg->task_epilog);

    job->argc    = msg->argc;
    job->argv    = _array_copy(job->argc, msg->argv);

    job->nnodes  = msg->nnodes;
    job->nodeid  = nodeid;
    job->debug   = msg->slurmd_debug;
    job->cpus    = msg->cpus_allocated[nodeid];
    if (msg->acctg_freq != (uint16_t) NO_VAL)
        jobacct_gather_change_poll(msg->acctg_freq);
    job->multi_prog  = msg->multi_prog;
    job->timelimit   = (time_t) -1;
    job->task_flags  = msg->task_flags;
    job->switch_job  = msg->switch_job;
    job->pty         = msg->pty;
    job->open_mode   = msg->open_mode;
    job->options     = msg->options;
    format_core_allocs(msg->cred, conf->node_name,
                       &job->job_alloc_cores, &job->step_alloc_cores,
                       &job->job_mem, &job->step_mem);
    if (job->step_mem) {
        jobacct_gather_set_mem_limit(job->jobid, job->stepid,
                                     job->step_mem);
    } else if (job->job_mem) {
        jobacct_gather_set_mem_limit(job->jobid, job->stepid,
                                     job->job_mem);
    }

#ifdef HAVE_CRAY
    /* This is only used for Cray emulation mode where slurmd is used to
     * launch job steps. On a real Cray system, ALPS is used to launch
     * the tasks instead of SLURM. SLURM's task launch RPC does NOT
     * contain the reservation ID, so just use some non-zero value here
     * for testing purposes. */
    job->resv_id = 1;
    select_g_select_jobinfo_set(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
                                &job->resv_id);
#endif

    get_cred_gres(msg->cred, conf->node_name,
                  &job->job_gres_list, &job->step_gres_list);

    list_append(job->sruns, (void *) srun);
    _job_init_task_info(job, msg->global_task_ids[nodeid],
                        msg->ifname, msg->ofname, msg->efname);

    return job;
}