示例#1
0
/* create a slurmd job structure from a launch tasks message */
extern stepd_step_rec_t *
stepd_step_rec_create(launch_tasks_request_msg_t *msg)
{
	struct passwd *pwd = NULL;
	stepd_step_rec_t  *job = NULL;
	srun_info_t   *srun = NULL;
	slurm_addr_t     resp_addr;
	slurm_addr_t     io_addr;
	int            i, nodeid = NO_VAL;

	xassert(msg != NULL);
	xassert(msg->complete_nodelist != NULL);
	debug3("entering stepd_step_rec_create");
	if ((pwd = _pwd_create((uid_t)msg->uid)) == NULL) {
		error("uid %ld not found on system", (long) msg->uid);
		slurm_seterrno (ESLURMD_UID_NOT_FOUND);
		return NULL;
	}
	if (!_valid_gid(pwd, &(msg->gid))) {
		slurm_seterrno (ESLURMD_GID_NOT_FOUND);
		_pwd_destroy(pwd);
		return NULL;
	}

	if (_check_acct_freq_task(msg->job_mem_lim, msg->acctg_freq)) {
		_pwd_destroy(pwd);
		return NULL;
	}

	job = xmalloc(sizeof(stepd_step_rec_t));
#ifndef HAVE_FRONT_END
	nodeid = nodelist_find(msg->complete_nodelist, conf->node_name);
	job->node_name = xstrdup(conf->node_name);
#else
	nodeid = 0;
	job->node_name = xstrdup(msg->complete_nodelist);
#endif
	if (nodeid < 0) {
		error("couldn't find node %s in %s",
		      job->node_name, msg->complete_nodelist);
		stepd_step_rec_destroy(job);
		return NULL;
	}

	job->state	= SLURMSTEPD_STEP_STARTING;
	job->pwd	= pwd;
	job->node_tasks	= msg->tasks_to_launch[nodeid];
	job->ntasks	= msg->ntasks;
	job->jobid	= msg->job_id;
	job->stepid	= msg->job_step_id;

	job->uid	= (uid_t) msg->uid;
	job->gid	= (gid_t) msg->gid;
	job->cwd	= xstrdup(msg->cwd);
	job->task_dist	= msg->task_dist;

	job->cpu_bind_type = msg->cpu_bind_type;
	job->cpu_bind = xstrdup(msg->cpu_bind);
	job->mem_bind_type = msg->mem_bind_type;
	job->mem_bind = xstrdup(msg->mem_bind);
	job->cpu_freq = msg->cpu_freq;
	job->ckpt_dir = xstrdup(msg->ckpt_dir);
	job->restart_dir = xstrdup(msg->restart_dir);
	job->cpus_per_task = msg->cpus_per_task;

	job->env     = _array_copy(msg->envc, msg->env);
	job->array_job_id  = msg->job_id;
	job->array_task_id = (uint16_t) NO_VAL;
	for (i = 0; i < msg->envc; i++) {
		/*                         1234567890123456789 */
		if (!strncmp(msg->env[i], "SLURM_ARRAY_JOB_ID=", 19))
			job->array_job_id = atoi(msg->env[i] + 19);
		/*                         12345678901234567890 */
		if (!strncmp(msg->env[i], "SLURM_ARRAY_TASK_ID=", 20))
			job->array_task_id = atoi(msg->env[i] + 20);
	}

	job->eio     = eio_handle_create();
	job->sruns   = list_create((ListDelF) _srun_info_destructor);
	job->clients = list_create(NULL); /* FIXME! Needs destructor */
	job->stdout_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->stderr_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->free_incoming = list_create(NULL); /* FIXME! Needs destructor */
	job->incoming_count = 0;
	job->free_outgoing = list_create(NULL); /* FIXME! Needs destructor */
	job->outgoing_count = 0;
	job->outgoing_cache = list_create(NULL); /* FIXME! Needs destructor */

	job->envtp   = xmalloc(sizeof(env_t));
	job->envtp->jobid = -1;
	job->envtp->stepid = -1;
	job->envtp->procid = -1;
	job->envtp->localid = -1;
	job->envtp->nodeid = -1;

	job->envtp->distribution = 0;
	job->envtp->cpu_bind_type = 0;
	job->envtp->cpu_bind = NULL;
	job->envtp->mem_bind_type = 0;
	job->envtp->mem_bind = NULL;
	job->envtp->ckpt_dir = NULL;
	job->envtp->comm_port = msg->resp_port[nodeid % msg->num_resp_port];

	memcpy(&resp_addr, &msg->orig_addr, sizeof(slurm_addr_t));
	slurm_set_addr(&resp_addr,
		       msg->resp_port[nodeid % msg->num_resp_port],
		       NULL);
	job->user_managed_io = msg->user_managed_io;
	if (!msg->user_managed_io) {
		memcpy(&io_addr,   &msg->orig_addr, sizeof(slurm_addr_t));
		slurm_set_addr(&io_addr,
			       msg->io_port[nodeid % msg->num_io_port],
			       NULL);
	}

	srun = srun_info_create(msg->cred, &resp_addr, &io_addr);

	job->buffered_stdio = msg->buffered_stdio;
	job->labelio = msg->labelio;

	job->profile     = msg->profile;
	job->task_prolog = xstrdup(msg->task_prolog);
	job->task_epilog = xstrdup(msg->task_epilog);

	job->argc    = msg->argc;
	job->argv    = _array_copy(job->argc, msg->argv);

	job->nnodes  = msg->nnodes;
	job->nodeid  = nodeid;
	job->debug   = msg->slurmd_debug;
	job->cpus    = msg->cpus_allocated[nodeid];

	/* This needs to happen before acct_gather_profile_startpoll
	   and only really looks at the profile in the job.
	*/
	acct_gather_profile_g_node_step_start(job);

	acct_gather_profile_startpoll(msg->acctg_freq,
				      conf->job_acct_gather_freq);

	job->multi_prog  = msg->multi_prog;
	job->timelimit   = (time_t) -1;
	job->task_flags  = msg->task_flags;
	job->switch_job  = msg->switch_job;
	job->pty         = msg->pty;
	job->open_mode   = msg->open_mode;
	job->options     = msg->options;
	format_core_allocs(msg->cred, conf->node_name, conf->cpus,
			   &job->job_alloc_cores, &job->step_alloc_cores,
			   &job->job_mem, &job->step_mem);
	if (job->step_mem) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->step_mem);
	} else if (job->job_mem) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->job_mem);
	}

#ifdef HAVE_ALPS_CRAY
	/* This is only used for Cray emulation mode where slurmd is used to
	 * launch job steps. On a real Cray system, ALPS is used to launch
	 * the tasks instead of SLURM. SLURM's task launch RPC does NOT
	 * contain the reservation ID, so just use some non-zero value here
	 * for testing purposes. */
	job->resv_id = 1;
	select_g_select_jobinfo_set(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
				    &job->resv_id);
#endif

	get_cred_gres(msg->cred, conf->node_name,
		      &job->job_gres_list, &job->step_gres_list);

	list_append(job->sruns, (void *) srun);

	_job_init_task_info(job, msg->global_task_ids[nodeid],
			    msg->ifname, msg->ofname, msg->efname);

	return job;
}
示例#2
0
extern stepd_step_rec_t *
batch_stepd_step_rec_create(batch_job_launch_msg_t *msg)
{
	struct passwd *pwd;
	stepd_step_rec_t *job;
	srun_info_t  *srun = NULL;
	char *in_name;

	xassert(msg != NULL);

	debug3("entering batch_stepd_step_rec_create");

	if ((pwd = _pwd_create((uid_t)msg->uid)) == NULL) {
		error("uid %ld not found on system", (long) msg->uid);
		slurm_seterrno (ESLURMD_UID_NOT_FOUND);
		return NULL;
	}
	if (!_valid_gid(pwd, &(msg->gid))) {
		slurm_seterrno (ESLURMD_GID_NOT_FOUND);
		_pwd_destroy(pwd);
		return NULL;
	}

	if (_check_acct_freq_task(msg->job_mem, msg->acctg_freq)) {
		_pwd_destroy(pwd);
		return NULL;
	}

	job = xmalloc(sizeof(stepd_step_rec_t));

	job->state   = SLURMSTEPD_STEP_STARTING;
	job->pwd     = pwd;
	if (msg->cpus_per_node)
		job->cpus    = msg->cpus_per_node[0];
	job->node_tasks  = 1;
	job->ntasks  = msg->ntasks;
	job->jobid   = msg->job_id;
	job->stepid  = msg->step_id;
	job->array_job_id  = msg->array_job_id;
	job->array_task_id = msg->array_task_id;

	job->batch   = true;
	/* This needs to happen before acct_gather_profile_startpoll
	   and only really looks at the profile in the job.
	*/
	acct_gather_profile_g_node_step_start(job);
	/* needed for the jobacct_gather plugin to start */
	acct_gather_profile_startpoll(msg->acctg_freq,
				      conf->job_acct_gather_freq);

	job->multi_prog = 0;
	job->open_mode  = msg->open_mode;
	job->overcommit = (bool) msg->overcommit;
	job->node_name  = xstrdup(conf->node_name);

	job->uid     = (uid_t) msg->uid;
	job->gid     = (gid_t) msg->gid;
	job->cwd     = xstrdup(msg->work_dir);

	job->ckpt_dir = xstrdup(msg->ckpt_dir);
	job->restart_dir = xstrdup(msg->restart_dir);

	job->env     = _array_copy(msg->envc, msg->environment);
	job->eio     = eio_handle_create();
	job->sruns   = list_create((ListDelF) _srun_info_destructor);
	job->envtp   = xmalloc(sizeof(env_t));
	job->envtp->jobid = -1;
	job->envtp->stepid = -1;
	job->envtp->procid = -1;
	job->envtp->localid = -1;
	job->envtp->nodeid = -1;

	job->envtp->distribution = 0;
	job->cpu_bind_type = msg->cpu_bind_type;
	job->cpu_bind = xstrdup(msg->cpu_bind);
	job->envtp->mem_bind_type = 0;
	job->envtp->mem_bind = NULL;
	job->envtp->ckpt_dir = NULL;
	job->envtp->restart_cnt = msg->restart_cnt;

	if (msg->cpus_per_node)
		job->cpus    = msg->cpus_per_node[0];
	format_core_allocs(msg->cred, conf->node_name, conf->cpus,
			   &job->job_alloc_cores, &job->step_alloc_cores,
			   &job->job_mem, &job->step_mem);
	if (job->step_mem)
		jobacct_gather_set_mem_limit(job->jobid, NO_VAL, job->step_mem);
	else if (job->job_mem)
		jobacct_gather_set_mem_limit(job->jobid, NO_VAL, job->job_mem);

	get_cred_gres(msg->cred, conf->node_name,
		      &job->job_gres_list, &job->step_gres_list);

	srun = srun_info_create(NULL, NULL, NULL);

	list_append(job->sruns, (void *) srun);

	if (msg->argc) {
		job->argc    = msg->argc;
		job->argv    = _array_copy(job->argc, msg->argv);
	} else {
		job->argc    = 1;
		/* job script has not yet been written out to disk --
		 * argv will be filled in later by _make_batch_script()
		 */
		job->argv    = (char **) xmalloc(2 * sizeof(char *));
	}

	job->task = xmalloc(sizeof(stepd_step_task_info_t *));
	if (msg->std_err == NULL)
		msg->std_err = xstrdup(msg->std_out);

	if (msg->std_in == NULL)
		in_name = xstrdup("/dev/null");
	else
		in_name = fname_create(job, msg->std_in, 0);

	job->task[0] = task_info_create(0, 0,
					in_name,
					_batchfilename(job, msg->std_out),
					_batchfilename(job, msg->std_err));
	job->task[0]->argc = job->argc;
	job->task[0]->argv = job->argv;

#ifdef HAVE_ALPS_CRAY
	select_g_select_jobinfo_get(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
				    &job->resv_id);
#endif

	return job;
}
示例#3
0
slurmd_job_t *
job_batch_job_create(batch_job_launch_msg_t *msg)
{
	struct passwd *pwd;
	slurmd_job_t *job;
	srun_info_t  *srun = NULL;
	char *in_name;

	xassert(msg != NULL);

	debug3("entering batch_job_create");

	if ((pwd = _pwd_create((uid_t)msg->uid)) == NULL) {
		error("uid %ld not found on system", (long) msg->uid);
		slurm_seterrno (ESLURMD_UID_NOT_FOUND);
		return NULL;
	}
	if (!_valid_gid(pwd, &(msg->gid))) {
		slurm_seterrno (ESLURMD_GID_NOT_FOUND);
		_pwd_destroy(pwd);
		return NULL;
	}
	if(msg->job_mem && (msg->acctg_freq != (uint16_t) NO_VAL)
	   && (msg->acctg_freq > conf->job_acct_gather_freq)) {
		error("Can't set frequency to %u, it is higher than %u.  "
		      "We need it to be at least at this level to "
		      "monitor memory usage.",
		      msg->acctg_freq, conf->job_acct_gather_freq);
		slurm_seterrno (ESLURMD_INVALID_ACCT_FREQ);
		_pwd_destroy(pwd);
		return NULL;
	}

	job = xmalloc(sizeof(slurmd_job_t));

	job->state   = SLURMSTEPD_STEP_STARTING;
	job->pwd     = pwd;
	job->cpus    = msg->cpus_per_node[0];
	job->node_tasks  = 1;
	job->ntasks  = msg->ntasks;
	job->jobid   = msg->job_id;
	job->stepid  = msg->step_id;

	job->batch   = true;
	if (msg->acctg_freq != (uint16_t) NO_VAL)
		jobacct_gather_g_change_poll(msg->acctg_freq);
	job->multi_prog = 0;
	job->open_mode  = msg->open_mode;
	job->overcommit = (bool) msg->overcommit;
	job->node_name  = xstrdup(conf->node_name);

	job->uid     = (uid_t) msg->uid;
	job->gid     = (gid_t) msg->gid;
	job->cwd     = xstrdup(msg->work_dir);

	job->ckpt_dir = xstrdup(msg->ckpt_dir);
	job->restart_dir = xstrdup(msg->restart_dir);

	job->env     = _array_copy(msg->envc, msg->environment);
	job->eio     = eio_handle_create();
	job->sruns   = list_create((ListDelF) _srun_info_destructor);
	job->envtp   = xmalloc(sizeof(env_t));
	job->envtp->jobid = -1;
	job->envtp->stepid = -1;
	job->envtp->procid = -1;
	job->envtp->localid = -1;
	job->envtp->nodeid = -1;

	job->envtp->distribution = 0;
	job->cpu_bind_type = msg->cpu_bind_type;
	job->cpu_bind = xstrdup(msg->cpu_bind);
	job->envtp->mem_bind_type = 0;
	job->envtp->mem_bind = NULL;
	job->envtp->ckpt_dir = NULL;
	job->envtp->restart_cnt = msg->restart_cnt;

	job->cpus_per_task = msg->cpus_per_node[0];
	format_core_allocs(msg->cred, conf->node_name,
			   &job->job_alloc_cores, &job->step_alloc_cores,
			   &job->job_mem, &job->step_mem);
	if (job->step_mem) {
		jobacct_common_set_mem_limit(job->jobid, NO_VAL,
					     job->step_mem);
	} else if (job->job_mem)
		jobacct_common_set_mem_limit(job->jobid, NO_VAL, job->job_mem);

	get_cred_gres(msg->cred, conf->node_name,
		      &job->job_gres_list, &job->step_gres_list);

	srun = srun_info_create(NULL, NULL, NULL);

	list_append(job->sruns, (void *) srun);

	if (msg->argc) {
		job->argc    = msg->argc;
		job->argv    = _array_copy(job->argc, msg->argv);
	} else {
		job->argc    = 1;
		/* job script has not yet been written out to disk --
		 * argv will be filled in later by _make_batch_script()
		 */
		job->argv    = (char **) xmalloc(2 * sizeof(char *));
	}

	job->task = xmalloc(sizeof(slurmd_task_info_t *));
	if (msg->std_err == NULL)
		msg->std_err = xstrdup(msg->std_out);

	if (msg->std_in == NULL)
		in_name = xstrdup("/dev/null");
	else
		in_name = fname_create(job, msg->std_in, 0);

	job->task[0] = task_info_create(0, 0,
					in_name,
					_batchfilename(job, msg->std_out),
					_batchfilename(job, msg->std_err));
	job->task[0]->argc = job->argc;
	job->task[0]->argv = job->argv;

#ifdef HAVE_CRAY
	select_g_select_jobinfo_get(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
				    &job->resv_id);
#endif

	return job;
}