/*
 * Create a slurmd job (step) structure from a launch-tasks RPC message.
 *
 * Resolves the local node's index within the step's nodelist, validates the
 * requesting uid/gid and accounting frequency, then copies every per-step
 * field out of *msg into a freshly xmalloc'd stepd_step_rec_t.
 *
 * Returns the new record, or NULL on failure (errno set via slurm_seterrno
 * for uid/gid lookup failures; accounting-frequency and nodelist failures
 * log their own errors).  On success the caller owns the returned record
 * and must release it with stepd_step_rec_destroy().
 */
extern stepd_step_rec_t *
stepd_step_rec_create(launch_tasks_request_msg_t *msg)
{
	struct passwd *pwd = NULL;
	stepd_step_rec_t *job = NULL;
	srun_info_t *srun = NULL;
	slurm_addr_t resp_addr;
	slurm_addr_t io_addr;
	int i, nodeid = NO_VAL;

	xassert(msg != NULL);
	xassert(msg->complete_nodelist != NULL);
	debug3("entering stepd_step_rec_create");

	/* BUG FIX: io_addr was previously left uninitialized when
	 * msg->user_managed_io is set, yet it is still passed to
	 * srun_info_create() below.  Zero it so srun_info_create() never
	 * reads indeterminate stack bytes. */
	memset(&io_addr, 0, sizeof(io_addr));

	/* Validate the requesting user and group before allocating anything. */
	if ((pwd = _pwd_create((uid_t)msg->uid)) == NULL) {
		error("uid %ld not found on system", (long) msg->uid);
		slurm_seterrno (ESLURMD_UID_NOT_FOUND);
		return NULL;
	}
	if (!_valid_gid(pwd, &(msg->gid))) {
		slurm_seterrno (ESLURMD_GID_NOT_FOUND);
		_pwd_destroy(pwd);
		return NULL;
	}
	if (_check_acct_freq_task(msg->job_mem_lim, msg->acctg_freq)) {
		_pwd_destroy(pwd);
		return NULL;
	}

	job = xmalloc(sizeof(stepd_step_rec_t));
#ifndef HAVE_FRONT_END
	/* Normal mode: locate this node within the step's nodelist. */
	nodeid = nodelist_find(msg->complete_nodelist, conf->node_name);
	job->node_name = xstrdup(conf->node_name);
#else
	/* Front-end mode: a single slurmd fronts the whole allocation. */
	nodeid = 0;
	job->node_name = xstrdup(msg->complete_nodelist);
#endif
	if (nodeid < 0) {
		error("couldn't find node %s in %s",
		      job->node_name, msg->complete_nodelist);
		stepd_step_rec_destroy(job);
		return NULL;
	}

	job->state = SLURMSTEPD_STEP_STARTING;
	job->pwd = pwd;
	job->node_tasks = msg->tasks_to_launch[nodeid];
	job->ntasks = msg->ntasks;
	job->jobid = msg->job_id;
	job->stepid = msg->job_step_id;

	job->uid = (uid_t) msg->uid;
	job->gid = (gid_t) msg->gid;
	job->cwd = xstrdup(msg->cwd);
	job->task_dist = msg->task_dist;

	job->cpu_bind_type = msg->cpu_bind_type;
	job->cpu_bind = xstrdup(msg->cpu_bind);
	job->mem_bind_type = msg->mem_bind_type;
	job->mem_bind = xstrdup(msg->mem_bind);
	job->cpu_freq = msg->cpu_freq;
	job->ckpt_dir = xstrdup(msg->ckpt_dir);
	job->restart_dir = xstrdup(msg->restart_dir);
	job->cpus_per_task = msg->cpus_per_task;

	job->env = _array_copy(msg->envc, msg->env);
	/* Default to a non-array job; overridden below if the environment
	 * carries job-array identifiers (the launch RPC itself does not). */
	job->array_job_id = msg->job_id;
	job->array_task_id = (uint16_t) NO_VAL;
	for (i = 0; i < msg->envc; i++) {
		/*                         1234567890123456789 */
		if (!strncmp(msg->env[i], "SLURM_ARRAY_JOB_ID=", 19))
			job->array_job_id = atoi(msg->env[i] + 19);
		/*                         12345678901234567890 */
		if (!strncmp(msg->env[i], "SLURM_ARRAY_TASK_ID=", 20))
			job->array_task_id = atoi(msg->env[i] + 20);
	}

	job->eio = eio_handle_create();
	job->sruns = list_create((ListDelF) _srun_info_destructor);
	job->clients = list_create(NULL); /* FIXME! Needs destructor */
	job->stdout_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->stderr_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->free_incoming = list_create(NULL); /* FIXME! Needs destructor */
	job->incoming_count = 0;
	job->free_outgoing = list_create(NULL); /* FIXME! Needs destructor */
	job->outgoing_count = 0;
	job->outgoing_cache = list_create(NULL); /* FIXME! Needs destructor */

	/* Per-task environment template; -1 / NULL mark "not yet known". */
	job->envtp = xmalloc(sizeof(env_t));
	job->envtp->jobid = -1;
	job->envtp->stepid = -1;
	job->envtp->procid = -1;
	job->envtp->localid = -1;
	job->envtp->nodeid = -1;

	job->envtp->distribution = 0;
	job->envtp->cpu_bind_type = 0;
	job->envtp->cpu_bind = NULL;
	job->envtp->mem_bind_type = 0;
	job->envtp->mem_bind = NULL;
	job->envtp->ckpt_dir = NULL;
	/* Response ports are distributed round-robin across nodes. */
	job->envtp->comm_port = msg->resp_port[nodeid % msg->num_resp_port];

	/* Build the srun response address from the message origin, with the
	 * port replaced by this node's assigned response port. */
	memcpy(&resp_addr, &msg->orig_addr, sizeof(slurm_addr_t));
	slurm_set_addr(&resp_addr,
		       msg->resp_port[nodeid % msg->num_resp_port],
		       NULL);
	job->user_managed_io = msg->user_managed_io;
	/* With user-managed I/O, slurmd does not forward stdio, so no I/O
	 * address is built (io_addr stays zeroed). */
	if (!msg->user_managed_io) {
		memcpy(&io_addr, &msg->orig_addr, sizeof(slurm_addr_t));
		slurm_set_addr(&io_addr,
			       msg->io_port[nodeid % msg->num_io_port],
			       NULL);
	}

	srun = srun_info_create(msg->cred, &resp_addr, &io_addr);

	job->buffered_stdio = msg->buffered_stdio;
	job->labelio = msg->labelio;

	job->profile = msg->profile;
	job->task_prolog = xstrdup(msg->task_prolog);
	job->task_epilog = xstrdup(msg->task_epilog);

	job->argc = msg->argc;
	job->argv = _array_copy(job->argc, msg->argv);

	job->nnodes = msg->nnodes;
	job->nodeid = nodeid;
	job->debug = msg->slurmd_debug;
	job->cpus = msg->cpus_allocated[nodeid];

	/* This needs to happen before acct_gather_profile_startpoll
	   and only really looks at the profile in the job. */
	acct_gather_profile_g_node_step_start(job);

	acct_gather_profile_startpoll(msg->acctg_freq,
				      conf->job_acct_gather_freq);

	job->multi_prog = msg->multi_prog;
	job->timelimit = (time_t) -1;	/* no per-step time limit here */
	job->task_flags = msg->task_flags;
	job->switch_job = msg->switch_job;
	job->pty = msg->pty;
	job->open_mode = msg->open_mode;
	job->options = msg->options;

	/* Derive allocated cores and memory limits from the job credential;
	 * prefer the step limit, falling back to the job limit. */
	format_core_allocs(msg->cred, conf->node_name, conf->cpus,
			   &job->job_alloc_cores, &job->step_alloc_cores,
			   &job->job_mem, &job->step_mem);
	if (job->step_mem) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->step_mem);
	} else if (job->job_mem) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->job_mem);
	}

#ifdef HAVE_ALPS_CRAY
	/* This is only used for Cray emulation mode where slurmd is used to
	 * launch job steps. On a real Cray system, ALPS is used to launch
	 * the tasks instead of SLURM. SLURM's task launch RPC does NOT
	 * contain the reservation ID, so just use some non-zero value here
	 * for testing purposes. */
	job->resv_id = 1;
	select_g_select_jobinfo_set(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
				    &job->resv_id);
#endif

	get_cred_gres(msg->cred, conf->node_name,
		      &job->job_gres_list, &job->step_gres_list);

	list_append(job->sruns, (void *) srun);

	_job_init_task_info(job, msg->global_task_ids[nodeid],
			    msg->ifname, msg->ofname, msg->efname);

	return job;
}
/*
 * Create a slurmd job (step) structure from a batch-job launch RPC message.
 *
 * Batch jobs run as a single local task (the job script), so node_tasks is
 * fixed at 1 and task[0] is wired to the script's stdio file names.
 *
 * Returns the new record, or NULL on failure (errno set via slurm_seterrno
 * for uid/gid lookup failures).  On success the caller owns the returned
 * record.  Note: may replace msg->std_err in place with a copy of
 * msg->std_out when no stderr name was supplied.
 */
extern stepd_step_rec_t *
batch_stepd_step_rec_create(batch_job_launch_msg_t *msg)
{
	struct passwd *pwd;
	stepd_step_rec_t *job;
	srun_info_t *srun = NULL;
	char *in_name;

	xassert(msg != NULL);

	debug3("entering batch_stepd_step_rec_create");

	/* Validate the requesting user and group before allocating anything. */
	if ((pwd = _pwd_create((uid_t)msg->uid)) == NULL) {
		error("uid %ld not found on system", (long) msg->uid);
		slurm_seterrno (ESLURMD_UID_NOT_FOUND);
		return NULL;
	}
	if (!_valid_gid(pwd, &(msg->gid))) {
		slurm_seterrno (ESLURMD_GID_NOT_FOUND);
		_pwd_destroy(pwd);
		return NULL;
	}
	if (_check_acct_freq_task(msg->job_mem, msg->acctg_freq)) {
		_pwd_destroy(pwd);
		return NULL;
	}

	job = xmalloc(sizeof(stepd_step_rec_t));

	job->state = SLURMSTEPD_STEP_STARTING;
	job->pwd = pwd;
	if (msg->cpus_per_node)
		job->cpus = msg->cpus_per_node[0];
	job->node_tasks = 1;	/* a batch step is the single job script */
	job->ntasks = msg->ntasks;
	job->jobid = msg->job_id;
	job->stepid = msg->step_id;
	job->array_job_id = msg->array_job_id;
	job->array_task_id = msg->array_task_id;

	job->batch = true;
	/* This needs to happen before acct_gather_profile_startpoll
	   and only really looks at the profile in the job. */
	acct_gather_profile_g_node_step_start(job);
	/* needed for the jobacct_gather plugin to start */
	acct_gather_profile_startpoll(msg->acctg_freq,
				      conf->job_acct_gather_freq);

	job->multi_prog = 0;
	job->open_mode = msg->open_mode;
	job->overcommit = (bool) msg->overcommit;
	job->node_name = xstrdup(conf->node_name);

	job->uid = (uid_t) msg->uid;
	job->gid = (gid_t) msg->gid;
	job->cwd = xstrdup(msg->work_dir);

	job->ckpt_dir = xstrdup(msg->ckpt_dir);
	job->restart_dir = xstrdup(msg->restart_dir);

	job->env = _array_copy(msg->envc, msg->environment);
	job->eio = eio_handle_create();
	job->sruns = list_create((ListDelF) _srun_info_destructor);

	/* Per-task environment template; -1 / NULL mark "not yet known". */
	job->envtp = xmalloc(sizeof(env_t));
	job->envtp->jobid = -1;
	job->envtp->stepid = -1;
	job->envtp->procid = -1;
	job->envtp->localid = -1;
	job->envtp->nodeid = -1;

	job->envtp->distribution = 0;
	job->cpu_bind_type = msg->cpu_bind_type;
	job->cpu_bind = xstrdup(msg->cpu_bind);
	job->envtp->mem_bind_type = 0;
	job->envtp->mem_bind = NULL;
	job->envtp->ckpt_dir = NULL;
	job->envtp->restart_cnt = msg->restart_cnt;

	/* NOTE: a second, redundant "if (msg->cpus_per_node) job->cpus ="
	 * assignment that duplicated the one above was removed here. */

	/* Derive allocated cores and memory limits from the job credential;
	 * prefer the step limit, falling back to the job limit. */
	format_core_allocs(msg->cred, conf->node_name, conf->cpus,
			   &job->job_alloc_cores, &job->step_alloc_cores,
			   &job->job_mem, &job->step_mem);
	if (job->step_mem)
		jobacct_gather_set_mem_limit(job->jobid, NO_VAL,
					     job->step_mem);
	else if (job->job_mem)
		jobacct_gather_set_mem_limit(job->jobid, NO_VAL, job->job_mem);

	get_cred_gres(msg->cred, conf->node_name,
		      &job->job_gres_list, &job->step_gres_list);

	/* Batch steps have no controlling srun; create a placeholder. */
	srun = srun_info_create(NULL, NULL, NULL);

	list_append(job->sruns, (void *) srun);

	if (msg->argc) {
		job->argc = msg->argc;
		job->argv = _array_copy(job->argc, msg->argv);
	} else {
		job->argc = 1;
		/* job script has not yet been written out to disk --
		 * argv will be filled in later by _make_batch_script()
		 */
		job->argv = (char **) xmalloc(2 * sizeof(char *));
	}

	/* Single-task array holding the job script's task record. */
	job->task = xmalloc(sizeof(stepd_step_task_info_t *));
	if (msg->std_err == NULL)
		msg->std_err = xstrdup(msg->std_out);

	if (msg->std_in == NULL)
		in_name = xstrdup("/dev/null");
	else
		in_name = fname_create(job, msg->std_in, 0);

	job->task[0] = task_info_create(0, 0,
					in_name,
					_batchfilename(job, msg->std_out),
					_batchfilename(job, msg->std_err));
	job->task[0]->argc = job->argc;
	job->task[0]->argv = job->argv;

#ifdef HAVE_ALPS_CRAY
	select_g_select_jobinfo_get(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
				    &job->resv_id);
#endif
	return job;
}
/*
 * Create a slurmd job structure from a batch-job launch RPC message
 * (legacy slurmd_job_t variant; compare batch_stepd_step_rec_create).
 *
 * Returns the new record, or NULL on failure (errno set via slurm_seterrno).
 * On success the caller owns the returned record.  Note: may replace
 * msg->std_err in place with a copy of msg->std_out when no stderr name
 * was supplied.
 *
 * NOTE(review): this variant dereferences msg->cpus_per_node[0] without a
 * NULL check (the stepd_step_rec_t variant guards it) — confirm the caller
 * guarantees a non-NULL cpus_per_node.  Also, format_core_allocs() is called
 * here without a conf->cpus argument, unlike the other variants — presumably
 * a different API revision; verify against its prototype.
 */
slurmd_job_t *
job_batch_job_create(batch_job_launch_msg_t *msg)
{
	struct passwd *pwd;
	slurmd_job_t *job;
	srun_info_t *srun = NULL;
	char *in_name;

	xassert(msg != NULL);

	debug3("entering batch_job_create");

	/* Validate the requesting user and group before allocating anything. */
	if ((pwd = _pwd_create((uid_t)msg->uid)) == NULL) {
		error("uid %ld not found on system", (long) msg->uid);
		slurm_seterrno (ESLURMD_UID_NOT_FOUND);
		return NULL;
	}
	if (!_valid_gid(pwd, &(msg->gid))) {
		slurm_seterrno (ESLURMD_GID_NOT_FOUND);
		_pwd_destroy(pwd);
		return NULL;
	}
	/* Reject accounting sample intervals coarser than the configured
	 * gather frequency; memory limits could not be enforced otherwise. */
	if(msg->job_mem && (msg->acctg_freq != (uint16_t) NO_VAL)
	   && (msg->acctg_freq > conf->job_acct_gather_freq)) {
		error("Can't set frequency to %u, it is higher than %u. "
		      "We need it to be at least at this level to "
		      "monitor memory usage.",
		      msg->acctg_freq, conf->job_acct_gather_freq);
		slurm_seterrno (ESLURMD_INVALID_ACCT_FREQ);
		_pwd_destroy(pwd);
		return NULL;
	}

	job = xmalloc(sizeof(slurmd_job_t));

	job->state = SLURMSTEPD_STEP_STARTING;
	job->pwd = pwd;
	job->cpus = msg->cpus_per_node[0];
	job->node_tasks = 1;	/* a batch step is the single job script */
	job->ntasks = msg->ntasks;
	job->jobid = msg->job_id;
	job->stepid = msg->step_id;

	job->batch = true;
	if (msg->acctg_freq != (uint16_t) NO_VAL)
		jobacct_gather_g_change_poll(msg->acctg_freq);
	job->multi_prog = 0;
	job->open_mode = msg->open_mode;
	job->overcommit = (bool) msg->overcommit;
	job->node_name = xstrdup(conf->node_name);

	job->uid = (uid_t) msg->uid;
	job->gid = (gid_t) msg->gid;
	job->cwd = xstrdup(msg->work_dir);

	job->ckpt_dir = xstrdup(msg->ckpt_dir);
	job->restart_dir = xstrdup(msg->restart_dir);

	job->env = _array_copy(msg->envc, msg->environment);
	job->eio = eio_handle_create();
	job->sruns = list_create((ListDelF) _srun_info_destructor);

	/* Per-task environment template; -1 / NULL mark "not yet known". */
	job->envtp = xmalloc(sizeof(env_t));
	job->envtp->jobid = -1;
	job->envtp->stepid = -1;
	job->envtp->procid = -1;
	job->envtp->localid = -1;
	job->envtp->nodeid = -1;

	job->envtp->distribution = 0;
	job->cpu_bind_type = msg->cpu_bind_type;
	job->cpu_bind = xstrdup(msg->cpu_bind);
	job->envtp->mem_bind_type = 0;
	job->envtp->mem_bind = NULL;
	job->envtp->ckpt_dir = NULL;
	job->envtp->restart_cnt = msg->restart_cnt;
	job->cpus_per_task = msg->cpus_per_node[0];

	/* Derive allocated cores and memory limits from the job credential;
	 * prefer the step limit, falling back to the job limit. */
	format_core_allocs(msg->cred, conf->node_name,
			   &job->job_alloc_cores, &job->step_alloc_cores,
			   &job->job_mem, &job->step_mem);
	if (job->step_mem) {
		jobacct_common_set_mem_limit(job->jobid, NO_VAL,
					     job->step_mem);
	} else if (job->job_mem)
		jobacct_common_set_mem_limit(job->jobid, NO_VAL, job->job_mem);

	get_cred_gres(msg->cred, conf->node_name,
		      &job->job_gres_list, &job->step_gres_list);

	/* Batch steps have no controlling srun; create a placeholder. */
	srun = srun_info_create(NULL, NULL, NULL);

	list_append(job->sruns, (void *) srun);

	if (msg->argc) {
		job->argc = msg->argc;
		job->argv = _array_copy(job->argc, msg->argv);
	} else {
		job->argc = 1;
		/* job script has not yet been written out to disk --
		 * argv will be filled in later by _make_batch_script()
		 */
		job->argv = (char **) xmalloc(2 * sizeof(char *));
	}

	/* Single-task array holding the job script's task record. */
	job->task = xmalloc(sizeof(slurmd_task_info_t *));
	if (msg->std_err == NULL)
		msg->std_err = xstrdup(msg->std_out);

	if (msg->std_in == NULL)
		in_name = xstrdup("/dev/null");
	else
		in_name = fname_create(job, msg->std_in, 0);

	job->task[0] = task_info_create(0, 0,
					in_name,
					_batchfilename(job, msg->std_out),
					_batchfilename(job, msg->std_err));
	job->task[0]->argc = job->argc;
	job->task[0]->argv = job->argv;

#ifdef HAVE_CRAY
	select_g_select_jobinfo_get(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
				    &job->resv_id);
#endif
	return job;
}