/*
 * batch_stepd_step_rec_create - build the step record for a batch job
 * IN msg - batch job launch request. msg->gid and msg->user_name are passed
 *	by address to _valid_uid_gid(), and msg->std_err is replaced with a
 *	copy of msg->std_out when it arrives as NULL.
 * RET newly allocated step record, or NULL when _valid_uid_gid() rejects the
 *	request or acct_gather_check_acct_freq_task() returns non-zero
 */
extern stepd_step_rec_t *
batch_stepd_step_rec_create(batch_job_launch_msg_t *msg)
{
	stepd_step_rec_t *step;
	env_t *envtp;
	srun_info_t *srun_rec = NULL;
	char *stdin_path;

	xassert(msg != NULL);

	debug3("entering batch_stepd_step_rec_create");

	/* Both checks must pass before anything is allocated. */
	if (!_valid_uid_gid((uid_t) msg->uid, &msg->gid, &msg->user_name) ||
	    acct_gather_check_acct_freq_task(msg->job_mem, msg->acctg_freq))
		return NULL;

	step = xmalloc(sizeof(*step));

	/* Identity and sizing of the batch step. */
	step->state = SLURMSTEPD_STEP_STARTING;
	step->batch = true;
	step->jobid = msg->job_id;
	step->stepid = msg->step_id;
	step->array_job_id = msg->array_job_id;
	step->array_task_id = msg->array_task_id;
	step->job_core_spec = msg->job_core_spec;
	step->ntasks = msg->ntasks;
	step->node_tasks = 1;
	if (msg->cpus_per_node)
		step->cpus = msg->cpus_per_node[0];

	/*
	 * Must run ahead of acct_gather_profile_startpoll(); it only really
	 * looks at the profile in the job.
	 */
	acct_gather_profile_g_node_step_start(step);
	/* needed for the jobacct_gather plugin to start */
	acct_gather_profile_startpoll(msg->acctg_freq,
				      conf->job_acct_gather_freq);

	/* Plain field copies from the launch message. */
	step->multi_prog = 0;
	step->open_mode = msg->open_mode;
	step->overcommit = (bool) msg->overcommit;
	step->uid = (uid_t) msg->uid;
	step->gid = (gid_t) msg->gid;
	step->cpu_bind_type = msg->cpu_bind_type;

	/* Copies owned by the step record. */
	step->node_name = xstrdup(conf->node_name);
	step->user_name = xstrdup(msg->user_name);
	step->cwd = xstrdup(msg->work_dir);
	step->ckpt_dir = xstrdup(msg->ckpt_dir);
	step->restart_dir = xstrdup(msg->restart_dir);
	step->env = _array_copy(msg->envc, msg->environment);
	step->eio = eio_handle_create();
	step->sruns = list_create((ListDelF) _srun_info_destructor);

	/* Environment template: -1 ids, no memory binding, no ckpt_dir. */
	envtp = xmalloc(sizeof(*envtp));
	envtp->jobid = -1;
	envtp->stepid = -1;
	envtp->procid = -1;
	envtp->localid = -1;
	envtp->nodeid = -1;
	envtp->distribution = 0;
	envtp->mem_bind_type = 0;
	envtp->mem_bind = NULL;
	envtp->ckpt_dir = NULL;
	envtp->restart_cnt = msg->restart_cnt;
	step->envtp = envtp;

	step->cpu_bind = xstrdup(msg->cpu_bind);

	/* cpus is assigned a second time, from the same source as above. */
	if (msg->cpus_per_node)
		step->cpus = msg->cpus_per_node[0];

	format_core_allocs(msg->cred, conf->node_name, conf->cpus,
			   &step->job_alloc_cores, &step->step_alloc_cores,
			   &step->job_mem, &step->step_mem);

	/* With enforcement on, a step limit takes priority over a job limit. */
	if (conf->mem_limit_enforce) {
		if (step->step_mem) {
			jobacct_gather_set_mem_limit(step->jobid, NO_VAL,
						     step->step_mem);
		} else if (step->job_mem) {
			jobacct_gather_set_mem_limit(step->jobid, NO_VAL,
						     step->job_mem);
		}
	}

	get_cred_gres(msg->cred, conf->node_name,
		      &step->job_gres_list, &step->step_gres_list);

	srun_rec = srun_info_create(NULL, NULL, NULL);
	list_append(step->sruns, (void *) srun_rec);

	if (msg->argc == 0) {
		/*
		 * job script has not yet been written out to disk --
		 * argv will be filled in later by _make_batch_script()
		 */
		step->argc = 1;
		step->argv = xmalloc(2 * sizeof(char *));
	} else {
		step->argc = msg->argc;
		step->argv = _array_copy(step->argc, msg->argv);
	}

	/* A batch step has a single task slot. */
	step->task = xmalloc(sizeof(*step->task));

	/* stderr falls back to the stdout name when none was given. */
	if (msg->std_err == NULL)
		msg->std_err = xstrdup(msg->std_out);

	stdin_path = (msg->std_in == NULL) ?
		     xstrdup("/dev/null") :
		     fname_create(step, msg->std_in, 0);

	step->task[0] = task_info_create(0, 0, stdin_path,
					 _batchfilename(step, msg->std_out),
					 _batchfilename(step, msg->std_err));
	/* The task shares the step's argument vector. */
	step->task[0]->argc = step->argc;
	step->task[0]->argv = step->argv;

#ifdef HAVE_ALPS_CRAY
	select_g_select_jobinfo_get(msg->select_jobinfo,
				    SELECT_JOBDATA_RESV_ID, &step->resv_id);
#endif

	return step;
}
/*
 * job_batch_job_create - build the job record for a batch job
 * IN msg - batch job launch request. msg->gid is passed by address to
 *	_valid_gid(), and msg->std_err is replaced with a copy of
 *	msg->std_out when it arrives as NULL.
 * RET newly allocated job record, or NULL with the slurm errno set when the
 *	uid lookup, the gid validation or the accounting frequency check fails
 */
slurmd_job_t *
job_batch_job_create(batch_job_launch_msg_t *msg)
{
	struct passwd *pwd;
	slurmd_job_t *new_job;
	env_t *envtp;
	srun_info_t *srun_rec = NULL;
	char *stdin_path;

	xassert(msg != NULL);

	debug3("entering batch_job_create");

	pwd = _pwd_create((uid_t) msg->uid);
	if (pwd == NULL) {
		error("uid %ld not found on system", (long) msg->uid);
		slurm_seterrno(ESLURMD_UID_NOT_FOUND);
		return NULL;
	}

	/* From here on every failure path has to release pwd. */
	if (!_valid_gid(pwd, &msg->gid)) {
		slurm_seterrno(ESLURMD_GID_NOT_FOUND);
		goto drop_pwd;
	}

	if (msg->job_mem &&
	    (msg->acctg_freq != (uint16_t) NO_VAL) &&
	    (msg->acctg_freq > conf->job_acct_gather_freq)) {
		error("Can't set frequency to %u, it is higher than %u.  "
		      "We need it to be at least at this level to "
		      "monitor memory usage.",
		      msg->acctg_freq, conf->job_acct_gather_freq);
		slurm_seterrno(ESLURMD_INVALID_ACCT_FREQ);
		goto drop_pwd;
	}

	new_job = xmalloc(sizeof(*new_job));

	/* Identity and sizing of the batch job; pwd is stored in the record. */
	new_job->state = SLURMSTEPD_STEP_STARTING;
	new_job->pwd = pwd;
	new_job->batch = true;
	new_job->jobid = msg->job_id;
	new_job->stepid = msg->step_id;
	new_job->ntasks = msg->ntasks;
	new_job->node_tasks = 1;
	if (msg->cpus_per_node)
		new_job->cpus = msg->cpus_per_node[0];

	/* Only change the poll frequency when the request carries one. */
	if (msg->acctg_freq != (uint16_t) NO_VAL)
		jobacct_gather_change_poll(msg->acctg_freq);

	/* Plain field copies from the launch message. */
	new_job->multi_prog = 0;
	new_job->open_mode = msg->open_mode;
	new_job->overcommit = (bool) msg->overcommit;
	new_job->uid = (uid_t) msg->uid;
	new_job->gid = (gid_t) msg->gid;
	new_job->cpu_bind_type = msg->cpu_bind_type;

	/* Copies owned by the job record. */
	new_job->node_name = xstrdup(conf->node_name);
	new_job->cwd = xstrdup(msg->work_dir);
	new_job->ckpt_dir = xstrdup(msg->ckpt_dir);
	new_job->restart_dir = xstrdup(msg->restart_dir);
	new_job->env = _array_copy(msg->envc, msg->environment);
	new_job->eio = eio_handle_create();
	new_job->sruns = list_create((ListDelF) _srun_info_destructor);

	/* Environment template: -1 ids, no memory binding, no ckpt_dir. */
	envtp = xmalloc(sizeof(*envtp));
	envtp->jobid = -1;
	envtp->stepid = -1;
	envtp->procid = -1;
	envtp->localid = -1;
	envtp->nodeid = -1;
	envtp->distribution = 0;
	envtp->mem_bind_type = 0;
	envtp->mem_bind = NULL;
	envtp->ckpt_dir = NULL;
	envtp->restart_cnt = msg->restart_cnt;
	new_job->envtp = envtp;

	new_job->cpu_bind = xstrdup(msg->cpu_bind);

	/* cpus is assigned a second time, from the same source as above. */
	if (msg->cpus_per_node)
		new_job->cpus = msg->cpus_per_node[0];

	format_core_allocs(msg->cred, conf->node_name,
			   &new_job->job_alloc_cores,
			   &new_job->step_alloc_cores,
			   &new_job->job_mem, &new_job->step_mem);

	/* A step limit takes priority over a job limit. */
	if (new_job->step_mem) {
		jobacct_gather_set_mem_limit(new_job->jobid, NO_VAL,
					     new_job->step_mem);
	} else if (new_job->job_mem) {
		jobacct_gather_set_mem_limit(new_job->jobid, NO_VAL,
					     new_job->job_mem);
	}

	get_cred_gres(msg->cred, conf->node_name,
		      &new_job->job_gres_list, &new_job->step_gres_list);

	srun_rec = srun_info_create(NULL, NULL, NULL);
	list_append(new_job->sruns, (void *) srun_rec);

	if (msg->argc == 0) {
		/*
		 * job script has not yet been written out to disk --
		 * argv will be filled in later by _make_batch_script()
		 */
		new_job->argc = 1;
		new_job->argv = xmalloc(2 * sizeof(char *));
	} else {
		new_job->argc = msg->argc;
		new_job->argv = _array_copy(new_job->argc, msg->argv);
	}

	/* A batch job has a single task slot. */
	new_job->task = xmalloc(sizeof(*new_job->task));

	/* stderr falls back to the stdout name when none was given. */
	if (msg->std_err == NULL)
		msg->std_err = xstrdup(msg->std_out);

	stdin_path = (msg->std_in == NULL) ?
		     xstrdup("/dev/null") :
		     fname_create(new_job, msg->std_in, 0);

	new_job->task[0] = task_info_create(0, 0, stdin_path,
					    _batchfilename(new_job,
							   msg->std_out),
					    _batchfilename(new_job,
							   msg->std_err));
	/* The task shares the job's argument vector. */
	new_job->task[0]->argc = new_job->argc;
	new_job->task[0]->argv = new_job->argv;

#ifdef HAVE_CRAY
	select_g_select_jobinfo_get(msg->select_jobinfo,
				    SELECT_JOBDATA_RESV_ID,
				    &new_job->resv_id);
#endif

	return new_job;

drop_pwd:
	/* The slurm errno has already been set by the failing check. */
	_pwd_destroy(pwd);
	return NULL;
}