/* log the supplied slurm task launch message as debug3() level */
void slurm_print_launch_task_msg(launch_tasks_request_msg_t *msg, char *name)
{
	int i;
	int node_id = nodelist_find(msg->complete_nodelist, name);

	debug3("job_id: %u", msg->job_id);
	debug3("job_step_id: %u", msg->job_step_id);
	if (msg->pack_jobid != NO_VAL)
		debug3("pack_jobid: %u", msg->pack_jobid);
	if (msg->pack_offset != NO_VAL)
		debug3("pack_offset: %u", msg->pack_offset);
	debug3("uid: %u", msg->uid);
	debug3("gid: %u", msg->gid);
	debug3("tasks_to_launch: %u", *(msg->tasks_to_launch));
	debug3("envc: %u", msg->envc);
	for (i = 0; i < msg->envc; i++) {
		debug3("env[%d]: %s", i, msg->env[i]);
	}
	debug3("cwd: %s", msg->cwd);
	debug3("argc: %u", msg->argc);
	for (i = 0; i < msg->argc; i++) {
		debug3("argv[%d]: %s", i, msg->argv[i]);
	}
	debug3("msg -> resp_port = %u", *(msg->resp_port));
	debug3("msg -> io_port   = %u", *(msg->io_port));
	debug3("msg -> flags     = %x", msg->flags);
	for (i = 0; i < msg->tasks_to_launch[node_id]; i++) {
		debug3("global_task_id[%d]: %u", i,
		       msg->global_task_ids[node_id][i]);
	}
}
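/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * task-launch RPC handler could dump the incoming request before acting
 * on it. The handler name _example_handle_launch is an assumption; only
 * slurm_print_launch_task_msg() and conf->node_name come from the code
 * above. The output is only emitted when the daemon log level is debug3
 * or higher.
 */
static void _example_handle_launch(launch_tasks_request_msg_t *msg)
{
	/* Log every field of the launch request for this node */
	slurm_print_launch_task_msg(msg, conf->node_name);

	/* ... proceed with normal launch processing ... */
}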
/*
 * Given a job step request, return an equivalent local bitmap for this node
 * IN req          - The job step launch request
 * OUT hw_sockets  - number of actual sockets on this node
 * OUT hw_cores    - number of actual cores per socket on this node
 * OUT hw_threads  - number of actual threads per core on this node
 * RET: bitmap of processors available to this job step on this node
 *      OR NULL on error
 */
static bitstr_t *_get_avail_map(launch_tasks_request_msg_t *req,
				uint16_t *hw_sockets, uint16_t *hw_cores,
				uint16_t *hw_threads)
{
	bitstr_t *req_map, *hw_map;
	slurm_cred_arg_t arg;
	uint16_t p, t, new_p, num_cpus, sockets, cores;
	int job_node_id;
	int start;
	char *str;
	int spec_thread_cnt = 0;

	*hw_sockets = conf->sockets;
	*hw_cores   = conf->cores;
	*hw_threads = conf->threads;

	if (slurm_cred_get_args(req->cred, &arg) != SLURM_SUCCESS) {
		error("task/affinity: job lacks a credential");
		return NULL;
	}

	/* we need this node's ID in relation to the whole
	 * job allocation, not just this jobstep */
	job_node_id = nodelist_find(arg.job_hostlist, conf->node_name);
	start = _get_local_node_info(&arg, job_node_id, &sockets, &cores);
	if (start < 0) {
		error("task/affinity: missing node %d in job credential",
		      job_node_id);
		slurm_cred_free_args(&arg);
		return NULL;
	}
	debug3("task/affinity: slurmctld s %u c %u; hw s %u c %u t %u",
	       sockets, cores, *hw_sockets, *hw_cores, *hw_threads);

	num_cpus = MIN((sockets * cores), ((*hw_sockets) * (*hw_cores)));
	req_map = (bitstr_t *) bit_alloc(num_cpus);
	hw_map  = (bitstr_t *) bit_alloc(conf->block_map_size);

	/* Transfer core_bitmap data to local req_map.
	 * The MOD function handles the case where fewer processes
	 * physically exist than are configured (slurmd is out of
	 * sync with the slurmctld daemon). */
	for (p = 0; p < (sockets * cores); p++) {
		if (bit_test(arg.step_core_bitmap, start + p))
			bit_set(req_map, (p % num_cpus));
	}

	str = (char *) bit_fmt_hexmask(req_map);
	debug3("task/affinity: job %u.%u core mask from slurmctld: %s",
	       req->job_id, req->job_step_id, str);
	xfree(str);

	for (p = 0; p < num_cpus; p++) {
		if (bit_test(req_map, p) == 0)
			continue;
		/* If we are pretending we have a larger system than
		 * we really have, this is needed to make sure we
		 * don't bust the bank. */
		new_p = p % conf->block_map_size;
		/* core_bitmap does not include threads, so we
		 * add them here but limit them to what the job
		 * requested */
		for (t = 0; t < (*hw_threads); t++) {
			uint16_t bit = new_p * (*hw_threads) + t;
			bit %= conf->block_map_size;
			bit_set(hw_map, bit);
		}
	}

	if ((req->job_core_spec != (uint16_t) NO_VAL) &&
	    (req->job_core_spec &  CORE_SPEC_THREAD)  &&
	    (req->job_core_spec != CORE_SPEC_THREAD)) {
		spec_thread_cnt = req->job_core_spec & (~CORE_SPEC_THREAD);
	}
	if (spec_thread_cnt) {
		/* Skip specialized threads as needed */
		int i, t, c, s;
		for (t = conf->threads - 1;
		     ((t >= 0) && (spec_thread_cnt > 0)); t--) {
			for (c = conf->cores - 1;
			     ((c >= 0) && (spec_thread_cnt > 0)); c--) {
				for (s = conf->sockets - 1;
				     ((s >= 0) && (spec_thread_cnt > 0)); s--) {
					i = s * conf->cores + c;
					i = (i * conf->threads) + t;
					bit_clear(hw_map, i);
					spec_thread_cnt--;
				}
			}
		}
	}

	str = (char *) bit_fmt_hexmask(hw_map);
	debug3("task/affinity: job %u.%u CPU final mask for local node: %s",
	       req->job_id, req->job_step_id, str);
	xfree(str);

	FREE_NULL_BITMAP(req_map);
	slurm_cred_free_args(&arg);
	return hw_map;
}
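/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * caller of _get_avail_map() would typically distribute the node's tasks
 * over the set bits of the returned availability map and must release the
 * bitmap when done. The helper name _example_build_masks is an assumption
 * for illustration only; the types, _get_avail_map(), and FREE_NULL_BITMAP()
 * come from the code above.
 */
static int _example_build_masks(launch_tasks_request_msg_t *req)
{
	uint16_t hw_sockets = 0, hw_cores = 0, hw_threads = 0;
	bitstr_t *avail_map;

	avail_map = _get_avail_map(req, &hw_sockets, &hw_cores, &hw_threads);
	if (!avail_map)
		return SLURM_ERROR;

	/* ... assign each local task a subset of the set bits ... */

	FREE_NULL_BITMAP(avail_map);
	return SLURM_SUCCESS;
}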
/* create a slurmd job structure from a launch tasks message */
extern stepd_step_rec_t *stepd_step_rec_create(launch_tasks_request_msg_t *msg,
					       uint16_t protocol_version)
{
	stepd_step_rec_t *job = NULL;
	srun_info_t *srun = NULL;
	slurm_addr_t resp_addr;
	slurm_addr_t io_addr;
	int i, nodeid = NO_VAL;

	xassert(msg != NULL);
	xassert(msg->complete_nodelist != NULL);
	debug3("entering stepd_step_rec_create");

	if (acct_gather_check_acct_freq_task(msg->job_mem_lim, msg->acctg_freq))
		return NULL;

	job = xmalloc(sizeof(stepd_step_rec_t));
	job->msg = msg;
#ifndef HAVE_FRONT_END
	nodeid = nodelist_find(msg->complete_nodelist, conf->node_name);
	job->node_name = xstrdup(conf->node_name);
#else
	nodeid = 0;
	job->node_name = xstrdup(msg->complete_nodelist);
#endif
	if (nodeid < 0) {
		error("couldn't find node %s in %s",
		      job->node_name, msg->complete_nodelist);
		stepd_step_rec_destroy(job);
		return NULL;
	}

	job->state = SLURMSTEPD_STEP_STARTING;
	slurm_cond_init(&job->state_cond, NULL);
	slurm_mutex_init(&job->state_mutex);
	job->node_tasks	= msg->tasks_to_launch[nodeid];
	i = sizeof(uint16_t) * msg->nnodes;
	job->task_cnts = xmalloc(i);
	memcpy(job->task_cnts, msg->tasks_to_launch, i);
	job->ntasks	= msg->ntasks;
	job->jobid	= msg->job_id;
	job->stepid	= msg->job_step_id;

	job->uid	= (uid_t) msg->uid;
	job->gid	= (gid_t) msg->gid;
	job->user_name	= xstrdup(msg->user_name);
	job->ngids	= (int) msg->ngids;
	job->gids	= copy_gids(msg->ngids, msg->gids);

	job->cwd	= xstrdup(msg->cwd);
	job->task_dist	= msg->task_dist;

	job->cpu_bind_type = msg->cpu_bind_type;
	job->cpu_bind = xstrdup(msg->cpu_bind);
	job->mem_bind_type = msg->mem_bind_type;
	job->mem_bind = xstrdup(msg->mem_bind);
	job->cpu_freq_min = msg->cpu_freq_min;
	job->cpu_freq_max = msg->cpu_freq_max;
	job->cpu_freq_gov = msg->cpu_freq_gov;
	job->ckpt_dir = xstrdup(msg->ckpt_dir);
	job->restart_dir = xstrdup(msg->restart_dir);
	job->cpus_per_task = msg->cpus_per_task;

	job->env = _array_copy(msg->envc, msg->env);
	job->array_job_id  = msg->job_id;
	job->array_task_id = NO_VAL;
	job->node_offset   = msg->node_offset;	/* Used for env vars */
	job->pack_jobid    = msg->pack_jobid;	/* Used for env vars */
	job->pack_nnodes   = msg->pack_nnodes;	/* Used for env vars */
	if (msg->pack_nnodes && msg->pack_ntasks && msg->pack_task_cnts) {
		job->pack_ntasks = msg->pack_ntasks;	/* Used for env vars */
		i = sizeof(uint16_t) * msg->pack_nnodes;
		job->pack_task_cnts = xmalloc(i);
		memcpy(job->pack_task_cnts, msg->pack_task_cnts, i);
	}
	job->pack_offset = msg->pack_offset;	/* Used for env vars & labels */
	job->pack_task_offset = msg->pack_task_offset;	/* Used for env vars
							 * & labels */
	job->pack_node_list = xstrdup(msg->pack_node_list);
	for (i = 0; i < msg->envc; i++) {
		/*                         1234567890123456789 */
		if (!xstrncmp(msg->env[i], "SLURM_ARRAY_JOB_ID=", 19))
			job->array_job_id = atoi(msg->env[i] + 19);
		/*                         12345678901234567890 */
		if (!xstrncmp(msg->env[i], "SLURM_ARRAY_TASK_ID=", 20))
			job->array_task_id = atoi(msg->env[i] + 20);
	}

	job->eio   = eio_handle_create(0);
	job->sruns = list_create((ListDelF) _srun_info_destructor);

	/*
	 * Based on my testing the next 3 lists here could use the
	 * eio_obj_destroy, but if you do you can get an invalid read.  Since
	 * these stay until the end of the job it isn't that big of a deal.
	 */
	job->clients = list_create(NULL); /* FIXME! Needs destructor */
	job->stdout_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->stderr_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->free_incoming = list_create(NULL); /* FIXME! Needs destructor */
	job->incoming_count = 0;
	job->free_outgoing = list_create(NULL); /* FIXME! Needs destructor */
	job->outgoing_count = 0;
	job->outgoing_cache = list_create(NULL); /* FIXME! Needs destructor */

	job->envtp = xmalloc(sizeof(env_t));
	job->envtp->jobid = -1;
	job->envtp->stepid = -1;
	job->envtp->procid = -1;
	job->envtp->localid = -1;
	job->envtp->nodeid = -1;

	job->envtp->distribution = 0;
	job->envtp->cpu_bind_type = 0;
	job->envtp->cpu_bind = NULL;
	job->envtp->mem_bind_type = 0;
	job->envtp->mem_bind = NULL;
	job->envtp->ckpt_dir = NULL;
	if (!msg->resp_port)
		msg->num_resp_port = 0;
	if (msg->num_resp_port) {
		job->envtp->comm_port =
			msg->resp_port[nodeid % msg->num_resp_port];
		memcpy(&resp_addr, &msg->orig_addr, sizeof(slurm_addr_t));
		slurm_set_addr(&resp_addr,
			       msg->resp_port[nodeid % msg->num_resp_port],
			       NULL);
	} else {
		memset(&resp_addr, 0, sizeof(slurm_addr_t));
	}
	if (!msg->io_port)
		msg->flags |= LAUNCH_USER_MANAGED_IO;
	if ((msg->flags & LAUNCH_USER_MANAGED_IO) == 0) {
		memcpy(&io_addr, &msg->orig_addr, sizeof(slurm_addr_t));
		slurm_set_addr(&io_addr,
			       msg->io_port[nodeid % msg->num_io_port],
			       NULL);
	} else {
		memset(&io_addr, 0, sizeof(slurm_addr_t));
	}

	srun = srun_info_create(msg->cred, &resp_addr, &io_addr,
				protocol_version);

	job->profile     = msg->profile;
	job->task_prolog = xstrdup(msg->task_prolog);
	job->task_epilog = xstrdup(msg->task_epilog);

	job->argc = msg->argc;
	job->argv = _array_copy(job->argc, msg->argv);

	job->nnodes = msg->nnodes;
	job->nodeid = nodeid;
	job->debug  = msg->slurmd_debug;
	job->cpus   = msg->node_cpus;
	job->job_core_spec = msg->job_core_spec;

	/* This needs to happen before acct_gather_profile_startpoll
	 * and only really looks at the profile in the job. */
	acct_gather_profile_g_node_step_start(job);

	acct_gather_profile_startpoll(msg->acctg_freq,
				      conf->job_acct_gather_freq);

	job->timelimit  = (time_t) -1;
	job->flags      = msg->flags;
	job->switch_job = msg->switch_job;
	job->open_mode  = msg->open_mode;
	job->options    = msg->options;

	format_core_allocs(msg->cred, conf->node_name, conf->cpus,
			   &job->job_alloc_cores, &job->step_alloc_cores,
			   &job->job_mem, &job->step_mem);

	/* If users have configured MemLimitEnforce=no
	 * in their slurm.conf keep going. */
	if (job->step_mem && conf->mem_limit_enforce) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->step_mem);
	} else if (job->job_mem && conf->mem_limit_enforce) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->job_mem);
	}

#ifdef HAVE_ALPS_CRAY
	/* This is only used for Cray emulation mode where slurmd is used to
	 * launch job steps. On a real Cray system, ALPS is used to launch
	 * the tasks instead of SLURM.  SLURM's task launch RPC does NOT
	 * contain the reservation ID, so just use some non-zero value here
	 * for testing purposes. */
	job->resv_id = 1;
	select_g_select_jobinfo_set(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
				    &job->resv_id);
#endif

	/* only need these values on the extern step, don't copy otherwise */
	if ((msg->job_step_id == SLURM_EXTERN_CONT) && msg->x11) {
		job->x11 = msg->x11;
		job->x11_magic_cookie = xstrdup(msg->x11_magic_cookie);
		job->x11_target_host = xstrdup(msg->x11_target_host);
		job->x11_target_port = msg->x11_target_port;
	}

	get_cred_gres(msg->cred, conf->node_name,
		      &job->job_gres_list, &job->step_gres_list);

	list_append(job->sruns, (void *) srun);

	_job_init_task_info(job, msg->global_task_ids,
			    msg->ifname, msg->ofname, msg->efname);

	return job;
}
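/*
 * Illustrative sketch (not part of the original source): the record
 * returned by stepd_step_rec_create() is paired with
 * stepd_step_rec_destroy(), which the error path above already uses.
 * The wrapper name _example_run_step is an assumption for illustration
 * only; the real step manager does considerably more between create
 * and destroy.
 */
static int _example_run_step(launch_tasks_request_msg_t *msg,
			     uint16_t protocol_version)
{
	stepd_step_rec_t *job;

	job = stepd_step_rec_create(msg, protocol_version);
	if (!job)
		return SLURM_ERROR;

	/* ... fork/exec the tasks and wait for them to complete ... */

	stepd_step_rec_destroy(job);
	return SLURM_SUCCESS;
}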
/* create a slurmd job structure from a launch tasks message */
extern stepd_step_rec_t *
stepd_step_rec_create(launch_tasks_request_msg_t *msg)
{
	stepd_step_rec_t *job = NULL;
	srun_info_t *srun = NULL;
	slurm_addr_t resp_addr;
	slurm_addr_t io_addr;
	int i, nodeid = NO_VAL;

	xassert(msg != NULL);
	xassert(msg->complete_nodelist != NULL);
	debug3("entering stepd_step_rec_create");

	if (!_valid_uid_gid((uid_t)msg->uid, &(msg->gid), &(msg->user_name)))
		return NULL;

	if (_check_acct_freq_task(msg->job_mem_lim, msg->acctg_freq))
		return NULL;

	job = xmalloc(sizeof(stepd_step_rec_t));
#ifndef HAVE_FRONT_END
	nodeid = nodelist_find(msg->complete_nodelist, conf->node_name);
	job->node_name = xstrdup(conf->node_name);
#else
	nodeid = 0;
	job->node_name = xstrdup(msg->complete_nodelist);
#endif
	if (nodeid < 0) {
		error("couldn't find node %s in %s",
		      job->node_name, msg->complete_nodelist);
		stepd_step_rec_destroy(job);
		return NULL;
	}

	job->state	= SLURMSTEPD_STEP_STARTING;
	job->node_tasks	= msg->tasks_to_launch[nodeid];
	job->ntasks	= msg->ntasks;
	job->jobid	= msg->job_id;
	job->stepid	= msg->job_step_id;

	job->uid	= (uid_t) msg->uid;
	job->user_name	= xstrdup(msg->user_name);
	job->gid	= (gid_t) msg->gid;
	job->cwd	= xstrdup(msg->cwd);
	job->task_dist	= msg->task_dist;

	job->cpu_bind_type = msg->cpu_bind_type;
	job->cpu_bind = xstrdup(msg->cpu_bind);
	job->mem_bind_type = msg->mem_bind_type;
	job->mem_bind = xstrdup(msg->mem_bind);
	job->cpu_freq = msg->cpu_freq;
	job->ckpt_dir = xstrdup(msg->ckpt_dir);
	job->restart_dir = xstrdup(msg->restart_dir);
	job->cpus_per_task = msg->cpus_per_task;

	job->env = _array_copy(msg->envc, msg->env);
	job->array_job_id  = msg->job_id;
	job->array_task_id = (uint16_t) NO_VAL;
	for (i = 0; i < msg->envc; i++) {
		/*                        1234567890123456789 */
		if (!strncmp(msg->env[i], "SLURM_ARRAY_JOB_ID=", 19))
			job->array_job_id = atoi(msg->env[i] + 19);
		/*                        12345678901234567890 */
		if (!strncmp(msg->env[i], "SLURM_ARRAY_TASK_ID=", 20))
			job->array_task_id = atoi(msg->env[i] + 20);
	}

	job->eio   = eio_handle_create();
	job->sruns = list_create((ListDelF) _srun_info_destructor);
	job->clients = list_create(NULL); /* FIXME! Needs destructor */
	job->stdout_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->stderr_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->free_incoming = list_create(NULL); /* FIXME! Needs destructor */
	job->incoming_count = 0;
	job->free_outgoing = list_create(NULL); /* FIXME! Needs destructor */
	job->outgoing_count = 0;
	job->outgoing_cache = list_create(NULL); /* FIXME! Needs destructor */

	job->envtp = xmalloc(sizeof(env_t));
	job->envtp->jobid = -1;
	job->envtp->stepid = -1;
	job->envtp->procid = -1;
	job->envtp->localid = -1;
	job->envtp->nodeid = -1;

	job->envtp->distribution = 0;
	job->envtp->cpu_bind_type = 0;
	job->envtp->cpu_bind = NULL;
	job->envtp->mem_bind_type = 0;
	job->envtp->mem_bind = NULL;
	job->envtp->ckpt_dir = NULL;
	job->envtp->comm_port = msg->resp_port[nodeid % msg->num_resp_port];

	memcpy(&resp_addr, &msg->orig_addr, sizeof(slurm_addr_t));
	slurm_set_addr(&resp_addr,
		       msg->resp_port[nodeid % msg->num_resp_port],
		       NULL);
	job->user_managed_io = msg->user_managed_io;
	if (!msg->user_managed_io) {
		memcpy(&io_addr, &msg->orig_addr, sizeof(slurm_addr_t));
		slurm_set_addr(&io_addr,
			       msg->io_port[nodeid % msg->num_io_port],
			       NULL);
	}

	srun = srun_info_create(msg->cred, &resp_addr, &io_addr);

	job->buffered_stdio = msg->buffered_stdio;
	job->labelio = msg->labelio;

	job->profile     = msg->profile;
	job->task_prolog = xstrdup(msg->task_prolog);
	job->task_epilog = xstrdup(msg->task_epilog);

	job->argc = msg->argc;
	job->argv = _array_copy(job->argc, msg->argv);

	job->nnodes = msg->nnodes;
	job->nodeid = nodeid;
	job->debug  = msg->slurmd_debug;
	job->cpus   = msg->cpus_allocated[nodeid];

	/* This needs to happen before acct_gather_profile_startpoll
	 * and only really looks at the profile in the job. */
	acct_gather_profile_g_node_step_start(job);

	acct_gather_profile_startpoll(msg->acctg_freq,
				      conf->job_acct_gather_freq);

	job->multi_prog  = msg->multi_prog;
	job->timelimit   = (time_t) -1;
	job->task_flags  = msg->task_flags;
	job->switch_job  = msg->switch_job;
	job->pty         = msg->pty;
	job->open_mode   = msg->open_mode;
	job->options     = msg->options;

	format_core_allocs(msg->cred, conf->node_name, conf->cpus,
			   &job->job_alloc_cores, &job->step_alloc_cores,
			   &job->job_mem, &job->step_mem);
	if (job->step_mem) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->step_mem);
	} else if (job->job_mem) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->job_mem);
	}

#ifdef HAVE_ALPS_CRAY
	/* This is only used for Cray emulation mode where slurmd is used to
	 * launch job steps. On a real Cray system, ALPS is used to launch
	 * the tasks instead of SLURM.  SLURM's task launch RPC does NOT
	 * contain the reservation ID, so just use some non-zero value here
	 * for testing purposes. */
	job->resv_id = 1;
	select_g_select_jobinfo_set(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
				    &job->resv_id);
#endif

	get_cred_gres(msg->cred, conf->node_name,
		      &job->job_gres_list, &job->step_gres_list);

	list_append(job->sruns, (void *) srun);

	_job_init_task_info(job, msg->global_task_ids[nodeid],
			    msg->ifname, msg->ofname, msg->efname);

	return job;
}
/* create a slurmd job structure from a launch tasks message */
slurmd_job_t *
job_create(launch_tasks_request_msg_t *msg)
{
	struct passwd *pwd = NULL;
	slurmd_job_t  *job = NULL;
	srun_info_t   *srun = NULL;
	slurm_addr_t   resp_addr;
	slurm_addr_t   io_addr;
	int            nodeid = NO_VAL;

	xassert(msg != NULL);
	xassert(msg->complete_nodelist != NULL);
	debug3("entering job_create");
	if ((pwd = _pwd_create((uid_t)msg->uid)) == NULL) {
		error("uid %ld not found on system", (long) msg->uid);
		slurm_seterrno(ESLURMD_UID_NOT_FOUND);
		return NULL;
	}
	if (!_valid_gid(pwd, &(msg->gid))) {
		slurm_seterrno(ESLURMD_GID_NOT_FOUND);
		_pwd_destroy(pwd);
		return NULL;
	}

	if (msg->job_mem_lim && (msg->acctg_freq != (uint16_t) NO_VAL) &&
	    (msg->acctg_freq > conf->job_acct_gather_freq)) {
		error("Can't set frequency to %u, it is higher than %u. "
		      "We need it to be at least at this level to "
		      "monitor memory usage.",
		      msg->acctg_freq, conf->job_acct_gather_freq);
		slurm_seterrno(ESLURMD_INVALID_ACCT_FREQ);
		_pwd_destroy(pwd);
		return NULL;
	}

	job = xmalloc(sizeof(slurmd_job_t));
#ifndef HAVE_FRONT_END
	nodeid = nodelist_find(msg->complete_nodelist, conf->node_name);
	job->node_name = xstrdup(conf->node_name);
#else
	nodeid = 0;
	job->node_name = xstrdup(msg->complete_nodelist);
#endif
	if (nodeid < 0) {
		error("couldn't find node %s in %s",
		      job->node_name, msg->complete_nodelist);
		job_destroy(job);
		return NULL;
	}

	job->state	= SLURMSTEPD_STEP_STARTING;
	job->pwd	= pwd;
	job->node_tasks	= msg->tasks_to_launch[nodeid];
	job->ntasks	= msg->ntasks;
	job->jobid	= msg->job_id;
	job->stepid	= msg->job_step_id;

	job->uid	= (uid_t) msg->uid;
	job->gid	= (gid_t) msg->gid;
	job->cwd	= xstrdup(msg->cwd);
	job->task_dist	= msg->task_dist;

	job->cpu_bind_type = msg->cpu_bind_type;
	job->cpu_bind = xstrdup(msg->cpu_bind);
	job->mem_bind_type = msg->mem_bind_type;
	job->mem_bind = xstrdup(msg->mem_bind);
	job->cpu_freq = msg->cpu_freq;
	job->ckpt_dir = xstrdup(msg->ckpt_dir);
	job->restart_dir = xstrdup(msg->restart_dir);
	job->cpus_per_task = msg->cpus_per_task;

	job->env = _array_copy(msg->envc, msg->env);
	job->eio = eio_handle_create();
	job->sruns = list_create((ListDelF) _srun_info_destructor);
	job->clients = list_create(NULL); /* FIXME! Needs destructor */
	job->stdout_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->stderr_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->free_incoming = list_create(NULL); /* FIXME! Needs destructor */
	job->incoming_count = 0;
	job->free_outgoing = list_create(NULL); /* FIXME! Needs destructor */
	job->outgoing_count = 0;
	job->outgoing_cache = list_create(NULL); /* FIXME! Needs destructor */

	job->envtp = xmalloc(sizeof(env_t));
	job->envtp->jobid = -1;
	job->envtp->stepid = -1;
	job->envtp->procid = -1;
	job->envtp->localid = -1;
	job->envtp->nodeid = -1;

	job->envtp->distribution = 0;
	job->envtp->cpu_bind_type = 0;
	job->envtp->cpu_bind = NULL;
	job->envtp->mem_bind_type = 0;
	job->envtp->mem_bind = NULL;
	job->envtp->ckpt_dir = NULL;

	/* The response/IO address setup is disabled in this variant; the
	 * srun_info record is created below with NULL credential and
	 * addresses, so resp_addr and io_addr are left unused here. */
	//job->envtp->comm_port = msg->resp_port[nodeid % msg->num_resp_port];
	/*
	memcpy(&resp_addr, &msg->orig_addr, sizeof(slurm_addr_t));
	slurm_set_addr(&resp_addr,
		       msg->resp_port[nodeid % msg->num_resp_port],
		       NULL);
	job->user_managed_io = msg->user_managed_io;
	if (!msg->user_managed_io) {
		memcpy(&io_addr, &msg->orig_addr, sizeof(slurm_addr_t));
		slurm_set_addr(&io_addr,
			       msg->io_port[nodeid % msg->num_io_port],
			       NULL);
	}
	*/

	//srun = srun_info_create(msg->cred, &resp_addr, &io_addr);
	srun = srun_info_create(NULL, NULL, NULL);

	job->buffered_stdio = msg->buffered_stdio;
	job->labelio = msg->labelio;

	job->task_prolog = xstrdup(msg->task_prolog);
	job->task_epilog = xstrdup(msg->task_epilog);

	job->argc = msg->argc;
	job->argv = _array_copy(job->argc, msg->argv);

	job->nnodes = msg->nnodes;
	job->nodeid = nodeid;
	job->debug  = msg->slurmd_debug;
	job->cpus   = msg->cpus_allocated[nodeid];
	if (msg->acctg_freq != (uint16_t) NO_VAL)
		jobacct_gather_change_poll(msg->acctg_freq);
	job->multi_prog  = msg->multi_prog;
	job->timelimit   = (time_t) -1;
	job->task_flags  = msg->task_flags;
	job->switch_job  = msg->switch_job;
	job->pty         = msg->pty;
	job->open_mode   = msg->open_mode;
	job->options     = msg->options;

	format_core_allocs(msg->cred, conf->node_name,
			   &job->job_alloc_cores, &job->step_alloc_cores,
			   &job->job_mem, &job->step_mem);
	if (job->step_mem) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->step_mem);
	} else if (job->job_mem) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->job_mem);
	}

#ifdef HAVE_CRAY
	/* This is only used for Cray emulation mode where slurmd is used to
	 * launch job steps. On a real Cray system, ALPS is used to launch
	 * the tasks instead of SLURM.  SLURM's task launch RPC does NOT
	 * contain the reservation ID, so just use some non-zero value here
	 * for testing purposes. */
	job->resv_id = 1;
	select_g_select_jobinfo_set(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
				    &job->resv_id);
#endif

	get_cred_gres(msg->cred, conf->node_name,
		      &job->job_gres_list, &job->step_gres_list);

	list_append(job->sruns, (void *) srun);

	_job_init_task_info(job, msg->global_task_ids[nodeid],
			    msg->ifname, msg->ofname, msg->efname);

	return job;
}