/* create a slurmd job structure from a launch tasks message */
extern stepd_step_rec_t *stepd_step_rec_create(launch_tasks_request_msg_t *msg,
					       uint16_t protocol_version)
{
	stepd_step_rec_t *job = NULL;
	srun_info_t *srun = NULL;
	slurm_addr_t resp_addr;
	slurm_addr_t io_addr;
	int i, nodeid = NO_VAL;

	xassert(msg != NULL);
	xassert(msg->complete_nodelist != NULL);
	debug3("entering stepd_step_rec_create");

	if (acct_gather_check_acct_freq_task(msg->job_mem_lim,
					     msg->acctg_freq))
		return NULL;

	job = xmalloc(sizeof(stepd_step_rec_t));
	job->msg = msg;
#ifndef HAVE_FRONT_END
	nodeid = nodelist_find(msg->complete_nodelist, conf->node_name);
	job->node_name = xstrdup(conf->node_name);
#else
	nodeid = 0;
	job->node_name = xstrdup(msg->complete_nodelist);
#endif
	if (nodeid < 0) {
		error("couldn't find node %s in %s",
		      job->node_name, msg->complete_nodelist);
		stepd_step_rec_destroy(job);
		return NULL;
	}

	job->state = SLURMSTEPD_STEP_STARTING;
	slurm_cond_init(&job->state_cond, NULL);
	slurm_mutex_init(&job->state_mutex);
	job->node_tasks = msg->tasks_to_launch[nodeid];
	i = sizeof(uint16_t) * msg->nnodes;
	job->task_cnts = xmalloc(i);
	memcpy(job->task_cnts, msg->tasks_to_launch, i);
	job->ntasks = msg->ntasks;
	job->jobid = msg->job_id;
	job->stepid = msg->job_step_id;

	job->uid = (uid_t) msg->uid;
	job->gid = (gid_t) msg->gid;
	job->user_name = xstrdup(msg->user_name);
	job->ngids = (int) msg->ngids;
	job->gids = copy_gids(msg->ngids, msg->gids);

	job->cwd = xstrdup(msg->cwd);
	job->task_dist = msg->task_dist;

	job->cpu_bind_type = msg->cpu_bind_type;
	job->cpu_bind = xstrdup(msg->cpu_bind);
	job->mem_bind_type = msg->mem_bind_type;
	job->mem_bind = xstrdup(msg->mem_bind);
	job->cpu_freq_min = msg->cpu_freq_min;
	job->cpu_freq_max = msg->cpu_freq_max;
	job->cpu_freq_gov = msg->cpu_freq_gov;
	job->ckpt_dir = xstrdup(msg->ckpt_dir);
	job->restart_dir = xstrdup(msg->restart_dir);
	job->cpus_per_task = msg->cpus_per_task;

	job->env = _array_copy(msg->envc, msg->env);
	job->array_job_id = msg->job_id;
	job->array_task_id = NO_VAL;
	job->node_offset = msg->node_offset;	/* Used for env vars */
	job->pack_jobid = msg->pack_jobid;	/* Used for env vars */
	job->pack_nnodes = msg->pack_nnodes;	/* Used for env vars */
	if (msg->pack_nnodes && msg->pack_ntasks && msg->pack_task_cnts) {
		job->pack_ntasks = msg->pack_ntasks;	/* Used for env vars */
		i = sizeof(uint16_t) * msg->pack_nnodes;
		job->pack_task_cnts = xmalloc(i);
		memcpy(job->pack_task_cnts, msg->pack_task_cnts, i);
	}
	job->pack_offset = msg->pack_offset;	/* Used for env vars & labels */
	job->pack_task_offset = msg->pack_task_offset;	/* Used for env vars
							 * & labels */
	job->pack_node_list = xstrdup(msg->pack_node_list);
	for (i = 0; i < msg->envc; i++) {
		/*                         1234567890123456789 */
		if (!xstrncmp(msg->env[i], "SLURM_ARRAY_JOB_ID=", 19))
			job->array_job_id = atoi(msg->env[i] + 19);
		/*                         12345678901234567890 */
		if (!xstrncmp(msg->env[i], "SLURM_ARRAY_TASK_ID=", 20))
			job->array_task_id = atoi(msg->env[i] + 20);
	}

	job->eio     = eio_handle_create(0);
	job->sruns   = list_create((ListDelF) _srun_info_destructor);

	/*
	 * Based on my testing the next 3 lists here could use the
	 * eio_obj_destroy, but if you do you can get an invalid read.  Since
	 * these stay until the end of the job it isn't that big of a deal.
	 */
	job->clients = list_create(NULL); /* FIXME! Needs destructor */
	job->stdout_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->stderr_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->free_incoming = list_create(NULL); /* FIXME! Needs destructor */
	job->incoming_count = 0;
	job->free_outgoing = list_create(NULL); /* FIXME! Needs destructor */
	job->outgoing_count = 0;
	job->outgoing_cache = list_create(NULL); /* FIXME! Needs destructor */

	job->envtp   = xmalloc(sizeof(env_t));
	job->envtp->jobid = -1;
	job->envtp->stepid = -1;
	job->envtp->procid = -1;
	job->envtp->localid = -1;
	job->envtp->nodeid = -1;
	job->envtp->distribution = 0;
	job->envtp->cpu_bind_type = 0;
	job->envtp->cpu_bind = NULL;
	job->envtp->mem_bind_type = 0;
	job->envtp->mem_bind = NULL;
	job->envtp->ckpt_dir = NULL;

	if (!msg->resp_port)
		msg->num_resp_port = 0;
	if (msg->num_resp_port) {
		job->envtp->comm_port =
			msg->resp_port[nodeid % msg->num_resp_port];
		memcpy(&resp_addr, &msg->orig_addr, sizeof(slurm_addr_t));
		slurm_set_addr(&resp_addr,
			       msg->resp_port[nodeid % msg->num_resp_port],
			       NULL);
	} else {
		memset(&resp_addr, 0, sizeof(slurm_addr_t));
	}

	if (!msg->io_port)
		msg->flags |= LAUNCH_USER_MANAGED_IO;
	if ((msg->flags & LAUNCH_USER_MANAGED_IO) == 0) {
		memcpy(&io_addr, &msg->orig_addr, sizeof(slurm_addr_t));
		slurm_set_addr(&io_addr,
			       msg->io_port[nodeid % msg->num_io_port],
			       NULL);
	} else {
		memset(&io_addr, 0, sizeof(slurm_addr_t));
	}

	srun = srun_info_create(msg->cred, &resp_addr, &io_addr,
				protocol_version);

	job->profile     = msg->profile;
	job->task_prolog = xstrdup(msg->task_prolog);
	job->task_epilog = xstrdup(msg->task_epilog);

	job->argc    = msg->argc;
	job->argv    = _array_copy(job->argc, msg->argv);

	job->nnodes  = msg->nnodes;
	job->nodeid  = nodeid;
	job->debug   = msg->slurmd_debug;
	job->cpus    = msg->node_cpus;
	job->job_core_spec = msg->job_core_spec;

	/* This needs to happen before acct_gather_profile_startpoll
	   and only really looks at the profile in the job. */
	acct_gather_profile_g_node_step_start(job);

	acct_gather_profile_startpoll(msg->acctg_freq,
				      conf->job_acct_gather_freq);

	job->timelimit   = (time_t) -1;
	job->flags       = msg->flags;
	job->switch_job  = msg->switch_job;
	job->open_mode   = msg->open_mode;
	job->options     = msg->options;

	format_core_allocs(msg->cred, conf->node_name, conf->cpus,
			   &job->job_alloc_cores, &job->step_alloc_cores,
			   &job->job_mem, &job->step_mem);

	/* If users have configured MemLimitEnforce=no
	 * in their slurm.conf keep going.
	 */
	if (job->step_mem && conf->mem_limit_enforce) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->step_mem);
	} else if (job->job_mem && conf->mem_limit_enforce) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->job_mem);
	}

#ifdef HAVE_ALPS_CRAY
	/* This is only used for Cray emulation mode where slurmd is used to
	 * launch job steps. On a real Cray system, ALPS is used to launch
	 * the tasks instead of SLURM. SLURM's task launch RPC does NOT
	 * contain the reservation ID, so just use some non-zero value here
	 * for testing purposes. */
	job->resv_id = 1;
	select_g_select_jobinfo_set(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
				    &job->resv_id);
#endif

	/* only need these values on the extern step, don't copy otherwise */
	if ((msg->job_step_id == SLURM_EXTERN_CONT) && msg->x11) {
		job->x11 = msg->x11;
		job->x11_magic_cookie = xstrdup(msg->x11_magic_cookie);
		job->x11_target_host = xstrdup(msg->x11_target_host);
		job->x11_target_port = msg->x11_target_port;
	}

	get_cred_gres(msg->cred, conf->node_name,
		      &job->job_gres_list, &job->step_gres_list);

	list_append(job->sruns, (void *) srun);

	_job_init_task_info(job, msg->global_task_ids,
			    msg->ifname, msg->ofname, msg->efname);

	return job;
}
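/*
 * Usage sketch (illustrative assumption, not part of the original source):
 * slurmstepd would hand the unpacked launch RPC plus the connection's
 * protocol version to stepd_step_rec_create() and must check for NULL,
 * since every validation failure above returns NULL. The helper name
 * _handle_launch is hypothetical.
 */
#if 0	/* hypothetical caller, kept out of the build */
static int _handle_launch(launch_tasks_request_msg_t *msg,
			  uint16_t protocol_version)
{
	stepd_step_rec_t *job = stepd_step_rec_create(msg, protocol_version);

	if (!job)	/* e.g. bad accounting frequency or unknown node */
		return SLURM_ERROR;
	/* ... launch tasks, then stepd_step_rec_destroy(job) ... */
	return SLURM_SUCCESS;
}
#endif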
/* create a slurmd job structure from a launch tasks message */
extern stepd_step_rec_t *
stepd_step_rec_create(launch_tasks_request_msg_t *msg)
{
	stepd_step_rec_t *job = NULL;
	srun_info_t *srun = NULL;
	slurm_addr_t resp_addr;
	slurm_addr_t io_addr;
	int i, nodeid = NO_VAL;

	xassert(msg != NULL);
	xassert(msg->complete_nodelist != NULL);
	debug3("entering stepd_step_rec_create");

	if (!_valid_uid_gid((uid_t)msg->uid, &(msg->gid), &(msg->user_name)))
		return NULL;

	if (_check_acct_freq_task(msg->job_mem_lim, msg->acctg_freq))
		return NULL;

	job = xmalloc(sizeof(stepd_step_rec_t));
#ifndef HAVE_FRONT_END
	nodeid = nodelist_find(msg->complete_nodelist, conf->node_name);
	job->node_name = xstrdup(conf->node_name);
#else
	nodeid = 0;
	job->node_name = xstrdup(msg->complete_nodelist);
#endif
	if (nodeid < 0) {
		error("couldn't find node %s in %s",
		      job->node_name, msg->complete_nodelist);
		stepd_step_rec_destroy(job);
		return NULL;
	}

	job->state = SLURMSTEPD_STEP_STARTING;
	job->node_tasks = msg->tasks_to_launch[nodeid];
	job->ntasks = msg->ntasks;
	job->jobid = msg->job_id;
	job->stepid = msg->job_step_id;

	job->uid = (uid_t) msg->uid;
	job->user_name = xstrdup(msg->user_name);
	job->gid = (gid_t) msg->gid;
	job->cwd = xstrdup(msg->cwd);
	job->task_dist = msg->task_dist;

	job->cpu_bind_type = msg->cpu_bind_type;
	job->cpu_bind = xstrdup(msg->cpu_bind);
	job->mem_bind_type = msg->mem_bind_type;
	job->mem_bind = xstrdup(msg->mem_bind);
	job->cpu_freq = msg->cpu_freq;
	job->ckpt_dir = xstrdup(msg->ckpt_dir);
	job->restart_dir = xstrdup(msg->restart_dir);
	job->cpus_per_task = msg->cpus_per_task;

	job->env = _array_copy(msg->envc, msg->env);
	job->array_job_id = msg->job_id;
	job->array_task_id = (uint16_t) NO_VAL;
	for (i = 0; i < msg->envc; i++) {
		/*                        1234567890123456789 */
		if (!strncmp(msg->env[i], "SLURM_ARRAY_JOB_ID=", 19))
			job->array_job_id = atoi(msg->env[i] + 19);
		/*                        12345678901234567890 */
		if (!strncmp(msg->env[i], "SLURM_ARRAY_TASK_ID=", 20))
			job->array_task_id = atoi(msg->env[i] + 20);
	}

	job->eio     = eio_handle_create();
	job->sruns   = list_create((ListDelF) _srun_info_destructor);
	job->clients = list_create(NULL); /* FIXME! Needs destructor */
	job->stdout_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->stderr_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->free_incoming = list_create(NULL); /* FIXME! Needs destructor */
	job->incoming_count = 0;
	job->free_outgoing = list_create(NULL); /* FIXME! Needs destructor */
	job->outgoing_count = 0;
	job->outgoing_cache = list_create(NULL); /* FIXME! Needs destructor */

	job->envtp   = xmalloc(sizeof(env_t));
	job->envtp->jobid = -1;
	job->envtp->stepid = -1;
	job->envtp->procid = -1;
	job->envtp->localid = -1;
	job->envtp->nodeid = -1;
	job->envtp->distribution = 0;
	job->envtp->cpu_bind_type = 0;
	job->envtp->cpu_bind = NULL;
	job->envtp->mem_bind_type = 0;
	job->envtp->mem_bind = NULL;
	job->envtp->ckpt_dir = NULL;
	job->envtp->comm_port = msg->resp_port[nodeid % msg->num_resp_port];

	memcpy(&resp_addr, &msg->orig_addr, sizeof(slurm_addr_t));
	slurm_set_addr(&resp_addr,
		       msg->resp_port[nodeid % msg->num_resp_port],
		       NULL);
	job->user_managed_io = msg->user_managed_io;
	if (!msg->user_managed_io) {
		memcpy(&io_addr, &msg->orig_addr, sizeof(slurm_addr_t));
		slurm_set_addr(&io_addr,
			       msg->io_port[nodeid % msg->num_io_port],
			       NULL);
	} else {
		/* Zero the address so srun_info_create() below is never
		 * handed uninitialized memory; the original left io_addr
		 * unset on this path. */
		memset(&io_addr, 0, sizeof(slurm_addr_t));
	}

	srun = srun_info_create(msg->cred, &resp_addr, &io_addr);

	job->buffered_stdio = msg->buffered_stdio;
	job->labelio = msg->labelio;

	job->profile     = msg->profile;
	job->task_prolog = xstrdup(msg->task_prolog);
	job->task_epilog = xstrdup(msg->task_epilog);

	job->argc    = msg->argc;
	job->argv    = _array_copy(job->argc, msg->argv);

	job->nnodes  = msg->nnodes;
	job->nodeid  = nodeid;
	job->debug   = msg->slurmd_debug;
	job->cpus    = msg->cpus_allocated[nodeid];

	/* This needs to happen before acct_gather_profile_startpoll
	   and only really looks at the profile in the job. */
	acct_gather_profile_g_node_step_start(job);

	acct_gather_profile_startpoll(msg->acctg_freq,
				      conf->job_acct_gather_freq);

	job->multi_prog  = msg->multi_prog;
	job->timelimit   = (time_t) -1;
	job->task_flags  = msg->task_flags;
	job->switch_job  = msg->switch_job;
	job->pty         = msg->pty;
	job->open_mode   = msg->open_mode;
	job->options     = msg->options;

	format_core_allocs(msg->cred, conf->node_name, conf->cpus,
			   &job->job_alloc_cores, &job->step_alloc_cores,
			   &job->job_mem, &job->step_mem);

	if (job->step_mem) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->step_mem);
	} else if (job->job_mem) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->job_mem);
	}

#ifdef HAVE_ALPS_CRAY
	/* This is only used for Cray emulation mode where slurmd is used to
	 * launch job steps. On a real Cray system, ALPS is used to launch
	 * the tasks instead of SLURM. SLURM's task launch RPC does NOT
	 * contain the reservation ID, so just use some non-zero value here
	 * for testing purposes. */
	job->resv_id = 1;
	select_g_select_jobinfo_set(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
				    &job->resv_id);
#endif

	get_cred_gres(msg->cred, conf->node_name,
		      &job->job_gres_list, &job->step_gres_list);

	list_append(job->sruns, (void *) srun);

	_job_init_task_info(job, msg->global_task_ids[nodeid],
			    msg->ifname, msg->ofname, msg->efname);

	return job;
}
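/*
 * Sketch of what the recurring "FIXME! Needs destructor" could look like
 * (an assumption, not a change either revision makes): list_create()
 * accepts a ListDelF that list_destroy() runs per element, exactly as the
 * sruns list does with _srun_info_destructor. The newest revision of this
 * function notes that wiring in eio_obj_destroy caused an invalid read,
 * which is why these lists still pass NULL.
 */
#if 0	/* hypothetical destructor wiring, kept out of the build */
	job->clients = list_create((ListDelF) eio_obj_destroy);
#endif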
/* create a slurmd job structure from a launch tasks message */
slurmd_job_t *
job_create(launch_tasks_request_msg_t *msg)
{
	struct passwd *pwd = NULL;
	slurmd_job_t  *job = NULL;
	srun_info_t   *srun = NULL;
	slurm_addr_t resp_addr;
	slurm_addr_t io_addr;
	int nodeid = NO_VAL;

	xassert(msg != NULL);
	xassert(msg->complete_nodelist != NULL);
	debug3("entering job_create");

	if ((pwd = _pwd_create((uid_t)msg->uid)) == NULL) {
		error("uid %ld not found on system", (long) msg->uid);
		slurm_seterrno(ESLURMD_UID_NOT_FOUND);
		return NULL;
	}
	if (!_valid_gid(pwd, &(msg->gid))) {
		slurm_seterrno(ESLURMD_GID_NOT_FOUND);
		_pwd_destroy(pwd);
		return NULL;
	}

	if (msg->job_mem_lim && (msg->acctg_freq != (uint16_t) NO_VAL) &&
	    (msg->acctg_freq > conf->job_acct_gather_freq)) {
		error("Can't set frequency to %u, it is higher than %u. "
		      "We need it to be at least at this level to "
		      "monitor memory usage.",
		      msg->acctg_freq, conf->job_acct_gather_freq);
		slurm_seterrno(ESLURMD_INVALID_ACCT_FREQ);
		_pwd_destroy(pwd);
		return NULL;
	}

	job = xmalloc(sizeof(slurmd_job_t));
#ifndef HAVE_FRONT_END
	nodeid = nodelist_find(msg->complete_nodelist, conf->node_name);
	job->node_name = xstrdup(conf->node_name);
#else
	nodeid = 0;
	job->node_name = xstrdup(msg->complete_nodelist);
#endif
	if (nodeid < 0) {
		error("couldn't find node %s in %s",
		      job->node_name, msg->complete_nodelist);
		job_destroy(job);
		return NULL;
	}

	job->state = SLURMSTEPD_STEP_STARTING;
	job->pwd = pwd;
	job->node_tasks = msg->tasks_to_launch[nodeid];
	job->ntasks = msg->ntasks;
	job->jobid = msg->job_id;
	job->stepid = msg->job_step_id;

	job->uid = (uid_t) msg->uid;
	job->gid = (gid_t) msg->gid;
	job->cwd = xstrdup(msg->cwd);
	job->task_dist = msg->task_dist;

	job->cpu_bind_type = msg->cpu_bind_type;
	job->cpu_bind = xstrdup(msg->cpu_bind);
	job->mem_bind_type = msg->mem_bind_type;
	job->mem_bind = xstrdup(msg->mem_bind);
	job->cpu_freq = msg->cpu_freq;
	job->ckpt_dir = xstrdup(msg->ckpt_dir);
	job->restart_dir = xstrdup(msg->restart_dir);
	job->cpus_per_task = msg->cpus_per_task;

	job->env     = _array_copy(msg->envc, msg->env);
	job->eio     = eio_handle_create();
	job->sruns   = list_create((ListDelF) _srun_info_destructor);
	job->clients = list_create(NULL); /* FIXME! Needs destructor */
	job->stdout_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->stderr_eio_objs = list_create(NULL); /* FIXME! Needs destructor */
	job->free_incoming = list_create(NULL); /* FIXME! Needs destructor */
	job->incoming_count = 0;
	job->free_outgoing = list_create(NULL); /* FIXME! Needs destructor */
	job->outgoing_count = 0;
	job->outgoing_cache = list_create(NULL); /* FIXME! Needs destructor */

	job->envtp   = xmalloc(sizeof(env_t));
	job->envtp->jobid = -1;
	job->envtp->stepid = -1;
	job->envtp->procid = -1;
	job->envtp->localid = -1;
	job->envtp->nodeid = -1;
	job->envtp->distribution = 0;
	job->envtp->cpu_bind_type = 0;
	job->envtp->cpu_bind = NULL;
	job->envtp->mem_bind_type = 0;
	job->envtp->mem_bind = NULL;
	job->envtp->ckpt_dir = NULL;

	//job->envtp->comm_port = msg->resp_port[nodeid % msg->num_resp_port];
	/*memcpy(&resp_addr, &msg->orig_addr, sizeof(slurm_addr_t));
	slurm_set_addr(&resp_addr,
		       msg->resp_port[nodeid % msg->num_resp_port],
		       NULL);
	job->user_managed_io = msg->user_managed_io;
	if (!msg->user_managed_io) {
		memcpy(&io_addr, &msg->orig_addr, sizeof(slurm_addr_t));
		slurm_set_addr(&io_addr,
			       msg->io_port[nodeid % msg->num_io_port],
			       NULL);
	}*/
	//srun = srun_info_create(msg->cred, &resp_addr, &io_addr);
	srun = srun_info_create(NULL, NULL, NULL);

	job->buffered_stdio = msg->buffered_stdio;
	job->labelio = msg->labelio;

	job->task_prolog = xstrdup(msg->task_prolog);
	job->task_epilog = xstrdup(msg->task_epilog);

	job->argc    = msg->argc;
	job->argv    = _array_copy(job->argc, msg->argv);

	job->nnodes  = msg->nnodes;
	job->nodeid  = nodeid;
	job->debug   = msg->slurmd_debug;
	job->cpus    = msg->cpus_allocated[nodeid];

	if (msg->acctg_freq != (uint16_t) NO_VAL)
		jobacct_gather_change_poll(msg->acctg_freq);

	job->multi_prog  = msg->multi_prog;
	job->timelimit   = (time_t) -1;
	job->task_flags  = msg->task_flags;
	job->switch_job  = msg->switch_job;
	job->pty         = msg->pty;
	job->open_mode   = msg->open_mode;
	job->options     = msg->options;

	format_core_allocs(msg->cred, conf->node_name,
			   &job->job_alloc_cores, &job->step_alloc_cores,
			   &job->job_mem, &job->step_mem);

	if (job->step_mem) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->step_mem);
	} else if (job->job_mem) {
		jobacct_gather_set_mem_limit(job->jobid, job->stepid,
					     job->job_mem);
	}

#ifdef HAVE_CRAY
	/* This is only used for Cray emulation mode where slurmd is used to
	 * launch job steps. On a real Cray system, ALPS is used to launch
	 * the tasks instead of SLURM. SLURM's task launch RPC does NOT
	 * contain the reservation ID, so just use some non-zero value here
	 * for testing purposes. */
	job->resv_id = 1;
	select_g_select_jobinfo_set(msg->select_jobinfo, SELECT_JOBDATA_RESV_ID,
				    &job->resv_id);
#endif

	get_cred_gres(msg->cred, conf->node_name,
		      &job->job_gres_list, &job->step_gres_list);

	list_append(job->sruns, (void *) srun);

	_job_init_task_info(job, msg->global_task_ids[nodeid],
			    msg->ifname, msg->ofname, msg->efname);

	return job;
}
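/*
 * Usage sketch for the legacy API (illustrative assumption, not from the
 * original source): unlike the newer stepd_step_rec_create() variants,
 * job_create() reports failures through slurm_seterrno(), so a caller can
 * recover the reason with slurm_strerror() after a NULL return.
 */
#if 0	/* hypothetical caller, kept out of the build */
	slurmd_job_t *job = job_create(msg);
	if (job == NULL)	/* errno was set via slurm_seterrno() */
		error("job_create failed: %s", slurm_strerror(errno));
#endif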