static slurmd_job_t * _step_setup(slurm_addr_t *cli, slurm_addr_t *self, slurm_msg_t *msg) { slurmd_job_t *job = NULL; switch(msg->msg_type) { case REQUEST_BATCH_JOB_LAUNCH: debug2("setup for a batch_job"); job = mgr_launch_batch_job_setup(msg->data, cli); break; case REQUEST_LAUNCH_TASKS: debug2("setup for a launch_task"); job = mgr_launch_tasks_setup(msg->data, cli, self); break; default: fatal("handle_launch_message: Unrecognized launch RPC"); break; } if (!job) { error("_step_setup: no job returned"); return NULL; } job->jmgr_pid = getpid(); job->jobacct = jobacct_gather_g_create(NULL); /* Establish GRES environment variables */ if (conf->debug_flags & DEBUG_FLAG_GRES) { gres_plugin_job_state_log(job->job_gres_list, job->jobid); gres_plugin_step_state_log(job->step_gres_list, job->jobid, job->stepid); } if (msg->msg_type == REQUEST_BATCH_JOB_LAUNCH) gres_plugin_job_set_env(&job->env, job->job_gres_list); else if (msg->msg_type == REQUEST_LAUNCH_TASKS) gres_plugin_step_set_env(&job->env, job->step_gres_list); /* * Add slurmd node topology informations to job env array */ env_array_overwrite(&job->env,"SLURM_TOPOLOGY_ADDR", conf->node_topo_addr); env_array_overwrite(&job->env,"SLURM_TOPOLOGY_ADDR_PATTERN", conf->node_topo_pattern); return job; }
static int _handle_stat_jobacct(int fd, slurmd_job_t *job, uid_t uid) { jobacctinfo_t *jobacct = NULL; jobacctinfo_t *temp_jobacct = NULL; int i = 0; int num_tasks = 0; debug("_handle_stat_jobacct for job %u.%u", job->jobid, job->stepid); debug3(" uid = %d", uid); if (uid != job->uid && !_slurm_authorized_user(uid)) { debug("stat jobacct from uid %ld for job %u.%u " "owned by uid %ld", (long)uid, job->jobid, job->stepid, (long)job->uid); /* Send NULL */ jobacct_gather_g_setinfo(jobacct, JOBACCT_DATA_PIPE, &fd); return SLURM_ERROR; } jobacct = jobacct_gather_g_create(NULL); debug3("num tasks = %d", job->node_tasks); for (i = 0; i < job->node_tasks; i++) { temp_jobacct = jobacct_gather_g_stat_task(job->task[i]->pid); if(temp_jobacct) { jobacct_gather_g_aggregate(jobacct, temp_jobacct); jobacct_gather_g_destroy(temp_jobacct); num_tasks++; } } jobacct_gather_g_setinfo(jobacct, JOBACCT_DATA_PIPE, &fd); safe_write(fd, &num_tasks, sizeof(int)); jobacct_gather_g_destroy(jobacct); return SLURM_SUCCESS; rwfail: return SLURM_ERROR; }
static int _handle_completion(int fd, slurmd_job_t *job, uid_t uid) { int rc = SLURM_SUCCESS; int errnum = 0; int first; int last; jobacctinfo_t *jobacct = NULL; int step_rc; debug("_handle_completion for job %u.%u", job->jobid, job->stepid); debug3(" uid = %d", uid); if (!_slurm_authorized_user(uid)) { debug("step completion message from uid %ld for job %u.%u ", (long)uid, job->jobid, job->stepid); rc = -1; errnum = EPERM; /* Send the return code and errno */ safe_write(fd, &rc, sizeof(int)); safe_write(fd, &errnum, sizeof(int)); return SLURM_SUCCESS; } safe_read(fd, &first, sizeof(int)); safe_read(fd, &last, sizeof(int)); safe_read(fd, &step_rc, sizeof(int)); jobacct = jobacct_gather_g_create(NULL); jobacct_gather_g_getinfo(jobacct, JOBACCT_DATA_PIPE, &fd); /* * Record the completed nodes */ pthread_mutex_lock(&step_complete.lock); if (! step_complete.wait_children) { rc = -1; errnum = ETIMEDOUT; /* not used anyway */ goto timeout; } /* SlurmUser or root can craft a launch without a valid credential * ("srun --no-alloc ...") and no tree information can be built * without the hostlist from the credential. */ if (step_complete.rank >= 0) { #if 0 char bits_string[128]; debug2("Setting range %d (bit %d) through %d(bit %d)", first, first-(step_complete.rank+1), last, last-(step_complete.rank+1)); bit_fmt(bits_string, sizeof(bits_string), step_complete.bits); debug2(" before bits: %s", bits_string); #endif bit_nset(step_complete.bits, first - (step_complete.rank+1), last - (step_complete.rank+1)); #if 0 bit_fmt(bits_string, sizeof(bits_string), step_complete.bits); debug2(" after bits: %s", bits_string); #endif } step_complete.step_rc = MAX(step_complete.step_rc, step_rc); /************* acct stuff ********************/ jobacct_gather_g_aggregate(step_complete.jobacct, jobacct); timeout: jobacct_gather_g_destroy(jobacct); /*********************************************/ /* Send the return code and errno, we do this within the locked * region to ensure that the stepd doesn't exit before we can * perform this send. */ safe_write(fd, &rc, sizeof(int)); safe_write(fd, &errnum, sizeof(int)); pthread_cond_signal(&step_complete.cond); pthread_mutex_unlock(&step_complete.lock); return SLURM_SUCCESS; rwfail: return SLURM_FAILURE; }
/* * This function handles the initialization information from slurmd * sent by _send_slurmstepd_init() in src/slurmd/slurmd/req.c. */ static int _init_from_slurmd(int sock, char **argv, slurm_addr_t **_cli, slurm_addr_t **_self, slurm_msg_t **_msg, int *_ngids, gid_t **_gids) { char *incoming_buffer = NULL; Buf buffer; int step_type; int len; slurm_addr_t *cli = NULL; slurm_addr_t *self = NULL; slurm_msg_t *msg = NULL; int ngids = 0; gid_t *gids = NULL; uint16_t port; char buf[16]; log_options_t lopts = LOG_OPTS_INITIALIZER; log_init(argv[0], lopts, LOG_DAEMON, NULL); /* receive job type from slurmd */ safe_read(sock, &step_type, sizeof(int)); debug3("step_type = %d", step_type); /* receive reverse-tree info from slurmd */ pthread_mutex_lock(&step_complete.lock); safe_read(sock, &step_complete.rank, sizeof(int)); safe_read(sock, &step_complete.parent_rank, sizeof(int)); safe_read(sock, &step_complete.children, sizeof(int)); safe_read(sock, &step_complete.depth, sizeof(int)); safe_read(sock, &step_complete.max_depth, sizeof(int)); safe_read(sock, &step_complete.parent_addr, sizeof(slurm_addr_t)); step_complete.bits = bit_alloc(step_complete.children); step_complete.jobacct = jobacct_gather_g_create(NULL); pthread_mutex_unlock(&step_complete.lock); /* receive conf from slurmd */ if ((conf = read_slurmd_conf_lite (sock)) == NULL) fatal("Failed to read conf from slurmd"); log_alter(conf->log_opts, 0, conf->logfile); debug2("debug level is %d.", conf->debug_level); /* acct info */ jobacct_gather_g_startpoll(conf->job_acct_gather_freq); switch_g_slurmd_step_init(); slurm_get_ip_str(&step_complete.parent_addr, &port, buf, 16); debug3("slurmstepd rank %d, parent address = %s, port = %u", step_complete.rank, buf, port); /* receive cli from slurmd */ safe_read(sock, &len, sizeof(int)); incoming_buffer = xmalloc(sizeof(char) * len); safe_read(sock, incoming_buffer, len); buffer = create_buf(incoming_buffer,len); cli = xmalloc(sizeof(slurm_addr_t)); if(slurm_unpack_slurm_addr_no_alloc(cli, buffer) == SLURM_ERROR) fatal("slurmstepd: problem with unpack of slurmd_conf"); free_buf(buffer); /* receive self from slurmd */ safe_read(sock, &len, sizeof(int)); if (len > 0) { /* receive packed self from main slurmd */ incoming_buffer = xmalloc(sizeof(char) * len); safe_read(sock, incoming_buffer, len); buffer = create_buf(incoming_buffer,len); self = xmalloc(sizeof(slurm_addr_t)); if (slurm_unpack_slurm_addr_no_alloc(self, buffer) == SLURM_ERROR) { fatal("slurmstepd: problem with unpack of " "slurmd_conf"); } free_buf(buffer); } /* Receive GRES information from slurmd */ gres_plugin_recv_stepd(sock); /* receive req from slurmd */ safe_read(sock, &len, sizeof(int)); incoming_buffer = xmalloc(sizeof(char) * len); safe_read(sock, incoming_buffer, len); buffer = create_buf(incoming_buffer,len); msg = xmalloc(sizeof(slurm_msg_t)); slurm_msg_t_init(msg); switch(step_type) { case LAUNCH_BATCH_JOB: msg->msg_type = REQUEST_BATCH_JOB_LAUNCH; break; case LAUNCH_TASKS: msg->msg_type = REQUEST_LAUNCH_TASKS; break; default: fatal("Unrecognized launch RPC"); break; } if(unpack_msg(msg, buffer) == SLURM_ERROR) fatal("slurmstepd: we didn't unpack the request correctly"); free_buf(buffer); /* receive cached group ids array for the relevant uid */ safe_read(sock, &ngids, sizeof(int)); if (ngids > 0) { int i; uint32_t tmp32; gids = (gid_t *)xmalloc(sizeof(gid_t) * ngids); for (i = 0; i < ngids; i++) { safe_read(sock, &tmp32, sizeof(uint32_t)); gids[i] = (gid_t)tmp32; debug2("got gid %d", gids[i]); } } *_cli = cli; *_self = self; *_msg = msg; *_ngids = ngids; *_gids = gids; return 1; rwfail: fatal("Error reading initialization data from slurmd"); exit(1); }
static int _handle_completion(int fd, slurmd_job_t *job, uid_t uid, int protocol) { int rc = SLURM_SUCCESS; int errnum = 0; int first; int last; jobacctinfo_t *jobacct = NULL; int step_rc; char* buf; int len; Buf buffer; int version; /* For future use */ bool lock_set = false; debug("_handle_completion for job %u.%u", job->jobid, job->stepid); debug3(" uid = %d", uid); if (!_slurm_authorized_user(uid)) { debug("step completion message from uid %ld for job %u.%u ", (long)uid, job->jobid, job->stepid); rc = -1; errnum = EPERM; /* Send the return code and errno */ safe_write(fd, &rc, sizeof(int)); safe_write(fd, &errnum, sizeof(int)); return SLURM_SUCCESS; } if (protocol >= 2) safe_read(fd, &version, sizeof(int)); safe_read(fd, &first, sizeof(int)); safe_read(fd, &last, sizeof(int)); safe_read(fd, &step_rc, sizeof(int)); if (protocol >= 2) { /* * We must not use getinfo over a pipe with slurmd here * Indeed, slurmstepd does a large use of setinfo over a pipe * with slurmd and doing the reverse can result in a deadlock * scenario with slurmd : * slurmd(lockforread,write)/slurmstepd(write,lockforread) * Do pack/unpack instead to be sure of independances of * slurmd and slurmstepd */ safe_read(fd, &len, sizeof(int)); buf = xmalloc(len); safe_read(fd, buf, len); buffer = create_buf(buf, len); jobacct_gather_g_unpack(&jobacct, SLURM_PROTOCOL_VERSION, buffer); free_buf(buffer); } else { jobacct = jobacct_gather_g_create(NULL); jobacct_gather_g_getinfo(jobacct, JOBACCT_DATA_PIPE, &fd); } /* * Record the completed nodes */ pthread_mutex_lock(&step_complete.lock); lock_set = true; if (! step_complete.wait_children) { rc = -1; errnum = ETIMEDOUT; /* not used anyway */ goto timeout; } /* SlurmUser or root can craft a launch without a valid credential * ("srun --no-alloc ...") and no tree information can be built * without the hostlist from the credential. */ if (step_complete.rank >= 0) { #if 0 char bits_string[128]; debug2("Setting range %d (bit %d) through %d(bit %d)", first, first-(step_complete.rank+1), last, last-(step_complete.rank+1)); bit_fmt(bits_string, sizeof(bits_string), step_complete.bits); debug2(" before bits: %s", bits_string); #endif bit_nset(step_complete.bits, first - (step_complete.rank+1), last - (step_complete.rank+1)); #if 0 bit_fmt(bits_string, sizeof(bits_string), step_complete.bits); debug2(" after bits: %s", bits_string); #endif } step_complete.step_rc = MAX(step_complete.step_rc, step_rc); /************* acct stuff ********************/ jobacct_gather_g_aggregate(step_complete.jobacct, jobacct); timeout: jobacct_gather_g_destroy(jobacct); /*********************************************/ /* Send the return code and errno, we do this within the locked * region to ensure that the stepd doesn't exit before we can * perform this send. */ safe_write(fd, &rc, sizeof(int)); safe_write(fd, &errnum, sizeof(int)); pthread_cond_signal(&step_complete.cond); pthread_mutex_unlock(&step_complete.lock); return SLURM_SUCCESS; rwfail: if (lock_set) { pthread_cond_signal(&step_complete.cond); pthread_mutex_unlock(&step_complete.lock); } return SLURM_FAILURE; }