/*
 * _handle_stat_jobacct - reply on fd with the accounting data gathered for
 *	the tasks of this step.
 *
 * The requester must either own the step or satisfy
 * _slurm_authorized_user().  Otherwise jobacctinfo_setinfo() is called with
 * a NULL record and SLURM_ERROR is returned.  For a permitted requester the
 * per-task records returned by jobacct_gather_stat_task() are aggregated
 * into one record, which is sent over fd followed by the number of tasks
 * that produced a record.
 *
 * IN fd  - descriptor the reply is written to
 * IN job - step record supplying the ids, the owner uid and the task array
 * IN uid - uid of the requester
 * RET SLURM_SUCCESS when the reply was written, SLURM_ERROR when the request
 *     was refused or the write of the task count failed
 */
static int _handle_stat_jobacct(int fd, stepd_step_rec_t *job, uid_t uid)
{
	jobacctinfo_t *total = NULL;
	jobacctinfo_t *task_acct = NULL;
	int status = SLURM_ERROR;
	int num_tasks = 0;
	int idx = 0;

	debug("_handle_stat_jobacct for job %u.%u", job->jobid, job->stepid);
	debug3(" uid = %d", uid);

	if (!(uid == job->uid || _slurm_authorized_user(uid))) {
		debug("stat jobacct from uid %ld for job %u.%u owned by uid %ld",
		      (long) uid, job->jobid, job->stepid, (long) job->uid);
		/* Nothing has been gathered yet: "total" is still NULL */
		jobacctinfo_setinfo(total, JOBACCT_DATA_PIPE, &fd,
				    SLURM_PROTOCOL_VERSION);
		return SLURM_ERROR;
	}

	total = jobacctinfo_create(NULL);
	debug3("num tasks = %d", job->node_tasks);

	while (idx < job->node_tasks) {
		task_acct = jobacct_gather_stat_task(job->task[idx]->pid);
		idx++;
		if (!task_acct)
			continue;	/* no record for this task */

		jobacctinfo_aggregate(total, task_acct);
		jobacctinfo_destroy(task_acct);
		num_tasks++;
	}

	jobacctinfo_setinfo(total, JOBACCT_DATA_PIPE, &fd,
			    SLURM_PROTOCOL_VERSION);
	safe_write(fd, &num_tasks, sizeof(int));
	status = SLURM_SUCCESS;

	/* safe_write() jumps here on failure, leaving status at SLURM_ERROR */
rwfail:
	jobacctinfo_destroy(total);
	return status;
}
/*
 * _handle_completion - process a step completion message read from fd.
 *
 * Reads a version, the first and last bounds of the completed range, the
 * step return code and a packed jobacctinfo record from fd.  The completion
 * is then recorded in the shared step_complete state and a return code plus
 * errno value are written back to fd.
 *
 * IN fd  - descriptor the request is read from and the reply is written to
 * IN job - step record (only used for log messages here)
 * IN uid - uid of the requester; must satisfy _slurm_authorized_user()
 * RET SLURM_SUCCESS once a reply has been written (this includes the EPERM
 *     and ETIMEDOUT replies), SLURM_FAILURE if a read or write on fd failed
 */
static int _handle_completion(int fd, stepd_step_rec_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	int first;
	int last;
	jobacctinfo_t *jobacct = NULL;
	int step_rc;
	char *buf;
	int len;
	Buf buffer = NULL;
	int version;	/* For future use */
	bool lock_set = false;

	debug("_handle_completion for job %u.%u", job->jobid, job->stepid);
	debug3(" uid = %d", uid);

	if (!_slurm_authorized_user(uid)) {
		debug("step completion message from uid %ld for job %u.%u ",
		      (long)uid, job->jobid, job->stepid);
		rc = -1;
		errnum = EPERM;
		/* Send the return code and errno */
		safe_write(fd, &rc, sizeof(int));
		safe_write(fd, &errnum, sizeof(int));
		return SLURM_SUCCESS;
	}

	safe_read(fd, &version, sizeof(int));
	safe_read(fd, &first, sizeof(int));
	safe_read(fd, &last, sizeof(int));
	safe_read(fd, &step_rc, sizeof(int));

	/*
	 * We must not use getinfo over a pipe with slurmd here.
	 * slurmstepd makes heavy use of setinfo over a pipe with slurmd,
	 * and doing the reverse can result in a deadlock scenario with
	 * slurmd:
	 * slurmd(lockforread,write)/slurmstepd(write,lockforread)
	 * Do pack/unpack instead to keep slurmd and slurmstepd independent.
	 */
	safe_read(fd, &len, sizeof(int));
	buf = xmalloc(len);
	/*
	 * Wrap buf before reading into it so that a failed read can release
	 * the allocation through free_buf() in the rwfail path; previously
	 * it was leaked.
	 * NOTE(review): assumes create_buf() adopts buf without copying it
	 * and that free_buf() releases it, as the pre-existing success path
	 * (which never freed buf itself) implies -- confirm against pack.c.
	 */
	buffer = create_buf(buf, len);
	safe_read(fd, buf, len);
	jobacctinfo_unpack(&jobacct, SLURM_PROTOCOL_VERSION,
			   PROTOCOL_TYPE_SLURM, buffer, 1);
	free_buf(buffer);
	buffer = NULL;	/* released; keep rwfail from freeing it again */

	/*
	 * Record the completed nodes
	 */
	pthread_mutex_lock(&step_complete.lock);
	lock_set = true;
	if (!step_complete.wait_children) {
		rc = -1;
		errnum = ETIMEDOUT; /* not used anyway */
		goto timeout;
	}

	/*
	 * SlurmUser or root can craft a launch without a valid credential
	 * ("srun --no-alloc ...") and no tree information can be built
	 * without the hostlist from the credential.
	 */
	if (step_complete.rank >= 0) {
#if 0
		char bits_string[128];
		debug2("Setting range %d (bit %d) through %d(bit %d)",
		       first, first-(step_complete.rank+1),
		       last, last-(step_complete.rank+1));
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2(" before bits: %s", bits_string);
#endif
		bit_nset(step_complete.bits,
			 first - (step_complete.rank+1),
			 last - (step_complete.rank+1));
#if 0
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2(" after bits: %s", bits_string);
#endif
	}
	step_complete.step_rc = MAX(step_complete.step_rc, step_rc);

	/************* acct stuff ********************/
	jobacctinfo_aggregate(step_complete.jobacct, jobacct);
timeout:
	jobacctinfo_destroy(jobacct);
	/*********************************************/

	/*
	 * Send the return code and errno, we do this within the locked
	 * region to ensure that the stepd doesn't exit before we can
	 * perform this send.
	 */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));
	pthread_cond_signal(&step_complete.cond);
	pthread_mutex_unlock(&step_complete.lock);

	return SLURM_SUCCESS;

rwfail:
	if (lock_set) {
		pthread_cond_signal(&step_complete.cond);
		pthread_mutex_unlock(&step_complete.lock);
	}
	/* Only set when the read of the packed accounting data failed */
	if (buffer)
		free_buf(buffer);
	return SLURM_FAILURE;
}
/* Wait for the pid given and when it ends get and children it might * of left behind and wait on them instead. */ static void *_wait_extern_pid(void *args) { extern_pid_t *extern_pid = (extern_pid_t *)args; stepd_step_rec_t *job = extern_pid->job; pid_t pid = extern_pid->pid; jobacctinfo_t *jobacct = NULL; pid_t *pids = NULL; int npids = 0, i; char proc_stat_file[256]; /* Allow ~20x extra length */ FILE *stat_fp = NULL; int fd; char sbuf[256], *tmp, state[1]; int num_read, ppid; xfree(extern_pid); //info("waiting on pid %d", pid); _block_on_pid(pid); //info("done with pid %d %d: %m", pid, rc); jobacct = jobacct_gather_remove_task(pid); if (jobacct) { job->jobacct->energy.consumed_energy = 0; jobacctinfo_aggregate(job->jobacct, jobacct); jobacctinfo_destroy(jobacct); } acct_gather_profile_g_task_end(pid); /* See if we have any children of init left and add them to track. */ proctrack_g_get_pids(job->cont_id, &pids, &npids); for (i = 0; i < npids; i++) { snprintf(proc_stat_file, 256, "/proc/%d/stat", pids[i]); if (!(stat_fp = fopen(proc_stat_file, "r"))) continue; /* Assume the process went away */ fd = fileno(stat_fp); fcntl(fd, F_SETFD, FD_CLOEXEC); num_read = read(fd, sbuf, (sizeof(sbuf) - 1)); if (num_read <= 0) goto next_pid; sbuf[num_read] = '\0'; /* get to the end of cmd name */ tmp = strrchr(sbuf, ')'); *tmp = '\0'; /* replace trailing ')' with NULL */ /* skip space after ')' too */ sscanf(tmp + 2, "%c %d ", state, &ppid); if (ppid == 1) { debug2("adding tracking of orphaned process %d", pids[i]); _handle_add_extern_pid_internal(job, pids[i]); } next_pid: fclose(stat_fp); } return NULL; }