/*
 * Unpack a DBD step-complete message from buffer; on success *msg points
 * at the newly allocated structure, which the caller must free.
 */
static int _unpack_step_complete_msg(dbd_step_comp_msg_t **msg,
				     uint16_t rpc_version, Buf buffer)
{
	uint32_t uint32_tmp;
	dbd_step_comp_msg_t *msg_ptr = xmalloc(sizeof(dbd_step_comp_msg_t));
	*msg = msg_ptr;

	if (rpc_version >= SLURM_MIN_PROTOCOL_VERSION) {
		safe_unpack32(&msg_ptr->assoc_id, buffer);
		safe_unpack64(&msg_ptr->db_index, buffer);
		safe_unpack_time(&msg_ptr->end_time, buffer);
		safe_unpack32(&msg_ptr->exit_code, buffer);
		jobacctinfo_unpack((struct jobacctinfo **)&msg_ptr->jobacct,
				   rpc_version, PROTOCOL_TYPE_DBD, buffer, 1);
		safe_unpack32(&msg_ptr->job_id, buffer);
		safe_unpack_time(&msg_ptr->job_submit_time, buffer);
		safe_unpackstr_xmalloc(&msg_ptr->job_tres_alloc_str,
				       &uint32_tmp, buffer);
		safe_unpack32(&msg_ptr->req_uid, buffer);
		safe_unpack_time(&msg_ptr->start_time, buffer);
		safe_unpack16(&msg_ptr->state, buffer);
		safe_unpack32(&msg_ptr->step_id, buffer);
		safe_unpack32(&msg_ptr->total_tasks, buffer);
	} else
		goto unpack_error;

	return SLURM_SUCCESS;

unpack_error:
	debug2("slurmdbd_unpack_step_complete_msg: "
	       "unpack_error: size_buf(buffer) %u",
	       size_buf(buffer));
	slurmdbd_free_step_complete_msg(msg_ptr);
	*msg = NULL;
	return SLURM_ERROR;
}
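/*
 * Illustration only: a minimal sketch of the packing side that the
 * unpacker above expects.  The point is that the pack order must mirror
 * the unpack order field for field.  The function name is hypothetical
 * and this is not the canonical slurmdbd pack routine; it assumes the
 * standard pack helpers (pack32, pack64, pack_time, packstr, pack16) and
 * jobacctinfo_pack().
 */
static void _pack_step_complete_msg_sketch(dbd_step_comp_msg_t *msg,
					   uint16_t rpc_version, Buf buffer)
{
	pack32(msg->assoc_id, buffer);
	pack64(msg->db_index, buffer);
	pack_time(msg->end_time, buffer);
	pack32(msg->exit_code, buffer);
	jobacctinfo_pack((struct jobacctinfo *)msg->jobacct, rpc_version,
			 PROTOCOL_TYPE_DBD, buffer);
	pack32(msg->job_id, buffer);
	pack_time(msg->job_submit_time, buffer);
	packstr(msg->job_tres_alloc_str, buffer);
	pack32(msg->req_uid, buffer);
	pack_time(msg->start_time, buffer);
	pack16(msg->state, buffer);
	pack32(msg->step_id, buffer);
	pack32(msg->total_tasks, buffer);
}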
/*
 * Copy the requested accounting field from jobacct into the caller-supplied
 * location pointed to by data.  For JOBACCT_DATA_PIPE, data points at a pipe
 * fd and a length-prefixed packed record read from it is unpacked into
 * jobacct instead.
 */
extern int jobacctinfo_getinfo(
	jobacctinfo_t *jobacct, enum jobacct_data_type type, void *data,
	uint16_t protocol_version)
{
	int rc = SLURM_SUCCESS;
	int *fd = (int *)data;
	uint32_t *uint32 = (uint32_t *) data;
	uint64_t *uint64 = (uint64_t *) data;
	double *dub = (double *) data;
	jobacct_id_t *jobacct_id = (jobacct_id_t *) data;
	struct rusage *rusage = (struct rusage *)data;
	struct jobacctinfo *send = (struct jobacctinfo *) data;
	char *buf = NULL;

	if (!plugin_polling)
		return SLURM_SUCCESS;

	/* jobacct needs to be allocated before this is called. */
	xassert(jobacct);

	switch (type) {
	case JOBACCT_DATA_TOTAL:
		memcpy(send, jobacct, sizeof(struct jobacctinfo));
		break;
	case JOBACCT_DATA_PIPE:
		if (protocol_version >= SLURM_MIN_PROTOCOL_VERSION) {
			int len;
			Buf buffer;

			safe_read(*fd, &len, sizeof(int));
			buf = xmalloc(len);
			safe_read(*fd, buf, len);
			buffer = create_buf(buf, len);

			jobacctinfo_unpack(&jobacct, protocol_version,
					   PROTOCOL_TYPE_SLURM, buffer, 0);
			free_buf(buffer);
		}
		break;
	case JOBACCT_DATA_RUSAGE:
		memset(rusage, 0, sizeof(struct rusage));
		rusage->ru_utime.tv_sec = jobacct->user_cpu_sec;
		rusage->ru_utime.tv_usec = jobacct->user_cpu_usec;
		rusage->ru_stime.tv_sec = jobacct->sys_cpu_sec;
		rusage->ru_stime.tv_usec = jobacct->sys_cpu_usec;
		break;
	case JOBACCT_DATA_MAX_RSS:
		*uint64 = jobacct->max_rss;
		break;
	case JOBACCT_DATA_MAX_RSS_ID:
		*jobacct_id = jobacct->max_rss_id;
		break;
	case JOBACCT_DATA_TOT_RSS:
		*uint64 = jobacct->tot_rss;
		break;
	case JOBACCT_DATA_MAX_VSIZE:
		*uint64 = jobacct->max_vsize;
		break;
	case JOBACCT_DATA_MAX_VSIZE_ID:
		*jobacct_id = jobacct->max_vsize_id;
		break;
	case JOBACCT_DATA_TOT_VSIZE:
		*uint64 = jobacct->tot_vsize;
		break;
	case JOBACCT_DATA_MAX_PAGES:
		*uint64 = jobacct->max_pages;
		break;
	case JOBACCT_DATA_MAX_PAGES_ID:
		*jobacct_id = jobacct->max_pages_id;
		break;
	case JOBACCT_DATA_TOT_PAGES:
		*uint64 = jobacct->tot_pages;
		break;
	case JOBACCT_DATA_MIN_CPU:
		*uint32 = jobacct->min_cpu;
		break;
	case JOBACCT_DATA_MIN_CPU_ID:
		*jobacct_id = jobacct->min_cpu_id;
		break;
	case JOBACCT_DATA_TOT_CPU:
		*dub = jobacct->tot_cpu;
		break;
	case JOBACCT_DATA_ACT_CPUFREQ:
		*uint32 = jobacct->act_cpufreq;
		break;
	case JOBACCT_DATA_CONSUMED_ENERGY:
		*uint64 = jobacct->energy.consumed_energy;
		break;
	case JOBACCT_DATA_MAX_DISK_READ:
		*dub = jobacct->max_disk_read;
		break;
	case JOBACCT_DATA_MAX_DISK_READ_ID:
		*jobacct_id = jobacct->max_disk_read_id;
		break;
	case JOBACCT_DATA_TOT_DISK_READ:
		*dub = jobacct->tot_disk_read;
		break;
	case JOBACCT_DATA_MAX_DISK_WRITE:
		*dub = jobacct->max_disk_write;
		break;
	case JOBACCT_DATA_MAX_DISK_WRITE_ID:
		*jobacct_id = jobacct->max_disk_write_id;
		break;
	case JOBACCT_DATA_TOT_DISK_WRITE:
		*dub = jobacct->tot_disk_write;
		break;
	default:
		debug("jobacctinfo_getinfo: data_type %d invalid", type);
	}
	return rc;

rwfail:
	xfree(buf);
	return SLURM_ERROR;
}
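/*
 * Illustration only: the JOBACCT_DATA_PIPE case above expects its peer to
 * have written a length-prefixed, packed jobacctinfo record to the pipe.
 * A minimal sketch of that writer side, assuming the Buf helpers
 * (init_buf, get_buf_offset, get_buf_data, free_buf), jobacctinfo_pack()
 * and the safe_write macro used elsewhere in this code; the function name
 * is hypothetical.
 */
static int _write_jobacct_to_pipe_sketch(int fd, jobacctinfo_t *jobacct,
					 uint16_t protocol_version)
{
	int len;
	Buf buffer = init_buf(128);

	/* Pack the record, then send its length followed by the bytes. */
	jobacctinfo_pack(jobacct, protocol_version,
			 PROTOCOL_TYPE_SLURM, buffer);
	len = get_buf_offset(buffer);
	safe_write(fd, &len, sizeof(int));
	safe_write(fd, get_buf_data(buffer), len);
	free_buf(buffer);
	return SLURM_SUCCESS;

rwfail:
	free_buf(buffer);
	return SLURM_ERROR;
}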
/*
 * Record a step-completion report (node range, step return code and
 * accounting data) received over fd, then acknowledge it with a return
 * code and errno.
 */
static int _handle_completion(int fd, stepd_step_rec_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	int errnum = 0;
	int first;
	int last;
	jobacctinfo_t *jobacct = NULL;
	int step_rc;
	char *buf;
	int len;
	Buf buffer;
	int version;	/* For future use */
	bool lock_set = false;

	debug("_handle_completion for job %u.%u",
	      job->jobid, job->stepid);

	debug3("  uid = %d", uid);
	if (!_slurm_authorized_user(uid)) {
		debug("step completion message from unauthorized uid %ld "
		      "for job %u.%u",
		      (long)uid, job->jobid, job->stepid);
		rc = -1;
		errnum = EPERM;
		/* Send the return code and errno */
		safe_write(fd, &rc, sizeof(int));
		safe_write(fd, &errnum, sizeof(int));
		return SLURM_SUCCESS;
	}

	safe_read(fd, &version, sizeof(int));
	safe_read(fd, &first, sizeof(int));
	safe_read(fd, &last, sizeof(int));
	safe_read(fd, &step_rc, sizeof(int));

	/*
	 * We must not use getinfo over a pipe with slurmd here.
	 * slurmstepd makes heavy use of setinfo over a pipe with slurmd,
	 * and doing the reverse can result in a deadlock with slurmd:
	 * slurmd(lockforread,write)/slurmstepd(write,lockforread).
	 * Do pack/unpack instead, so slurmd and slurmstepd stay
	 * independent of each other.
	 */
	safe_read(fd, &len, sizeof(int));
	buf = xmalloc(len);
	safe_read(fd, buf, len);
	buffer = create_buf(buf, len);
	jobacctinfo_unpack(&jobacct, SLURM_PROTOCOL_VERSION,
			   PROTOCOL_TYPE_SLURM, buffer, 1);
	free_buf(buffer);

	/*
	 * Record the completed nodes
	 */
	pthread_mutex_lock(&step_complete.lock);
	lock_set = true;
	if (!step_complete.wait_children) {
		rc = -1;
		errnum = ETIMEDOUT; /* not used anyway */
		goto timeout;
	}

	/* SlurmUser or root can craft a launch without a valid credential
	 * ("srun --no-alloc ...") and no tree information can be built
	 * without the hostlist from the credential. */
	if (step_complete.rank >= 0) {
#if 0
		char bits_string[128];
		debug2("Setting range %d (bit %d) through %d (bit %d)",
		       first, first - (step_complete.rank + 1),
		       last, last - (step_complete.rank + 1));
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2("  before bits: %s", bits_string);
#endif
		bit_nset(step_complete.bits,
			 first - (step_complete.rank + 1),
			 last - (step_complete.rank + 1));
#if 0
		bit_fmt(bits_string, sizeof(bits_string), step_complete.bits);
		debug2("  after bits: %s", bits_string);
#endif
	}
	step_complete.step_rc = MAX(step_complete.step_rc, step_rc);

	/************* acct stuff ********************/
	jobacctinfo_aggregate(step_complete.jobacct, jobacct);
timeout:
	jobacctinfo_destroy(jobacct);
	/*********************************************/

	/* Send the return code and errno.  We do this within the locked
	 * region to ensure that the stepd doesn't exit before we can
	 * perform this send. */
	safe_write(fd, &rc, sizeof(int));
	safe_write(fd, &errnum, sizeof(int));

	pthread_cond_signal(&step_complete.cond);
	pthread_mutex_unlock(&step_complete.lock);

	return SLURM_SUCCESS;

rwfail:
	if (lock_set) {
		pthread_cond_signal(&step_complete.cond);
		pthread_mutex_unlock(&step_complete.lock);
	}
	return SLURM_FAILURE;
}
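/*
 * Illustration only: a minimal sketch of the client side of the exchange
 * that _handle_completion() services, written to match the reads and
 * writes above.  It assumes the request-type dispatch has already been
 * sent on fd; the function name is hypothetical and this is not the
 * canonical stepd_api routine.
 */
static int _send_completion_sketch(int fd, int first, int last, int step_rc,
				   jobacctinfo_t *jobacct)
{
	int version = SLURM_PROTOCOL_VERSION;	/* currently unused by the handler */
	int rc, errnum, len;
	Buf buffer = init_buf(128);

	/* Pack the accounting data so the handler can unpack it without
	 * a getinfo call over the pipe (see the deadlock note above). */
	jobacctinfo_pack(jobacct, SLURM_PROTOCOL_VERSION,
			 PROTOCOL_TYPE_SLURM, buffer);
	len = get_buf_offset(buffer);

	safe_write(fd, &version, sizeof(int));
	safe_write(fd, &first, sizeof(int));
	safe_write(fd, &last, sizeof(int));
	safe_write(fd, &step_rc, sizeof(int));
	safe_write(fd, &len, sizeof(int));
	safe_write(fd, get_buf_data(buffer), len);
	free_buf(buffer);
	buffer = NULL;

	/* The handler replies with its return code and errno. */
	safe_read(fd, &rc, sizeof(int));
	safe_read(fd, &errnum, sizeof(int));
	if (rc < 0)
		errno = errnum;
	return rc;

rwfail:
	if (buffer)
		free_buf(buffer);
	return SLURM_ERROR;
}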