/*
 * _process_terminated - fold a JOB_TERMINATED record into its job entry.
 *
 * job_list  IN/OUT - list searched with _find_job_record(); a placeholder
 *                    job is appended when no matching job is found
 * f         IN     - fields of the record, handed to _parse_line()
 * lc        IN     - input line number, used only in diagnostics
 * show_full IN     - value stored in the job's show_full member
 * len       IN     - number of entries in f, handed to _parse_line()
 *
 * The parsed temporary record is always destroyed before returning.
 */
static void _process_terminated(List job_list, char *f[], int lc,
				int show_full, int len)
{
	filetxt_job_rec_t *job = NULL;
	filetxt_job_rec_t *temp = NULL;

	_parse_line(f, (void **)&temp, len);
	if (temp == NULL) {
		error("Unknown proccess terminated");
		return;
	}

	job = _find_job_record(job_list, temp->header, JOB_TERMINATED);
	if (!job) {	/* fake it for now */
		job = _create_filetxt_job_rec(temp->header);
		job->jobname = xstrdup("(unknown)");
		/*
		 * Link the placeholder into the list. Without this the
		 * record is unreachable once we return: it is leaked and
		 * records that follow for the same job cannot find it.
		 */
		list_append(job_list, job);
		debug("Note: JOB_TERMINATED record for job "
		      "%u preceded "
		      "other job records at line %d\n",
		      temp->header.jobnum, lc);
	} else if (job->job_terminated_seen) {
		if (temp->status == JOB_NODE_FAIL) {
			/* multiple node failures - extra TERMINATED records */
			debug("Note: Duplicate JOB_TERMINATED "
			      "record (nf) for job %u at "
			      "line %d\n",
			      temp->header.jobnum, lc);
			/* JOB_TERMINATED/NF records may be preceded
			 * by a JOB_TERMINATED/CA record; NF is much
			 * more interesting.
			 */
			job->status = temp->status;
			goto finished;
		}

		fprintf(stderr,
			"Conflicting JOB_TERMINATED record (%s) for "
			"job %u at line %d -- ignoring it\n",
			job_state_string(temp->status),
			job->header.jobnum, lc);
		goto finished;
	}

	/* First termination record seen for this job: copy the results. */
	job->job_terminated_seen = 1;
	job->elapsed = temp->elapsed;
	job->end = temp->header.timestamp;
	job->status = temp->status;
	job->requid = temp->requid;
	job->exitcode = temp->exitcode;
	if (list_count(job->steps) > 1)
		job->track_steps = 1;
	job->show_full = show_full;

finished:
	_destroy_filetxt_job_rec(temp);
}
/*
 * _process_suspend - apply a JOB_SUSPEND record to its job entry.
 *
 * job_list  IN/OUT - list searched with _find_job_record(); a placeholder
 *                    job is appended when no matching job is found
 * f         IN     - fields of the record, handed to _parse_line()
 * lc        IN     - input line number, used only in the diagnostic
 * show_full IN     - value stored in the job's show_full member
 * len       IN     - number of entries in f, handed to _parse_line()
 */
static void _process_suspend(List job_list, char *f[], int lc,
			     int show_full, int len)
{
	filetxt_job_rec_t *job = NULL;
	filetxt_job_rec_t *temp = NULL;

	_parse_line(f, (void **)&temp, len);
	if (temp == NULL) {
		/* _parse_line() produced no record; nothing to apply. */
		fprintf(stderr,
			"Unparsable JOB_SUSPEND record at line %d "
			"-- ignoring it\n", lc);
		return;
	}

	job = _find_job_record(job_list, temp->header, JOB_SUSPEND);
	if (!job) {	/* fake it for now */
		job = _create_filetxt_job_rec(temp->header);
		job->jobname = xstrdup("(unknown)");
		/*
		 * Link the placeholder into the list so it is neither
		 * leaked nor invisible to records that follow.
		 */
		list_append(job_list, job);
	}

	job->show_full = show_full;

	/* Subtract the record's elapsed value if the job was suspended. */
	if (job->status == JOB_SUSPENDED)
		job->elapsed -= temp->elapsed;

	/* The job's header.timestamp is intentionally left unchanged. */
	job->status = temp->status;

	_destroy_filetxt_job_rec(temp);
}
/*
 * _process_step - fold a JOB_STEP record into its job entry.
 *
 * job_list  IN/OUT - list searched with _find_job_record(); a placeholder
 *                    job is appended when no matching job is found
 * f         IN     - fields of the record, handed to _parse_line()
 * lc        IN     - input line number, used only in diagnostics
 * show_full IN     - value stored in the job's show_full member
 * len       IN     - number of entries in f, handed to _parse_line()
 *
 * If the job already holds a step with the same step number, that step is
 * updated in place and the parsed record is destroyed.  Otherwise the
 * parsed record itself is appended to the job's step list.
 */
static void _process_step(List job_list, char *f[], int lc,
			  int show_full, int len)
{
	filetxt_job_rec_t *job = NULL;
	filetxt_step_rec_t *step = NULL;
	filetxt_step_rec_t *temp = NULL;

	_parse_line(f, (void **)&temp, len);
	if (temp == NULL) {
		/* _parse_line() produced no record; nothing to apply. */
		fprintf(stderr,
			"Unparsable JOB_STEP record at line %d "
			"-- ignoring it\n", lc);
		return;
	}

	job = _find_job_record(job_list, temp->header, JOB_STEP);

	/* Records carrying step number -2 are dropped. */
	if (temp->stepnum == -2) {
		_destroy_filetxt_step_rec(temp);
		return;
	}

	if (!job) {	/* fake it for now */
		job = _create_filetxt_job_rec(temp->header);
		job->jobname = xstrdup("(unknown)");
		/*
		 * Link the placeholder into the list.  Without this the
		 * job (and the step attached to it below) is unreachable
		 * once we return.
		 */
		list_append(job_list, job);
		debug2("Note: JOB_STEP record %u.%u preceded "
		       "JOB_START record at line %d\n",
		       temp->header.jobnum, temp->stepnum, lc);
	}
	job->show_full = show_full;

	step = _find_step_record(job, temp->stepnum);
	if (step) {
		if (temp->status == JOB_RUNNING) {
			/* "R" record preceded by F or CD; unusual */
			_destroy_filetxt_step_rec(temp);
			return;
		}
		if (step->status != JOB_RUNNING) {
			/* the stored step already left JOB_RUNNING */
			fprintf(stderr,
				"Conflicting JOB_STEP record for "
				"jobstep %u.%u at line %d "
				"-- ignoring it\n",
				step->header.jobnum,
				step->stepnum, lc);
			_destroy_filetxt_step_rec(temp);
			return;
		}

		/* Refresh the stored step from the new record. */
		step->status = temp->status;
		step->exitcode = temp->exitcode;
		step->ntasks = temp->ntasks;
		step->ncpus = temp->ncpus;
		step->elapsed = temp->elapsed;
		step->tot_cpu_sec = temp->tot_cpu_sec;
		step->tot_cpu_usec = temp->tot_cpu_usec;
		job->requid = temp->requid;
		step->requid = temp->requid;
		memcpy(&step->rusage, &temp->rusage, sizeof(struct rusage));
		memcpy(&step->stats, &temp->stats, sizeof(slurmdb_stats_t));
		xfree(step->stepname);
		step->stepname = xstrdup(temp->stepname);
		step->end = temp->header.timestamp;
		_destroy_filetxt_step_rec(temp);
		goto got_step;
	}

	/* New step: the job's step list takes ownership of the record. */
	step = temp;
	temp = NULL;
	list_append(job->steps, step);

	if (!job->track_steps) {
		/*
		 * Steps are not being tracked yet.  Start tracking when
		 * the job has more than one step, or when its only step
		 * has a name that differs from the job name.
		 */
		if (list_count(job->steps) > 1)
			job->track_steps = 1;
		else if (step->stepname && job->jobname) {
			if (strcmp(step->stepname, job->jobname))
				job->track_steps = 1;
		}
	}

	if (job->header.timestamp == 0)
		job->header.timestamp = step->header.timestamp;
	job->job_step_seen = 1;
	job->ntasks += step->ntasks;
	if (!job->nodes || !strcmp(job->nodes, "(unknown)")) {
		xfree(job->nodes);
		job->nodes = xstrdup(step->nodes);
	}

got_step:
	if (job->job_terminated_seen == 0) {
		/* If the job is still running, this is the most recent
		 * status */
		if (job->exitcode == 0)
			job->exitcode = step->exitcode;
		job->status = JOB_RUNNING;
		job->elapsed = step->header.timestamp - job->header.timestamp;
	}
}
static int _parse_line(char *f[], void **data, int len) { int i = atoi(f[F_RECTYPE]); filetxt_job_rec_t **job = (filetxt_job_rec_t **)data; filetxt_step_rec_t **step = (filetxt_step_rec_t **)data; filetxt_header_t header; _parse_header(f, &header); switch(i) { case JOB_START: *job = _create_filetxt_job_rec(header); (*job)->jobname = xstrdup(f[F_JOBNAME]); (*job)->track_steps = atoi(f[F_TRACK_STEPS]); (*job)->priority = atoi(f[F_PRIORITY]); (*job)->ncpus = atoi(f[F_NCPUS]); (*job)->nodes = xstrdup(f[F_NODES]); for (i=0; (*job)->nodes[i]; i++) { /* discard trailing <CR> */ if (isspace((*job)->nodes[i])) (*job)->nodes[i] = '\0'; } if (!strcmp((*job)->nodes, "(null)")) { xfree((*job)->nodes); (*job)->nodes = xstrdup("(unknown)"); } if (len > F_JOB_ACCOUNT) { (*job)->account = xstrdup(f[F_JOB_ACCOUNT]); for (i=0; (*job)->account[i]; i++) { /* discard trailing <CR> */ if (isspace((*job)->account[i])) (*job)->account[i] = '\0'; } } break; case JOB_STEP: *step = _create_filetxt_step_rec(header); (*step)->stepnum = atoi(f[F_JOBSTEP]); (*step)->status = atoi(f[F_STATUS]); (*step)->exitcode = atoi(f[F_EXITCODE]); (*step)->ntasks = atoi(f[F_NTASKS]); (*step)->ncpus = atoi(f[F_STEPNCPUS]); (*step)->elapsed = atoi(f[F_ELAPSED]); (*step)->tot_cpu_sec = atoi(f[F_CPU_SEC]); (*step)->tot_cpu_usec = atoi(f[F_CPU_USEC]); (*step)->rusage.ru_utime.tv_sec = atoi(f[F_USER_SEC]); (*step)->rusage.ru_utime.tv_usec = atoi(f[F_USER_USEC]); (*step)->rusage.ru_stime.tv_sec = atoi(f[F_SYS_SEC]); (*step)->rusage.ru_stime.tv_usec = atoi(f[F_SYS_USEC]); (*step)->rusage.ru_maxrss = atoi(f[F_RSS]); (*step)->rusage.ru_ixrss = atoi(f[F_IXRSS]); (*step)->rusage.ru_idrss = atoi(f[F_IDRSS]); (*step)->rusage.ru_isrss = atoi(f[F_ISRSS]); (*step)->rusage.ru_minflt = atoi(f[F_MINFLT]); (*step)->rusage.ru_majflt = atoi(f[F_MAJFLT]); (*step)->rusage.ru_nswap = atoi(f[F_NSWAP]); (*step)->rusage.ru_inblock = atoi(f[F_INBLOCKS]); (*step)->rusage.ru_oublock = atoi(f[F_OUBLOCKS]); (*step)->rusage.ru_msgsnd = 
atoi(f[F_MSGSND]); (*step)->rusage.ru_msgrcv = atoi(f[F_MSGRCV]); (*step)->rusage.ru_nsignals = atoi(f[F_NSIGNALS]); (*step)->rusage.ru_nvcsw = atoi(f[F_NVCSW]); (*step)->rusage.ru_nivcsw = atoi(f[F_NIVCSW]); (*step)->stats.vsize_max = atoi(f[F_MAX_VSIZE]); if (len > F_STEPNODES) { (*step)->stats.vsize_max_taskid = atoi(f[F_MAX_VSIZE_TASK]); (*step)->stats.vsize_ave = atof(f[F_AVE_VSIZE]); (*step)->stats.rss_max = atoi(f[F_MAX_RSS]); (*step)->stats.rss_max_taskid = atoi(f[F_MAX_RSS_TASK]); (*step)->stats.rss_ave = atof(f[F_AVE_RSS]); (*step)->stats.pages_max = atoi(f[F_MAX_PAGES]); (*step)->stats.pages_max_taskid = atoi(f[F_MAX_PAGES_TASK]); (*step)->stats.pages_ave = atof(f[F_AVE_PAGES]); (*step)->stats.cpu_min = atoi(f[F_MIN_CPU]); (*step)->stats.cpu_min_taskid = atoi(f[F_MIN_CPU_TASK]); (*step)->stats.cpu_ave = atof(f[F_AVE_CPU]); (*step)->stepname = xstrdup(f[F_STEPNAME]); (*step)->nodes = xstrdup(f[F_STEPNODES]); } else { (*step)->stats.vsize_max_taskid = (uint16_t)NO_VAL; (*step)->stats.vsize_ave = (float)NO_VAL; (*step)->stats.rss_max = NO_VAL; (*step)->stats.rss_max_taskid = (uint16_t)NO_VAL; (*step)->stats.rss_ave = (float)NO_VAL; (*step)->stats.pages_max = NO_VAL; (*step)->stats.pages_max_taskid = (uint16_t)NO_VAL; (*step)->stats.pages_ave = (float)NO_VAL; (*step)->stats.cpu_min = NO_VAL; (*step)->stats.cpu_min_taskid = (uint16_t)NO_VAL; (*step)->stats.cpu_ave = (float)NO_VAL; (*step)->stepname = NULL; (*step)->nodes = NULL; } if (len > F_MIN_CPU_NODE) { (*step)->stats.vsize_max_nodeid = atoi(f[F_MAX_VSIZE_NODE]); (*step)->stats.rss_max_nodeid = atoi(f[F_MAX_RSS_NODE]); (*step)->stats.pages_max_nodeid = atoi(f[F_MAX_PAGES_NODE]); (*step)->stats.cpu_min_nodeid = atoi(f[F_MIN_CPU_NODE]); } else { (*step)->stats.vsize_max_nodeid = NO_VAL; (*step)->stats.rss_max_nodeid = NO_VAL; (*step)->stats.pages_max_nodeid = NO_VAL; (*step)->stats.cpu_min_nodeid = NO_VAL; } if (len > F_STEP_ACCOUNT) (*step)->account = xstrdup(f[F_STEP_ACCOUNT]); if (len > F_STEP_REQUID) 
(*step)->requid = atoi(f[F_STEP_REQUID]); break; case JOB_SUSPEND: case JOB_TERMINATED: *job = _create_filetxt_job_rec(header); (*job)->elapsed = atoi(f[F_TOT_ELAPSED]); (*job)->status = atoi(f[F_STATUS]); if (len > F_JOB_REQUID) (*job)->requid = atoi(f[F_JOB_REQUID]); if (len > F_JOB_EXITCODE) (*job)->exitcode = atoi(f[F_JOB_EXITCODE]); break; default: error("UNKNOWN TYPE %d",i); break; } return SLURM_SUCCESS; }