/*
 * _process_start - apply a JOB_START record to the job list.
 *
 * The line is parsed into a temporary job record.  If a job matching the
 * record's header is already tracked, its name, track_steps, priority,
 * ncpus, nodes and account are overwritten from the new record and the
 * temporary is destroyed.  Otherwise the temporary itself becomes the
 * tracked record and is appended to job_list.
 *
 * IN/OUT job_list - list the job record is looked up in / appended to
 * IN f            - fields of the record, passed through to _parse_line()
 * IN lc           - line number of the record, used for diagnostics only
 * IN show_full    - value stored in a newly tracked record's show_full
 * IN len          - passed through to _parse_line()
 */
static void _process_start(List job_list, char *f[], int lc,
			   int show_full, int len)
{
	filetxt_job_rec_t *job = NULL;
	filetxt_job_rec_t *temp = NULL;

	_parse_line(f, (void **)&temp, len);

	/* Nothing was produced by the parser: there is no header to look
	 * up, so bail out instead of dereferencing a NULL pointer. */
	if (temp == NULL) {
		error("Could not parse JOB_START record at line %d", lc);
		return;
	}

	job = _find_job_record(job_list, temp->header, JOB_START);
	if (job) {
		/* in slurm we can get 2 start records one for submit
		 * and one for start, so look at the last one */
		xfree(job->jobname);
		job->jobname = xstrdup(temp->jobname);
		job->track_steps = temp->track_steps;
		job->priority = temp->priority;
		job->ncpus = temp->ncpus;
		xfree(job->nodes);
		job->nodes = xstrdup(temp->nodes);
		xfree(job->account);
		job->account = xstrdup(temp->account);

		/* Everything needed was copied; the temporary is ours to free. */
		_destroy_filetxt_job_rec(temp);
		return;
	}

	/* First record for this job: hand ownership of temp to job_list. */
	job = temp;
	job->show_full = show_full;
	list_append(job_list, job);
	job->job_start_seen = 1;
}
/*
 * _process_terminated - apply a JOB_TERMINATED record to the job list.
 *
 * The line is parsed into a temporary job record which is always destroyed
 * before returning.  The matching tracked job receives the elapsed time,
 * end timestamp, status, requid and exit code of the record.  If the job
 * already saw a termination record, only a JOB_NODE_FAIL status is allowed
 * to override the stored status; any other duplicate is reported on stderr
 * and ignored.
 *
 * IN/OUT job_list - list the job record is looked up in; a placeholder is
 *                   appended when no matching job is tracked yet
 * IN f            - fields of the record, passed through to _parse_line()
 * IN lc           - line number of the record, used for diagnostics only
 * IN show_full    - value stored in the job's show_full
 * IN len          - passed through to _parse_line()
 */
static void _process_terminated(List job_list, char *f[], int lc,
				int show_full, int len)
{
	filetxt_job_rec_t *job = NULL;
	filetxt_job_rec_t *temp = NULL;

	_parse_line(f, (void **)&temp, len);

	if (temp == NULL) {
		error("Unknown proccess terminated");
		return;
	}

	job = _find_job_record(job_list, temp->header, JOB_TERMINATED);

	if (!job) {	/* fake it for now */
		job = _create_filetxt_job_rec(temp->header);
		job->jobname = xstrdup("(unknown)");
		/* The placeholder must be owned by job_list: left unlinked
		 * it can never be found again or freed, so the termination
		 * data stored below would be lost and the memory leaked. */
		list_append(job_list, job);
		debug("Note: JOB_TERMINATED record for job "
		      "%u preceded "
		      "other job records at line %d\n",
		      temp->header.jobnum, lc);
	} else if (job->job_terminated_seen) {
		if (temp->status == JOB_NODE_FAIL) {
			/* multiple node failures - extra TERMINATED records */
			debug("Note: Duplicate JOB_TERMINATED "
			      "record (nf) for job %u at "
			      "line %d\n",
			      temp->header.jobnum, lc);
			/* JOB_TERMINATED/NF records may be preceded
			 * by a JOB_TERMINATED/CA record; NF is much
			 * more interesting.
			 */
			job->status = temp->status;
			goto finished;
		}

		fprintf(stderr,
			"Conflicting JOB_TERMINATED record (%s) for "
			"job %u at line %d -- ignoring it\n",
			job_state_string(temp->status),
			job->header.jobnum, lc);
		goto finished;
	}

	/* First termination record seen for this job: take its values. */
	job->job_terminated_seen = 1;
	job->elapsed = temp->elapsed;
	job->end = temp->header.timestamp;
	job->status = temp->status;
	job->requid = temp->requid;
	job->exitcode = temp->exitcode;
	if (list_count(job->steps) > 1)
		job->track_steps = 1;
	job->show_full = show_full;

finished:
	/* Only scalar values were copied out of temp, so it can go. */
	_destroy_filetxt_job_rec(temp);
}
/*
 * _process_suspend - apply a JOB_SUSPEND record to the job list.
 *
 * The line is parsed into a temporary job record which is destroyed before
 * returning.  The matching tracked job takes over the record's status; if
 * the job's stored status was JOB_SUSPENDED, the record's elapsed value is
 * subtracted from the job's elapsed time first.
 *
 * IN/OUT job_list - list the job record is looked up in; a placeholder is
 *                   appended when no matching job is tracked yet
 * IN f            - fields of the record, passed through to _parse_line()
 * IN lc           - line number of the record, used for diagnostics only
 * IN show_full    - value stored in the job's show_full
 * IN len          - passed through to _parse_line()
 */
static void _process_suspend(List job_list, char *f[], int lc,
			     int show_full, int len)
{
	filetxt_job_rec_t *job = NULL;
	filetxt_job_rec_t *temp = NULL;

	_parse_line(f, (void **)&temp, len);

	/* Nothing was produced by the parser: there is no header to look
	 * up, so bail out instead of dereferencing a NULL pointer. */
	if (temp == NULL) {
		error("Could not parse JOB_SUSPEND record at line %d", lc);
		return;
	}

	job = _find_job_record(job_list, temp->header, JOB_SUSPEND);
	if (!job) {	/* fake it for now */
		job = _create_filetxt_job_rec(temp->header);
		job->jobname = xstrdup("(unknown)");
		/* The placeholder must be owned by job_list: left unlinked
		 * it can never be found again or freed, so the state stored
		 * below would be lost and the memory leaked. */
		list_append(job_list, job);
	}

	job->show_full = show_full;

	/* The comparison uses the status stored before this record is
	 * applied, so it has to run ahead of the assignment below. */
	if (job->status == JOB_SUSPENDED)
		job->elapsed -= temp->elapsed;

	job->status = temp->status;

	_destroy_filetxt_job_rec(temp);
}
/*
 * _process_step - apply a JOB_STEP record to the job list.
 *
 * The line is parsed into a temporary step record.  Records whose step
 * number is -2 are discarded.  If the owning job already holds a step with
 * the same number, that step is updated from the record (unless the record
 * is a JOB_RUNNING one, or the stored step is no longer JOB_RUNNING, in
 * which case the record is dropped) and the temporary is destroyed.
 * Otherwise the temporary itself is appended to the job's step list.
 * While the job has not seen a termination record, its status, elapsed
 * time and (if still zero) exit code are refreshed from the step.
 *
 * IN/OUT job_list - list the job record is looked up in; a placeholder is
 *                   appended when no matching job is tracked yet
 * IN f            - fields of the record, passed through to _parse_line()
 * IN lc           - line number of the record, used for diagnostics only
 * IN show_full    - value stored in the job's show_full
 * IN len          - passed through to _parse_line()
 */
static void _process_step(List job_list, char *f[], int lc,
			  int show_full, int len)
{
	filetxt_job_rec_t *job = NULL;
	filetxt_step_rec_t *step = NULL;
	filetxt_step_rec_t *temp = NULL;

	_parse_line(f, (void **)&temp, len);

	/* Nothing was produced by the parser: there is no header to look
	 * up, so bail out instead of dereferencing a NULL pointer. */
	if (temp == NULL) {
		error("Could not parse JOB_STEP record at line %d", lc);
		return;
	}

	/* The lookup is done before the step-number filter on purpose:
	 * the original call order is kept in case the lookup has side
	 * effects on job_list. */
	job = _find_job_record(job_list, temp->header, JOB_STEP);

	/* NOTE(review): -2 looks like a sentinel step number for records
	 * that must not be reported -- confirm against the writer. */
	if (temp->stepnum == -2) {
		_destroy_filetxt_step_rec(temp);
		return;
	}

	if (!job) {	/* fake it for now */
		job = _create_filetxt_job_rec(temp->header);
		job->jobname = xstrdup("(unknown)");
		/* The placeholder must be owned by job_list: left unlinked
		 * it can never be found again or freed, so this step (which
		 * is attached to it below) would be lost and leaked too. */
		list_append(job_list, job);
		debug2("Note: JOB_STEP record %u.%u preceded "
		       "JOB_START record at line %d\n",
		       temp->header.jobnum, temp->stepnum, lc);
	}
	job->show_full = show_full;

	step = _find_step_record(job, temp->stepnum);
	if (step) {
		if (temp->status == JOB_RUNNING) {
			/* if "R" record preceded by F or CD; unusual */
			_destroy_filetxt_step_rec(temp);
			return;
		}
		if (step->status != JOB_RUNNING) { /* if not JOB_RUNNING */
			fprintf(stderr,
				"Conflicting JOB_STEP record for "
				"jobstep %u.%u at line %d "
				"-- ignoring it\n",
				step->header.jobnum,
				step->stepnum, lc);
			_destroy_filetxt_step_rec(temp);
			return;
		}

		/* Stored step is still running: take the final values. */
		step->status = temp->status;
		step->exitcode = temp->exitcode;
		step->ntasks = temp->ntasks;
		step->ncpus = temp->ncpus;
		step->elapsed = temp->elapsed;
		step->tot_cpu_sec = temp->tot_cpu_sec;
		step->tot_cpu_usec = temp->tot_cpu_usec;
		job->requid = temp->requid;
		step->requid = temp->requid;
		memcpy(&step->rusage, &temp->rusage, sizeof(struct rusage));
		memcpy(&step->stats, &temp->stats, sizeof(slurmdb_stats_t));
		xfree(step->stepname);
		step->stepname = xstrdup(temp->stepname);
		step->end = temp->header.timestamp;
		_destroy_filetxt_step_rec(temp);
		goto got_step;
	}

	/* New step: ownership of temp moves to the job's step list. */
	step = temp;
	temp = NULL;
	list_append(job->steps, step);

	if (!job->track_steps) {
		/* Without track_steps, decide whether the steps deserve
		 * separate output: more than one step always does, and a
		 * single step does when its name differs from the job
		 * name (which is the case most of the time). */
		if (list_count(job->steps) > 1)
			job->track_steps = 1;
		else if (step->stepname && job->jobname &&
			 strcmp(step->stepname, job->jobname))
			job->track_steps = 1;
	}

	if (job->header.timestamp == 0)
		job->header.timestamp = step->header.timestamp;
	job->job_step_seen = 1;
	job->ntasks += step->ntasks;
	if (!job->nodes || !strcmp(job->nodes, "(unknown)")) {
		xfree(job->nodes);
		job->nodes = xstrdup(step->nodes);
	}

got_step:
	if (job->job_terminated_seen == 0) {
		/* If the job is still running, this is the most
		 * recent status */
		if (job->exitcode == 0)
			job->exitcode = step->exitcode;
		job->status = JOB_RUNNING;
		job->elapsed = step->header.timestamp - job->header.timestamp;
	}
}