Пример #1
0
static void _process_start(List job_list, char *f[], int lc,
			   int show_full, int len)
{
	filetxt_job_rec_t *job = NULL;
	filetxt_job_rec_t *temp = NULL;

	_parse_line(f, (void **)&temp, len);
	job = _find_job_record(job_list, temp->header, JOB_START);
	if (job) {
		/* in slurm we can get 2 start records one for submit
		 * and one for start, so look at the last one */
		xfree(job->jobname);
		job->jobname = xstrdup(temp->jobname);
		job->track_steps = temp->track_steps;
		job->priority = temp->priority;
		job->ncpus = temp->ncpus;
		xfree(job->nodes);
		job->nodes = xstrdup(temp->nodes);
		xfree(job->account);
		job->account = xstrdup(temp->account);

		_destroy_filetxt_job_rec(temp);
		return;
	}

	job = temp;
	job->show_full = show_full;
	list_append(job_list, job);
	job->job_start_seen = 1;

}
Пример #2
0
static void _process_terminated(List job_list, char *f[], int lc,
				int show_full, int len)
{
	filetxt_job_rec_t *job = NULL;
	filetxt_job_rec_t *temp = NULL;

	_parse_line(f, (void **)&temp, len);

	if (temp == NULL) {
		error("Unknown proccess terminated");
		return;
	}

	job = _find_job_record(job_list, temp->header, JOB_TERMINATED);
	if (!job) {	/* fake it for now */
		job = _create_filetxt_job_rec(temp->header);
		job->jobname = xstrdup("(unknown)");
		debug("Note: JOB_TERMINATED record for job "
		      "%u preceded "
		      "other job records at line %d\n",
		      temp->header.jobnum, lc);
	} else if (job->job_terminated_seen) {
		if (temp->status == JOB_NODE_FAIL) {
			/* multiple node failures - extra TERMINATED records */
			debug("Note: Duplicate JOB_TERMINATED "
			      "record (nf) for job %u at "
			      "line %d\n",
			      temp->header.jobnum, lc);
			/* JOB_TERMINATED/NF records may be preceded
			 * by a JOB_TERMINATED/CA record; NF is much
			 * more interesting.
			 */
			job->status = temp->status;
			goto finished;
		}

		fprintf(stderr,
			"Conflicting JOB_TERMINATED record (%s) for "
			"job %u at line %d -- ignoring it\n",
			job_state_string(temp->status),
			job->header.jobnum, lc);
		goto finished;
	}
	job->job_terminated_seen = 1;
	job->elapsed = temp->elapsed;
	job->end = temp->header.timestamp;
	job->status = temp->status;
	job->requid = temp->requid;
	job->exitcode = temp->exitcode;
	if (list_count(job->steps) > 1)
		job->track_steps = 1;
	job->show_full = show_full;

finished:
	_destroy_filetxt_job_rec(temp);
}
Пример #3
0
static void _process_suspend(List job_list, char *f[], int lc,
			     int show_full, int len)
{
	filetxt_job_rec_t *job = NULL;
	filetxt_job_rec_t *temp = NULL;

	_parse_line(f, (void **)&temp, len);
	job = _find_job_record(job_list, temp->header, JOB_SUSPEND);
	if (!job)  {	/* fake it for now */
		job = _create_filetxt_job_rec(temp->header);
		job->jobname = xstrdup("(unknown)");
	}

	job->show_full = show_full;
	if (job->status == JOB_SUSPENDED)
		job->elapsed -= temp->elapsed;

	//job->header.timestamp = temp->header.timestamp;
	job->status = temp->status;
	_destroy_filetxt_job_rec(temp);
}
Пример #4
0
static void _process_step(List job_list, char *f[], int lc,
			  int show_full, int len)
{
	filetxt_job_rec_t *job = NULL;

	filetxt_step_rec_t *step = NULL;
	filetxt_step_rec_t *temp = NULL;

	_parse_line(f, (void **)&temp, len);

	job = _find_job_record(job_list, temp->header, JOB_STEP);

	if (temp->stepnum == -2) {
		_destroy_filetxt_step_rec(temp);
		return;
	}
	if (!job) {	/* fake it for now */
		job = _create_filetxt_job_rec(temp->header);
		job->jobname = xstrdup("(unknown)");
		debug2("Note: JOB_STEP record %u.%u preceded "
		       "JOB_START record at line %d\n",
		       temp->header.jobnum, temp->stepnum, lc);
	}
	job->show_full = show_full;

	if ((step = _find_step_record(job, temp->stepnum))) {

		if (temp->status == JOB_RUNNING) {
			_destroy_filetxt_step_rec(temp);
			return;/* if "R" record preceded by F or CD; unusual */
		}
		if (step->status != JOB_RUNNING) { /* if not JOB_RUNNING */
			fprintf(stderr,
				"Conflicting JOB_STEP record for "
				"jobstep %u.%u at line %d "
				"-- ignoring it\n",
				step->header.jobnum,
				step->stepnum, lc);
			_destroy_filetxt_step_rec(temp);
			return;
		}
		step->status = temp->status;
		step->exitcode = temp->exitcode;
		step->ntasks = temp->ntasks;
		step->ncpus = temp->ncpus;
		step->elapsed = temp->elapsed;
		step->tot_cpu_sec = temp->tot_cpu_sec;
		step->tot_cpu_usec = temp->tot_cpu_usec;
		job->requid = temp->requid;
		step->requid = temp->requid;
		memcpy(&step->rusage, &temp->rusage, sizeof(struct rusage));
		memcpy(&step->stats, &temp->stats, sizeof(slurmdb_stats_t));
		xfree(step->stepname);
		step->stepname = xstrdup(temp->stepname);
		step->end = temp->header.timestamp;
		_destroy_filetxt_step_rec(temp);
		goto got_step;
	}
	step = temp;
	temp = NULL;
	list_append(job->steps, step);
	if (!job->track_steps) {
		/* If we don't have track_steps we want to see
		   if we have multiple steps.  If we only have
		   1 step check the job name against the step
		   name in most all cases it will be
		   different.  If it is different print out
		   the step separate.
		*/
		if (list_count(job->steps) > 1)
			job->track_steps = 1;
		else if (step && step->stepname && job->jobname) {
			if (strcmp(step->stepname, job->jobname))
				job->track_steps = 1;
		}
	}

	if (job->header.timestamp == 0)
		job->header.timestamp = step->header.timestamp;
	job->job_step_seen = 1;
	job->ntasks += step->ntasks;
	if (!job->nodes || !strcmp(job->nodes, "(unknown)")) {
		xfree(job->nodes);
		job->nodes = xstrdup(step->nodes);
	}

got_step:

	if (job->job_terminated_seen == 0) {	/* If the job is still running,
						   this is the most recent
						   status */
		if ( job->exitcode == 0 )
			job->exitcode = step->exitcode;
		job->status = JOB_RUNNING;
		job->elapsed = step->header.timestamp - job->header.timestamp;
	}
}