Пример #1
0
/*
 * _process_terminated - apply a JOB_TERMINATED record to its job.
 *
 * IN job_list  - list searched with _find_job_record() for the job the
 *                record refers to
 * IN f         - fields of the record line, handed to _parse_line()
 * IN lc        - line number of the record; used only in diagnostics
 * IN show_full - copied into the job's show_full flag
 * IN len       - number of entries in f, handed to _parse_line()
 *
 * The line is parsed into a temporary job record which is always destroyed
 * before returning.  If the job already saw a JOB_TERMINATED record, the
 * new one is ignored, except that a JOB_NODE_FAIL status overrides the
 * stored status.
 */
static void _process_terminated(List job_list, char *f[], int lc,
				int show_full, int len)
{
	filetxt_job_rec_t *job = NULL;
	filetxt_job_rec_t *temp = NULL;

	_parse_line(f, (void **)&temp, len);

	/* _parse_line() leaves temp untouched for unrecognised record types */
	if (temp == NULL) {
		error("Unknown process terminated");
		return;
	}

	job = _find_job_record(job_list, temp->header, JOB_TERMINATED);
	if (!job) {	/* fake it for now */
		/*
		 * NOTE(review): this placeholder record is neither added to
		 * job_list nor freed in this function - confirm whether that
		 * is intended.
		 */
		job = _create_filetxt_job_rec(temp->header);
		job->jobname = xstrdup("(unknown)");
		debug("Note: JOB_TERMINATED record for job "
		      "%u preceded "
		      "other job records at line %d\n",
		      temp->header.jobnum, lc);
	} else if (job->job_terminated_seen) {
		if (temp->status == JOB_NODE_FAIL) {
			/* multiple node failures - extra TERMINATED records */
			debug("Note: Duplicate JOB_TERMINATED "
			      "record (nf) for job %u at "
			      "line %d\n",
			      temp->header.jobnum, lc);
			/* JOB_TERMINATED/NF records may be preceded
			 * by a JOB_TERMINATED/CA record; NF is much
			 * more interesting.
			 */
			job->status = temp->status;
			goto finished;
		}

		fprintf(stderr,
			"Conflicting JOB_TERMINATED record (%s) for "
			"job %u at line %d -- ignoring it\n",
			job_state_string(temp->status),
			job->header.jobnum, lc);
		goto finished;
	}

	/* First JOB_TERMINATED record for this job: take over its values. */
	job->job_terminated_seen = 1;
	job->elapsed = temp->elapsed;
	job->end = temp->header.timestamp;
	job->status = temp->status;
	job->requid = temp->requid;
	job->exitcode = temp->exitcode;
	if (list_count(job->steps) > 1)
		job->track_steps = 1;
	job->show_full = show_full;

finished:
	_destroy_filetxt_job_rec(temp);
}
Пример #2
0
/*
 * _process_suspend - apply a JOB_SUSPEND record to its job.
 *
 * IN job_list  - list searched with _find_job_record() for the job the
 *                record refers to
 * IN f         - fields of the record line, handed to _parse_line()
 * IN lc        - line number of the record; used only in diagnostics
 * IN show_full - copied into the job's show_full flag
 * IN len       - number of entries in f, handed to _parse_line()
 *
 * The line is parsed into a temporary job record which is destroyed before
 * returning.  The job's status is replaced by the status in the record; if
 * the job was in JOB_SUSPENDED state, the record's elapsed value is first
 * subtracted from the job's elapsed time.
 */
static void _process_suspend(List job_list, char *f[], int lc,
			     int show_full, int len)
{
	filetxt_job_rec_t *job = NULL;
	filetxt_job_rec_t *temp = NULL;

	_parse_line(f, (void **)&temp, len);

	/*
	 * _parse_line() leaves temp untouched for unrecognised record types;
	 * bail out instead of dereferencing a NULL record.
	 */
	if (temp == NULL) {
		error("Unable to parse JOB_SUSPEND record at line %d", lc);
		return;
	}

	job = _find_job_record(job_list, temp->header, JOB_SUSPEND);
	if (!job)  {	/* fake it for now */
		/*
		 * NOTE(review): this placeholder record is neither added to
		 * job_list nor freed in this function - confirm whether that
		 * is intended.
		 */
		job = _create_filetxt_job_rec(temp->header);
		job->jobname = xstrdup("(unknown)");
	}

	job->show_full = show_full;
	if (job->status == JOB_SUSPENDED)
		job->elapsed -= temp->elapsed;

	/* job->header.timestamp is not updated from the suspend record. */
	job->status = temp->status;
	_destroy_filetxt_job_rec(temp);
}
Пример #3
0
/*
 * _process_step - apply a JOB_STEP record to its job.
 *
 * IN job_list  - list searched with _find_job_record() for the job the
 *                record refers to
 * IN f         - fields of the record line, handed to _parse_line()
 * IN lc        - line number of the record; used only in diagnostics
 * IN show_full - copied into the job's show_full flag
 * IN len       - number of entries in f, handed to _parse_line()
 *
 * The line is parsed into a temporary step record.  If the job already has
 * a step with the same step number, that step is updated from the temporary
 * record, which is then destroyed.  Otherwise the temporary record itself
 * is appended to job->steps and is not destroyed here.
 */
static void _process_step(List job_list, char *f[], int lc,
			  int show_full, int len)
{
	filetxt_job_rec_t *job = NULL;

	filetxt_step_rec_t *step = NULL;
	filetxt_step_rec_t *temp = NULL;

	_parse_line(f, (void **)&temp, len);

	/*
	 * _parse_line() leaves temp untouched for unrecognised record types;
	 * bail out instead of dereferencing a NULL record.
	 */
	if (temp == NULL) {
		error("Unable to parse JOB_STEP record at line %d", lc);
		return;
	}

	job = _find_job_record(job_list, temp->header, JOB_STEP);

	/* Records carrying step number -2 are dropped. */
	if (temp->stepnum == -2) {
		_destroy_filetxt_step_rec(temp);
		return;
	}
	if (!job) {	/* fake it for now */
		/*
		 * NOTE(review): this placeholder record is not added to
		 * job_list in this function - confirm whether that is
		 * intended.
		 */
		job = _create_filetxt_job_rec(temp->header);
		job->jobname = xstrdup("(unknown)");
		debug2("Note: JOB_STEP record %u.%u preceded "
		       "JOB_START record at line %d\n",
		       temp->header.jobnum, temp->stepnum, lc);
	}
	job->show_full = show_full;

	if ((step = _find_step_record(job, temp->stepnum))) {

		if (temp->status == JOB_RUNNING) {
			_destroy_filetxt_step_rec(temp);
			return;/* if "R" record preceded by F or CD; unusual */
		}
		if (step->status != JOB_RUNNING) { /* if not JOB_RUNNING */
			fprintf(stderr,
				"Conflicting JOB_STEP record for "
				"jobstep %u.%u at line %d "
				"-- ignoring it\n",
				step->header.jobnum,
				step->stepnum, lc);
			_destroy_filetxt_step_rec(temp);
			return;
		}
		/* The known step was still running: record its final state. */
		step->status = temp->status;
		step->exitcode = temp->exitcode;
		step->ntasks = temp->ntasks;
		step->ncpus = temp->ncpus;
		step->elapsed = temp->elapsed;
		step->tot_cpu_sec = temp->tot_cpu_sec;
		step->tot_cpu_usec = temp->tot_cpu_usec;
		job->requid = temp->requid;
		step->requid = temp->requid;
		memcpy(&step->rusage, &temp->rusage, sizeof(struct rusage));
		memcpy(&step->stats, &temp->stats, sizeof(slurmdb_stats_t));
		xfree(step->stepname);
		step->stepname = xstrdup(temp->stepname);
		step->end = temp->header.timestamp;
		_destroy_filetxt_step_rec(temp);
		goto got_step;
	}

	/* New step: the parsed record itself is stored in job->steps. */
	step = temp;
	temp = NULL;
	list_append(job->steps, step);
	if (!job->track_steps) {
		/* If we don't have track_steps we want to see
		   if we have multiple steps.  If we only have
		   1 step check the job name against the step
		   name in most all cases it will be
		   different.  If it is different print out
		   the step separate.
		*/
		if (list_count(job->steps) > 1)
			job->track_steps = 1;
		else if (step->stepname && job->jobname) {
			if (strcmp(step->stepname, job->jobname))
				job->track_steps = 1;
		}
	}

	if (job->header.timestamp == 0)
		job->header.timestamp = step->header.timestamp;
	job->job_step_seen = 1;
	job->ntasks += step->ntasks;
	/* Take the step's node list if the job has none of its own. */
	if (!job->nodes || !strcmp(job->nodes, "(unknown)")) {
		xfree(job->nodes);
		job->nodes = xstrdup(step->nodes);
	}

got_step:

	if (job->job_terminated_seen == 0) {	/* If the job is still running,
						   this is the most recent
						   status */
		if ( job->exitcode == 0 )
			job->exitcode = step->exitcode;
		job->status = JOB_RUNNING;
		job->elapsed = step->header.timestamp - job->header.timestamp;
	}
}
Пример #4
0
static int _parse_line(char *f[], void **data, int len)
{
	int i = atoi(f[F_RECTYPE]);
	filetxt_job_rec_t **job = (filetxt_job_rec_t **)data;
	filetxt_step_rec_t **step = (filetxt_step_rec_t **)data;
	filetxt_header_t header;
	_parse_header(f, &header);

	switch(i) {
	case JOB_START:
		*job = _create_filetxt_job_rec(header);
		(*job)->jobname = xstrdup(f[F_JOBNAME]);
		(*job)->track_steps = atoi(f[F_TRACK_STEPS]);
		(*job)->priority = atoi(f[F_PRIORITY]);
		(*job)->ncpus = atoi(f[F_NCPUS]);
		(*job)->nodes = xstrdup(f[F_NODES]);

		for (i=0; (*job)->nodes[i]; i++) { /* discard trailing <CR> */
			if (isspace((*job)->nodes[i]))
				(*job)->nodes[i] = '\0';
		}
		if (!strcmp((*job)->nodes, "(null)")) {
			xfree((*job)->nodes);
			(*job)->nodes = xstrdup("(unknown)");
		}
		if (len > F_JOB_ACCOUNT) {
			(*job)->account = xstrdup(f[F_JOB_ACCOUNT]);
			for (i=0; (*job)->account[i]; i++) {
				/* discard trailing <CR> */
				if (isspace((*job)->account[i]))
					(*job)->account[i] = '\0';
			}
		}
		break;
	case JOB_STEP:
		*step = _create_filetxt_step_rec(header);
		(*step)->stepnum = atoi(f[F_JOBSTEP]);
		(*step)->status = atoi(f[F_STATUS]);
		(*step)->exitcode = atoi(f[F_EXITCODE]);
		(*step)->ntasks = atoi(f[F_NTASKS]);
		(*step)->ncpus = atoi(f[F_STEPNCPUS]);
		(*step)->elapsed = atoi(f[F_ELAPSED]);
		(*step)->tot_cpu_sec = atoi(f[F_CPU_SEC]);
		(*step)->tot_cpu_usec = atoi(f[F_CPU_USEC]);
		(*step)->rusage.ru_utime.tv_sec = atoi(f[F_USER_SEC]);
		(*step)->rusage.ru_utime.tv_usec = atoi(f[F_USER_USEC]);
		(*step)->rusage.ru_stime.tv_sec = atoi(f[F_SYS_SEC]);
		(*step)->rusage.ru_stime.tv_usec = atoi(f[F_SYS_USEC]);
		(*step)->rusage.ru_maxrss = atoi(f[F_RSS]);
		(*step)->rusage.ru_ixrss = atoi(f[F_IXRSS]);
		(*step)->rusage.ru_idrss = atoi(f[F_IDRSS]);
		(*step)->rusage.ru_isrss = atoi(f[F_ISRSS]);
		(*step)->rusage.ru_minflt = atoi(f[F_MINFLT]);
		(*step)->rusage.ru_majflt = atoi(f[F_MAJFLT]);
		(*step)->rusage.ru_nswap = atoi(f[F_NSWAP]);
		(*step)->rusage.ru_inblock = atoi(f[F_INBLOCKS]);
		(*step)->rusage.ru_oublock = atoi(f[F_OUBLOCKS]);
		(*step)->rusage.ru_msgsnd = atoi(f[F_MSGSND]);
		(*step)->rusage.ru_msgrcv = atoi(f[F_MSGRCV]);
		(*step)->rusage.ru_nsignals = atoi(f[F_NSIGNALS]);
		(*step)->rusage.ru_nvcsw = atoi(f[F_NVCSW]);
		(*step)->rusage.ru_nivcsw = atoi(f[F_NIVCSW]);
		(*step)->stats.vsize_max = atoi(f[F_MAX_VSIZE]);
		if (len > F_STEPNODES) {
			(*step)->stats.vsize_max_taskid =
				atoi(f[F_MAX_VSIZE_TASK]);
			(*step)->stats.vsize_ave = atof(f[F_AVE_VSIZE]);
			(*step)->stats.rss_max = atoi(f[F_MAX_RSS]);
			(*step)->stats.rss_max_taskid =
				atoi(f[F_MAX_RSS_TASK]);
			(*step)->stats.rss_ave = atof(f[F_AVE_RSS]);
			(*step)->stats.pages_max = atoi(f[F_MAX_PAGES]);
			(*step)->stats.pages_max_taskid =
				atoi(f[F_MAX_PAGES_TASK]);
			(*step)->stats.pages_ave = atof(f[F_AVE_PAGES]);
			(*step)->stats.cpu_min = atoi(f[F_MIN_CPU]);
			(*step)->stats.cpu_min_taskid =
				atoi(f[F_MIN_CPU_TASK]);
			(*step)->stats.cpu_ave = atof(f[F_AVE_CPU]);
			(*step)->stepname = xstrdup(f[F_STEPNAME]);
			(*step)->nodes = xstrdup(f[F_STEPNODES]);
		} else {
			(*step)->stats.vsize_max_taskid = (uint16_t)NO_VAL;
			(*step)->stats.vsize_ave = (float)NO_VAL;
			(*step)->stats.rss_max = NO_VAL;
			(*step)->stats.rss_max_taskid = (uint16_t)NO_VAL;
			(*step)->stats.rss_ave = (float)NO_VAL;
			(*step)->stats.pages_max = NO_VAL;
			(*step)->stats.pages_max_taskid = (uint16_t)NO_VAL;
			(*step)->stats.pages_ave = (float)NO_VAL;
			(*step)->stats.cpu_min = NO_VAL;
			(*step)->stats.cpu_min_taskid = (uint16_t)NO_VAL;
			(*step)->stats.cpu_ave =  (float)NO_VAL;
			(*step)->stepname = NULL;
			(*step)->nodes = NULL;
		}
		if (len > F_MIN_CPU_NODE) {
			(*step)->stats.vsize_max_nodeid =
				atoi(f[F_MAX_VSIZE_NODE]);
			(*step)->stats.rss_max_nodeid =
				atoi(f[F_MAX_RSS_NODE]);
			(*step)->stats.pages_max_nodeid =
				atoi(f[F_MAX_PAGES_NODE]);
			(*step)->stats.cpu_min_nodeid =
				atoi(f[F_MIN_CPU_NODE]);
		} else {
			(*step)->stats.vsize_max_nodeid = NO_VAL;
			(*step)->stats.rss_max_nodeid = NO_VAL;
			(*step)->stats.pages_max_nodeid = NO_VAL;
			(*step)->stats.cpu_min_nodeid = NO_VAL;
		}
		if (len > F_STEP_ACCOUNT)
			(*step)->account = xstrdup(f[F_STEP_ACCOUNT]);
		if (len > F_STEP_REQUID)
			(*step)->requid = atoi(f[F_STEP_REQUID]);
		break;
	case JOB_SUSPEND:
	case JOB_TERMINATED:
		*job = _create_filetxt_job_rec(header);
		(*job)->elapsed = atoi(f[F_TOT_ELAPSED]);
		(*job)->status = atoi(f[F_STATUS]);
		if (len > F_JOB_REQUID)
			(*job)->requid = atoi(f[F_JOB_REQUID]);
		if (len > F_JOB_EXITCODE)
			(*job)->exitcode = atoi(f[F_JOB_EXITCODE]);
		break;
	default:
		error("UNKNOWN TYPE %d",i);
		break;
	}
	return SLURM_SUCCESS;
}