/* * load into the storage a suspention of a job */ extern int jobacct_storage_p_suspend(void *db_conn, struct job_record *job_ptr) { char buf[BUFFER_SIZE]; static time_t now = 0; static time_t temp = 0; int elapsed; if (!storage_init) { debug("jobacct init was not called or it failed"); return SLURM_ERROR; } /* tell what time has passed */ if (!now) now = job_ptr->start_time; temp = now; now = time(NULL); if ((elapsed=now-temp) < 0) elapsed=0; /* For *very* short jobs, if clock is wrong */ /* here we are really just going for a marker in time to tell when * the process was suspended or resumed (check job state), we don't * really need to keep track of anything else */ snprintf(buf, BUFFER_SIZE, "%d %d %d", JOB_SUSPEND, elapsed, job_ptr->job_state & JOB_STATE_BASE);/* job status */ return _print_record(job_ptr, now, buf); }
/* * load into the storage the start of a job */ extern int jobacct_storage_p_job_start(void *db_conn, struct job_record *job_ptr) { int rc = SLURM_SUCCESS; char buf[BUFFER_SIZE], *account, *nodes; char *jname = NULL; long priority; int track_steps = 0; if (!storage_init) { debug("jobacct init was not called or it failed"); return SLURM_ERROR; } debug2("slurmdb_job_start() called"); if (job_ptr->start_time == 0) { /* This function is called when a job becomes elligible to run * in order to record reserved time (a measure of system * over-subscription). We only use this with database * plugins. */ return rc; } priority = (job_ptr->priority == NO_VAL) ? -1L : (long) job_ptr->priority; if (job_ptr->name && job_ptr->name[0]) { jname = _safe_dup(job_ptr->name); } else { jname = xstrdup("allocation"); track_steps = 1; } account= _safe_dup(job_ptr->account); if (job_ptr->nodes && job_ptr->nodes[0]) nodes = job_ptr->nodes; else nodes = "(null)"; if (job_ptr->batch_flag) track_steps = 1; job_ptr->requid = -1; /* force to -1 for stats to know this * hasn't been set yet */ snprintf(buf, BUFFER_SIZE, "%d %s %d %ld %u %s %s", JOB_START, jname, track_steps, priority, job_ptr->total_cpus, nodes, account); rc = _print_record(job_ptr, job_ptr->start_time, buf); xfree(account); xfree(jname); return rc; }
/* * load into the storage the end of a job */ extern int jobacct_storage_p_job_complete(void *db_conn, struct job_record *job_ptr) { char buf[BUFFER_SIZE]; uint16_t job_state; int duration; uint32_t exit_code; if (!storage_init) { debug("jobacct init was not called or it failed"); return SLURM_ERROR; } debug2("slurmdb_job_complete() called"); if (IS_JOB_RESIZING(job_ptr)) { job_state = JOB_RESIZING; if (job_ptr->resize_time) duration = time(NULL) - job_ptr->resize_time; else duration = time(NULL) - job_ptr->start_time; } else { if (job_ptr->end_time == 0) { debug("jobacct: job %u never started", job_ptr->job_id); return SLURM_ERROR; } job_state = job_ptr->job_state & JOB_STATE_BASE; if (job_ptr->resize_time) duration = job_ptr->end_time - job_ptr->resize_time; else duration = job_ptr->end_time - job_ptr->start_time; } exit_code = job_ptr->exit_code; if (exit_code == 1) { /* This wasn't signalled, it was set by Slurm so don't * treat it like a signal. */ exit_code = 256; } /* leave the requid as a %d since we want to see if it is -1 in stats */ snprintf(buf, BUFFER_SIZE, "%d %d %u %u %u", JOB_TERMINATED, duration, job_state, job_ptr->requid, exit_code); return _print_record(job_ptr, job_ptr->end_time, buf); }
/*
 * Read the records selected by `request` from `cdb` and print each one to
 * `fh` via _print_record(), passing `date_format` through unchanged.
 *
 * Nothing is printed when _cdb_read_records() does not return CDB_SUCCESS.
 * The record array handed back by the read is always released with free().
 */
void cdb_print_records(cdb_t *cdb, cdb_request_t *request, FILE *fh, const char *date_format)
{
    cdb_record_t *records = NULL;
    uint64_t num_recs = 0;

    if (_cdb_read_records(cdb, request, &num_recs, &records) == CDB_SUCCESS) {
        cdb_record_t *rec = records;
        uint64_t remaining = num_recs;

        /* walk the array front to back, one record per iteration */
        while (remaining > 0) {
            _print_record(fh, rec->time, rec->value, date_format);
            rec++;
            remaining--;
        }
    }

    free(records);
}
/*
 * Read the aggregate of the records selected by `request` across the
 * `num_cdbs` databases in `cdbs` and print each resulting record to `fh`
 * via _print_record(), passing `date_format` through unchanged.
 *
 * The record array handed back by the read is released with free().
 */
void cdb_print_aggregate_records(cdb_t **cdbs, int num_cdbs, cdb_request_t *request, FILE *fh, const char *date_format)
{
    uint64_t i = 0;
    uint64_t num_recs = 0;
    cdb_record_t *records = NULL;

    /*
     * cdb_read_aggregate_records() needs somewhere to store its range
     * output, but this function never reads it.  A zero-initialised
     * automatic variable is enough and, unlike a heap allocation, cannot
     * fail and hand a NULL pointer to the reader.
     */
    cdb_range_t range = {0};

    cdb_read_aggregate_records(cdbs, num_cdbs, request, &num_recs, &records, &range);

    for (i = 0; i < num_recs; i++) {
        _print_record(fh, records[i].time, records[i].value, date_format);
    }

    free(records);
}
/* * load into the storage the end of a job step */ extern int jobacct_storage_p_step_complete(void *db_conn, struct step_record *step_ptr) { char buf[BUFFER_SIZE]; time_t now; int elapsed; int comp_status; int cpus = 0, rc; char node_list[BUFFER_SIZE]; struct jobacctinfo *jobacct = (struct jobacctinfo *)step_ptr->jobacct; struct jobacctinfo dummy_jobacct; #ifdef HAVE_BG char *ionodes = NULL; #endif float ave_vsize = 0, ave_rss = 0, ave_pages = 0; float ave_cpu = 0; uint32_t ave_cpu2 = 0; char *account, *step_name; uint32_t exit_code; if (!storage_init) { debug("jobacct init was not called or it failed"); return SLURM_ERROR; } now = time(NULL); if (jobacct == NULL) { /* JobAcctGather=slurmdb_gather/none, no data to process */ memset(&dummy_jobacct, 0, sizeof(dummy_jobacct)); jobacct = &dummy_jobacct; } if ((elapsed=now-step_ptr->start_time)<0) elapsed=0; /* For *very* short jobs, if clock is wrong */ exit_code = step_ptr->exit_code; if (exit_code == NO_VAL) { comp_status = JOB_CANCELLED; exit_code = 0; } else if (exit_code) comp_status = JOB_FAILED; else comp_status = JOB_COMPLETE; #ifdef HAVE_BG if (step_ptr->job_ptr->details) cpus = step_ptr->job_ptr->details->min_cpus; else cpus = step_ptr->job_ptr->cpu_cnt; select_g_select_jobinfo_get(step_ptr->job_ptr->select_jobinfo, SELECT_JOBDATA_IONODES, &ionodes); if (ionodes) { snprintf(node_list, BUFFER_SIZE, "%s[%s]", step_ptr->job_ptr->nodes, ionodes); xfree(ionodes); } else snprintf(node_list, BUFFER_SIZE, "%s", step_ptr->job_ptr->nodes); #else if (!step_ptr->step_layout || !step_ptr->step_layout->task_cnt) { cpus = step_ptr->job_ptr->total_cpus; snprintf(node_list, BUFFER_SIZE, "%s", step_ptr->job_ptr->nodes); } else { cpus = step_ptr->step_layout->task_cnt; snprintf(node_list, BUFFER_SIZE, "%s", step_ptr->step_layout->node_list); } #endif /* figure out the ave of the totals sent */ if (cpus > 0) { ave_vsize = jobacct->tot_vsize; ave_vsize /= cpus; ave_rss = jobacct->tot_rss; ave_rss /= cpus; ave_pages = 
jobacct->tot_pages; ave_pages /= cpus; ave_cpu = jobacct->tot_cpu; ave_cpu /= cpus; } if (jobacct->min_cpu != (uint32_t)NO_VAL) { ave_cpu2 = jobacct->min_cpu; } account = _safe_dup(step_ptr->job_ptr->account); step_name = _safe_dup(step_ptr->name); snprintf(buf, BUFFER_SIZE, _jobstep_format, JOB_STEP, step_ptr->step_id, /* stepid */ comp_status, /* completion status */ exit_code, /* completion code */ cpus, /* number of tasks */ cpus, /* number of cpus */ elapsed, /* elapsed seconds */ /* total cputime seconds */ jobacct->user_cpu_sec + jobacct->sys_cpu_sec, /* total cputime seconds */ jobacct->user_cpu_usec + jobacct->sys_cpu_usec, jobacct->user_cpu_sec, /* user seconds */ jobacct->user_cpu_usec,/* user microseconds */ jobacct->sys_cpu_sec, /* system seconds */ jobacct->sys_cpu_usec,/* system microsecs */ 0, /* max rss */ 0, /* max ixrss */ 0, /* max idrss */ 0, /* max isrss */ 0, /* max minflt */ 0, /* max majflt */ 0, /* max nswap */ 0, /* total inblock */ 0, /* total outblock */ 0, /* total msgsnd */ 0, /* total msgrcv */ 0, /* total nsignals */ 0, /* total nvcsw */ 0, /* total nivcsw */ jobacct->max_vsize, /* max vsize */ jobacct->max_vsize_id.taskid, /* max vsize node */ ave_vsize, /* ave vsize */ jobacct->max_rss, /* max vsize */ jobacct->max_rss_id.taskid, /* max rss node */ ave_rss, /* ave rss */ jobacct->max_pages, /* max pages */ jobacct->max_pages_id.taskid, /* max pages node */ ave_pages, /* ave pages */ ave_cpu2, /* min cpu */ jobacct->min_cpu_id.taskid, /* min cpu node */ ave_cpu, /* ave cpu */ step_name, /* step exe name */ node_list, /* name of nodes step running on */ jobacct->max_vsize_id.nodeid, /* max vsize task */ jobacct->max_rss_id.nodeid, /* max rss task */ jobacct->max_pages_id.nodeid, /* max pages task */ jobacct->min_cpu_id.nodeid, /* min cpu task */ account, step_ptr->job_ptr->requid); /* requester user id */ rc = _print_record(step_ptr->job_ptr, now, buf); xfree(account); xfree(step_name); return rc; }
/* * load into the storage the start of a job step */ extern int jobacct_storage_p_step_start(void *db_conn, struct step_record *step_ptr) { char buf[BUFFER_SIZE]; int cpus = 0, rc; char node_list[BUFFER_SIZE]; #ifdef HAVE_BG char *ionodes = NULL; #endif float float_tmp = 0; char *account, *step_name; if (!storage_init) { debug("jobacct init was not called or it failed"); return SLURM_ERROR; } #ifdef HAVE_BG if (step_ptr->job_ptr->details) cpus = step_ptr->job_ptr->details->min_cpus; else cpus = step_ptr->job_ptr->cpu_cnt; select_g_select_jobinfo_get(step_ptr->job_ptr->select_jobinfo, SELECT_JOBDATA_IONODES, &ionodes); if (ionodes) { snprintf(node_list, BUFFER_SIZE, "%s[%s]", step_ptr->job_ptr->nodes, ionodes); xfree(ionodes); } else snprintf(node_list, BUFFER_SIZE, "%s", step_ptr->job_ptr->nodes); #else if (!step_ptr->step_layout || !step_ptr->step_layout->task_cnt) { cpus = step_ptr->job_ptr->total_cpus; snprintf(node_list, BUFFER_SIZE, "%s", step_ptr->job_ptr->nodes); } else { cpus = step_ptr->step_layout->task_cnt; snprintf(node_list, BUFFER_SIZE, "%s", step_ptr->step_layout->node_list); } #endif account = _safe_dup(step_ptr->job_ptr->account); step_name = _safe_dup(step_ptr->name); step_ptr->job_ptr->requid = -1; /* force to -1 for stats to know this * hasn't been set yet */ snprintf(buf, BUFFER_SIZE, _jobstep_format, JOB_STEP, step_ptr->step_id, /* stepid */ JOB_RUNNING, /* completion status */ 0, /* completion code */ cpus, /* number of tasks */ cpus, /* number of cpus */ 0, /* elapsed seconds */ 0, /* total cputime seconds */ 0, /* total cputime seconds */ 0, /* user seconds */ 0, /* user microseconds */ 0, /* system seconds */ 0, /* system microsecs */ 0, /* max rss */ 0, /* max ixrss */ 0, /* max idrss */ 0, /* max isrss */ 0, /* max minflt */ 0, /* max majflt */ 0, /* max nswap */ 0, /* total inblock */ 0, /* total outblock */ 0, /* total msgsnd */ 0, /* total msgrcv */ 0, /* total nsignals */ 0, /* total nvcsw */ 0, /* total nivcsw */ 0, /* max vsize */ 
0, /* max vsize task */ float_tmp, /* ave vsize */ 0, /* max rss */ 0, /* max rss task */ float_tmp, /* ave rss */ 0, /* max pages */ 0, /* max pages task */ float_tmp, /* ave pages */ 0, /* min cpu */ 0, /* min cpu task */ float_tmp, /* ave cpu */ step_name, /* step exe name */ node_list, /* name of nodes step running on */ 0, /* max vsize node */ 0, /* max rss node */ 0, /* max pages node */ 0, /* min cpu node */ account, step_ptr->job_ptr->requid); /* requester user id */ rc = _print_record(step_ptr->job_ptr, step_ptr->start_time, buf); xfree(account); xfree(step_name); return rc; }