static void _load_slurm_config(void) { acct_storage_backup_host = slurm_get_accounting_storage_backup_host(); acct_storage_host = slurm_get_accounting_storage_host(); acct_storage_loc = slurm_get_accounting_storage_loc(); acct_storage_pass = slurm_get_accounting_storage_pass(); acct_storage_port = slurm_get_accounting_storage_port(); acct_storage_type = slurm_get_accounting_storage_type(); acct_storage_user = slurm_get_accounting_storage_user(); auth_type = slurm_get_auth_type(); msg_timeout = slurm_get_msg_timeout(); plugin_dir = slurm_get_plugin_dir(); private_data = slurm_get_private_data(); slurm_user_id = slurm_get_slurm_user_id(); track_wckey = slurm_get_track_wckey(); }
/*
 * pgsql_daily_rollup - run the per-day rollup SQL functions for every day
 *	in [start, end)
 * IN pg_conn: database connection
 * IN cluster: cluster name; prefixes the rollup function names in the SQL
 * IN start: time inside the first day to process
 * IN end: days starting at or after this time are not processed
 * IN archive_data: not referenced by this function
 * RET: SLURM_SUCCESS, or SLURM_ERROR when a day boundary cannot be
 *	computed or a rollup query fails
 */
static int pgsql_daily_rollup(pgsql_conn_t *pg_conn, char *cluster,
			      time_t start, time_t end,
			      uint16_t archive_data)
{
	int rc = SLURM_SUCCESS;
	/* "query" and "pg_conn" are presumably picked up by name by the
	 * argument-less DEF_QUERY_RET_RC macro, so they are not renamed. */
	char *query = NULL;
	time_t stamp = time(NULL);
	time_t day_start = start;
	time_t day_end;
	uint16_t wckey_on = slurm_get_track_wckey();

	/* The end of each day is computed by _next_day() rather than by
	 * adding 86400 seconds, since daylight savings starts and ends
	 * every once in a while. */
	while (_next_day(&day_start, &day_end) == SLURM_SUCCESS) {
		if (day_start >= end)
			return SLURM_SUCCESS;

		debug3("curr day is now %ld-%ld", day_start, day_end);

		query = xstrdup_printf(
			"SELECT %s.assoc_daily_rollup(%ld, %ld, %ld);"
			"SELECT %s.cluster_daily_rollup(%ld, %ld, %ld);",
			cluster, stamp, day_start, day_end,
			cluster, stamp, day_start, day_end);
		if (wckey_on) {
			xstrfmtcat(query,
				   "SELECT %s.wckey_daily_rollup(%ld, %ld, %ld);",
				   cluster, stamp, day_start, day_end);
		}

		rc = DEF_QUERY_RET_RC;
		if (rc != SLURM_SUCCESS) {
			error("Couldn't add day rollup");
			return SLURM_ERROR;
		}

		/* the next day begins where this one ended */
		day_start = day_end;
	}

	/* only reached when _next_day() failed */
	return SLURM_ERROR;
}
/*
 * as_mysql_hourly_rollup - build the per-hour usage records of one cluster
 *	for every hour in [start, end) and store them in the hour tables
 * IN mysql_conn: database connection
 * IN cluster_name: cluster whose "<cluster>_<table>" tables are used
 * IN start: beginning of the first hour to process
 * IN end: hours starting at or after this time are not processed
 * IN archive_data: handed to _process_purge() once the rollup succeeded
 * RET: SLURM_SUCCESS, SLURM_ERROR when a select fails, or the error code
 *	of the failing insert/_process_cluster_usage()/_process_purge() call
 *
 * For each hour the function reads the reservations and the jobs (with
 * their suspend records) overlapping the hour, accumulates allocated cpu
 * seconds and energy per association and per wckey (when wckeys are
 * tracked), lets _process_cluster_usage() store the cluster record and
 * finally inserts the association and wckey records.
 *
 * Every failure path leaves through end_it so that the lists, iterators
 * and column strings allocated here are always released.
 */
extern int as_mysql_hourly_rollup(mysql_conn_t *mysql_conn,
				  char *cluster_name,
				  time_t start, time_t end,
				  uint16_t archive_data)
{
	int rc = SLURM_SUCCESS;
	int add_sec = 3600;
	int i = 0;
	time_t now = time(NULL);
	time_t curr_start = start;
	time_t curr_end = curr_start + add_sec;
	char *query = NULL;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	ListIterator a_itr = NULL;
	ListIterator c_itr = NULL;
	ListIterator w_itr = NULL;
	ListIterator r_itr = NULL;
	List assoc_usage_list = list_create(_destroy_local_id_usage);
	List cluster_down_list = list_create(_destroy_local_cluster_usage);
	List wckey_usage_list = list_create(_destroy_local_id_usage);
	List resv_usage_list = list_create(_destroy_local_resv_usage);
	uint16_t track_wckey = slurm_get_track_wckey();

	/* columns of the job select; must stay in step with the enum */
	char *job_req_inx[] = {
		"job.job_db_inx",
		"job.id_job",
		"job.id_assoc",
		"job.id_wckey",
		"job.array_task_pending",
		"job.time_eligible",
		"job.time_start",
		"job.time_end",
		"job.time_suspended",
		"job.cpus_alloc",
		"job.cpus_req",
		"job.id_resv",
		"SUM(step.consumed_energy)"
	};
	char *job_str = NULL;
	enum {
		JOB_REQ_DB_INX,
		JOB_REQ_JOBID,
		JOB_REQ_ASSOCID,
		JOB_REQ_WCKEYID,
		JOB_REQ_ARRAY_PENDING,
		JOB_REQ_ELG,
		JOB_REQ_START,
		JOB_REQ_END,
		JOB_REQ_SUSPENDED,
		JOB_REQ_ACPU,
		JOB_REQ_RCPU,
		JOB_REQ_RESVID,
		JOB_REQ_ENERGY,
		JOB_REQ_COUNT
	};

	/* columns of the suspend select; must stay in step with the enum */
	char *suspend_req_inx[] = {
		"time_start",
		"time_end"
	};
	char *suspend_str = NULL;
	enum {
		SUSPEND_REQ_START,
		SUSPEND_REQ_END,
		SUSPEND_REQ_COUNT
	};

	/* columns of the reservation select; must stay in step with the
	 * enum */
	char *resv_req_inx[] = {
		"id_resv",
		"assoclist",
		"cpus",
		"flags",
		"time_start",
		"time_end"
	};
	char *resv_str = NULL;
	enum {
		RESV_REQ_ID,
		RESV_REQ_ASSOCS,
		RESV_REQ_CPU,
		RESV_REQ_FLAGS,
		RESV_REQ_START,
		RESV_REQ_END,
		RESV_REQ_COUNT
	};

	/* turn the column arrays into comma separated select lists */
	xstrfmtcat(job_str, "%s", job_req_inx[0]);
	for (i = 1; i < JOB_REQ_COUNT; i++)
		xstrfmtcat(job_str, ", %s", job_req_inx[i]);

	xstrfmtcat(suspend_str, "%s", suspend_req_inx[0]);
	for (i = 1; i < SUSPEND_REQ_COUNT; i++)
		xstrfmtcat(suspend_str, ", %s", suspend_req_inx[i]);

	xstrfmtcat(resv_str, "%s", resv_req_inx[0]);
	for (i = 1; i < RESV_REQ_COUNT; i++)
		xstrfmtcat(resv_str, ", %s", resv_req_inx[i]);

	a_itr = list_iterator_create(assoc_usage_list);
	c_itr = list_iterator_create(cluster_down_list);
	w_itr = list_iterator_create(wckey_usage_list);
	r_itr = list_iterator_create(resv_usage_list);

	while (curr_start < end) {
		int last_id = -1;
		int last_wckeyid = -1;
		int seconds = 0;
		int tot_time = 0;
		local_cluster_usage_t *loc_c_usage = NULL;
		local_cluster_usage_t *c_usage = NULL;
		local_resv_usage_t *r_usage = NULL;
		local_id_usage_t *a_usage = NULL;
		local_id_usage_t *w_usage = NULL;

		if (debug_flags & DEBUG_FLAG_DB_USAGE)
			DB_DEBUG(mysql_conn->conn,
				 "%s curr hour is now %ld-%ld",
				 cluster_name, curr_start, curr_end);

		c_usage = _setup_cluster_usage(mysql_conn, cluster_name,
					       curr_start, curr_end,
					       cluster_down_list);

		/* now get the reservations during this time */

		/* If a reservation has the IGNORE_JOBS flag we don't
		 * have an easy way to distinguish the cpus a job not
		 * running in the reservation, but on it's cpus.
		 * So we will just ignore these reservations for
		 * accounting purposes.
		 */
		query = xstrdup_printf("select %s from \"%s_%s\" where "
				       "(time_start < %ld && time_end >= %ld) "
				       "&& !(flags & %u)"
				       "order by time_start",
				       resv_str, cluster_name, resv_table,
				       curr_end, curr_start,
				       RESERVE_FLAG_IGN_JOBS);

		if (debug_flags & DEBUG_FLAG_DB_USAGE)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
		if (!(result = mysql_db_query_ret(
			      mysql_conn, query, 0))) {
			xfree(query);
			_destroy_local_cluster_usage(c_usage);
			/* leave through end_it so nothing is leaked */
			rc = SLURM_ERROR;
			goto end_it;
		}
		xfree(query);

		/* If a reservation overlaps another reservation we
		   total up everything here as if they didn't but when
		   calculating the total time for a cluster we will
		   remove the extra time received.  This may result in
		   unexpected results with association based reports
		   since the association is given the total amount of
		   time of each reservation, thus equaling more time
		   than is available.  Job/Cluster/Reservation reports
		   should be fine though since we really don't over
		   allocate resources.  The issue with us not being
		   able to handle overlapping reservations here is
		   unless the reservation completely overlaps the
		   other reservation we have no idea how many cpus
		   should be removed since this could be a
		   heterogeneous system.  This same problem exists
		   when a reservation is created with the ignore_jobs
		   option which will allow jobs to continue to run in the
		   reservation that aren't suppose to.
		*/
		while ((row = mysql_fetch_row(result))) {
			time_t row_start = slurm_atoul(row[RESV_REQ_START]);
			time_t row_end = slurm_atoul(row[RESV_REQ_END]);
			uint32_t row_cpu = slurm_atoul(row[RESV_REQ_CPU]);
			uint32_t row_flags = slurm_atoul(row[RESV_REQ_FLAGS]);

			/* clip the reservation to the current hour */
			if (row_start < curr_start)
				row_start = curr_start;

			if (!row_end || row_end > curr_end)
				row_end = curr_end;

			/* Don't worry about it if the time is less
			 * than 1 second.
			 */
			if ((row_end - row_start) < 1)
				continue;

			r_usage = xmalloc(sizeof(local_resv_usage_t));
			r_usage->id = slurm_atoul(row[RESV_REQ_ID]);

			r_usage->local_assocs = list_create(slurm_destroy_char);
			slurm_addto_char_list(r_usage->local_assocs,
					      row[RESV_REQ_ASSOCS]);

			r_usage->total_time = (row_end - row_start) * row_cpu;
			r_usage->start = row_start;
			r_usage->end = row_end;
			list_append(resv_usage_list, r_usage);

			/* Since this reservation was added to the
			   cluster and only certain people could run
			   there we will use this as allocated time on
			   the system.  If the reservation was a
			   maintenance then we add the time to planned
			   down time.
			*/

			/* only record time for the clusters that have
			   registered.  This continue should rarely if
			   ever happen.
			*/
			if (!c_usage)
				continue;
			else if (row_flags & RESERVE_FLAG_MAINT)
				c_usage->pd_cpu += r_usage->total_time;
			else
				c_usage->a_cpu += r_usage->total_time;
		}
		mysql_free_result(result);

		/* now get the jobs during this time only */
		query = xstrdup_printf("select %s from \"%s_%s\" as job "
				       "left outer join \"%s_%s\" as step on "
				       "job.job_db_inx=step.job_db_inx "
				       "and (step.id_step>=0) "
				       "where (job.time_eligible < %ld && "
				       "(job.time_end >= %ld || "
				       "job.time_end = 0)) "
				       "group by job.job_db_inx "
				       "order by job.id_assoc, "
				       "job.time_eligible",
				       job_str, cluster_name, job_table,
				       cluster_name, step_table,
				       curr_end, curr_start);

		if (debug_flags & DEBUG_FLAG_DB_USAGE)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
		if (!(result = mysql_db_query_ret(
			      mysql_conn, query, 0))) {
			xfree(query);
			_destroy_local_cluster_usage(c_usage);
			rc = SLURM_ERROR;
			goto end_it;
		}
		xfree(query);

		while ((row = mysql_fetch_row(result))) {
			uint32_t job_id = slurm_atoul(row[JOB_REQ_JOBID]);
			uint32_t assoc_id = slurm_atoul(row[JOB_REQ_ASSOCID]);
			uint32_t wckey_id = slurm_atoul(row[JOB_REQ_WCKEYID]);
			uint32_t array_pending =
				slurm_atoul(row[JOB_REQ_ARRAY_PENDING]);
			uint32_t resv_id = slurm_atoul(row[JOB_REQ_RESVID]);
			time_t row_eligible = slurm_atoul(row[JOB_REQ_ELG]);
			time_t row_start = slurm_atoul(row[JOB_REQ_START]);
			time_t row_end = slurm_atoul(row[JOB_REQ_END]);
			uint32_t row_acpu = slurm_atoul(row[JOB_REQ_ACPU]);
			uint32_t row_rcpu = slurm_atoul(row[JOB_REQ_RCPU]);
			uint64_t row_energy = 0;
			int loc_seconds = 0;
			seconds = 0;

			/* the energy sum is NULL when no step row matched */
			if (row[JOB_REQ_ENERGY])
				row_energy = slurm_atoull(row[JOB_REQ_ENERGY]);

			/* clip the job's run time to the current hour */
			if (row_start && (row_start < curr_start))
				row_start = curr_start;

			if (!row_start && row_end)
				row_start = row_end;

			if (!row_end || row_end > curr_end)
				row_end = curr_end;

			if (!row_start || ((row_end - row_start) < 1))
				goto calc_cluster;

			seconds = (row_end - row_start);

			if (slurm_atoul(row[JOB_REQ_SUSPENDED])) {
				MYSQL_RES *result2 = NULL;
				MYSQL_ROW row2;

				/* get the suspended time for this job */
				query = xstrdup_printf(
					"select %s from \"%s_%s\" where "
					"(time_start < %ld && (time_end >= %ld "
					"|| time_end = 0)) && job_db_inx=%s "
					"order by time_start",
					suspend_str, cluster_name,
					suspend_table,
					curr_end, curr_start,
					row[JOB_REQ_DB_INX]);

				debug4("%d(%s:%d) query\n%s",
				       mysql_conn->conn, THIS_FILE,
				       __LINE__, query);
				if (!(result2 = mysql_db_query_ret(
					      mysql_conn,
					      query, 0))) {
					xfree(query);
					/* the job result set is still
					 * open at this point */
					mysql_free_result(result);
					_destroy_local_cluster_usage(c_usage);
					rc = SLURM_ERROR;
					goto end_it;
				}
				xfree(query);

				/* subtract every suspended interval that
				 * falls inside the clipped run time */
				while ((row2 = mysql_fetch_row(result2))) {
					time_t local_start = slurm_atoul(
						row2[SUSPEND_REQ_START]);
					time_t local_end = slurm_atoul(
						row2[SUSPEND_REQ_END]);

					if (!local_start)
						continue;

					if (row_start > local_start)
						local_start = row_start;
					if (row_end < local_end)
						local_end = row_end;
					tot_time = (local_end - local_start);
					if (tot_time < 1)
						continue;

					seconds -= tot_time;
				}
				mysql_free_result(result2);
			}
			if (seconds < 1) {
				debug4("This job (%u) was suspended "
				       "the entire hour", job_id);
				continue;
			}

			/* rows are ordered by id_assoc, so a new id means a
			 * new association record */
			if (last_id != assoc_id) {
				a_usage = xmalloc(sizeof(local_id_usage_t));
				a_usage->id = assoc_id;
				list_append(assoc_usage_list, a_usage);
				last_id = assoc_id;
			}

			a_usage->a_cpu += seconds * row_acpu;
			a_usage->energy += row_energy;

			if (!track_wckey)
				goto calc_cluster;

			/* do the wckey calculation */
			if (last_wckeyid != wckey_id) {
				list_iterator_reset(w_itr);
				while ((w_usage = list_next(w_itr)))
					if (w_usage->id == wckey_id)
						break;

				if (!w_usage) {
					w_usage = xmalloc(
						sizeof(local_id_usage_t));
					w_usage->id = wckey_id;
					list_append(wckey_usage_list,
						    w_usage);
				}

				last_wckeyid = wckey_id;
			}
			w_usage->a_cpu += seconds * row_acpu;
			w_usage->energy += row_energy;

			/* do the cluster allocated calculation */
		calc_cluster:

			/* Now figure out there was a disconnected
			   slurmctld durning this job.
			*/
			list_iterator_reset(c_itr);
			while ((loc_c_usage = list_next(c_itr))) {
				int temp_end = row_end;
				int temp_start = row_start;
				if (loc_c_usage->start > temp_start)
					temp_start = loc_c_usage->start;
				if (loc_c_usage->end < temp_end)
					temp_end = loc_c_usage->end;
				loc_seconds = (temp_end - temp_start);
				if (loc_seconds < 1)
					continue;

				/* from here on loc_seconds is cpu seconds */
				loc_seconds *= row_acpu;

				/* Remove exactly the amount compared
				 * against: multiplying by row_acpu a
				 * second time could subtract more than
				 * total_time holds. */
				if (loc_seconds >= loc_c_usage->total_time)
					loc_c_usage->total_time = 0;
				else
					loc_c_usage->total_time -= loc_seconds;
			}

			/* first figure out the reservation */
			if (resv_id) {
				if (seconds <= 0)
					continue;
				/* Since we have already added the
				   entire reservation as used time on
				   the cluster we only need to
				   calculate the used time for the
				   reservation and then divy up the
				   unused time over the associations
				   able to run in the reservation.
				   Since the job was to run, or ran a
				   reservation we don't care about
				   eligible time since that could
				   totally skew the clusters reserved time
				   since the job may be able to run
				   outside of the reservation. */
				list_iterator_reset(r_itr);
				while ((r_usage = list_next(r_itr))) {
					/* since the reservation could
					   have changed in some way,
					   thus making a new
					   reservation record in the
					   database, we have to make
					   sure all the reservations
					   are checked to see if such
					   a thing has happened */
					if (r_usage->id == resv_id) {
						int temp_end = row_end;
						int temp_start = row_start;
						if (r_usage->start > temp_start)
							temp_start =
								r_usage->start;
						if (r_usage->end < temp_end)
							temp_end = r_usage->end;

						if ((temp_end - temp_start)
						    > 0) {
							r_usage->a_cpu +=
								(temp_end
								 - temp_start)
								* row_acpu;
						}
					}
				}
				continue;
			}

			/* only record time for the clusters that have
			   registered.  This continue should rarely if
			   ever happen.
			*/
			if (!c_usage)
				continue;

			if (row_start && (seconds > 0)) {
				c_usage->a_cpu += seconds * row_acpu;
				c_usage->energy += row_energy;
			}

			/* now reserved time */
			if (!row_start || (row_start >= c_usage->start)) {
				int temp_end = row_start;
				int temp_start = row_eligible;
				if (c_usage->start > temp_start)
					temp_start = c_usage->start;
				if (c_usage->end < temp_end)
					temp_end = c_usage->end;
				loc_seconds = (temp_end - temp_start);
				if (loc_seconds > 0) {
					/* If we have pending jobs in
					   an array they haven't been
					   inserted into the database
					   yet as proper job records,
					   so handle them here.
					*/
					if (array_pending)
						loc_seconds *= array_pending;

					c_usage->r_cpu +=
						loc_seconds * row_rcpu;
				}
			}
		}
		mysql_free_result(result);

		/* now figure out how much more to add to the
		   associations that could had run in the reservation
		*/
		list_iterator_reset(r_itr);
		while ((r_usage = list_next(r_itr))) {
			int64_t idle = r_usage->total_time - r_usage->a_cpu;
			int assoc_cnt = list_count(r_usage->local_assocs);
			char *assoc = NULL;
			ListIterator tmp_itr = NULL;

			if (idle <= 0)
				continue;

			/* A reservation without any association has nobody
			 * to share the idle time with; skipping it also
			 * avoids a division by zero below. */
			if (!assoc_cnt)
				continue;

			/* now divide that time by the number of
			   associations in the reservation and add
			   them to each association */
			seconds = idle / assoc_cnt;

			tmp_itr = list_iterator_create(r_usage->local_assocs);
			while ((assoc = list_next(tmp_itr))) {
				uint32_t associd = slurm_atoul(assoc);
				if (last_id != associd) {
					list_iterator_reset(a_itr);
					while ((a_usage = list_next(a_itr))) {
						if (a_usage->id == associd) {
							last_id = a_usage->id;
							break;
						}
					}
				}

				if (!a_usage) {
					a_usage = xmalloc(
						sizeof(local_id_usage_t));
					a_usage->id = associd;
					list_append(assoc_usage_list, a_usage);
					last_id = associd;
				}

				a_usage->a_cpu += seconds;
			}
			list_iterator_destroy(tmp_itr);
		}

		/* now apply the down time from the slurmctld disconnects */
		if (c_usage) {
			list_iterator_reset(c_itr);
			while ((loc_c_usage = list_next(c_itr)))
				c_usage->d_cpu += loc_c_usage->total_time;

			if ((rc = _process_cluster_usage(
				     mysql_conn, cluster_name, curr_start,
				     curr_end, now, c_usage))
			    != SLURM_SUCCESS) {
				_destroy_local_cluster_usage(c_usage);
				goto end_it;
			}
		}

		/* one multi-row insert for all association records; query
		 * is NULL here, so the first record writes the statement
		 * head and later ones only append a value tuple */
		list_iterator_reset(a_itr);
		while ((a_usage = list_next(a_itr))) {
			if (query) {
				xstrfmtcat(query,
					   ", (%ld, %ld, %d, %ld, %"PRIu64", "
					   "%"PRIu64")",
					   now, now,
					   a_usage->id, curr_start,
					   a_usage->a_cpu, a_usage->energy);
			} else {
				xstrfmtcat(query,
					   "insert into \"%s_%s\" "
					   "(creation_time, "
					   "mod_time, id_assoc, time_start, "
					   "alloc_cpu_secs, consumed_energy) "
					   "values "
					   "(%ld, %ld, %d, %ld, %"PRIu64", "
					   "%"PRIu64")",
					   cluster_name, assoc_hour_table,
					   now, now,
					   a_usage->id, curr_start,
					   a_usage->a_cpu, a_usage->energy);
			}
		}
		if (query) {
			xstrfmtcat(query,
				   " on duplicate key update "
				   "mod_time=%ld, "
				   "alloc_cpu_secs=VALUES(alloc_cpu_secs), "
				   "consumed_energy=VALUES(consumed_energy);",
				   now);

			if (debug_flags & DEBUG_FLAG_DB_USAGE)
				DB_DEBUG(mysql_conn->conn, "query\n%s", query);
			rc = mysql_db_query(mysql_conn, query);
			xfree(query);
			if (rc != SLURM_SUCCESS) {
				error("Couldn't add assoc hour rollup");
				_destroy_local_cluster_usage(c_usage);
				goto end_it;
			}
		}

		if (!track_wckey)
			goto end_loop;

		/* same multi-row insert for the wckey records */
		list_iterator_reset(w_itr);
		while ((w_usage = list_next(w_itr))) {
			if (query) {
				xstrfmtcat(query,
					   ", (%ld, %ld, %d, %ld, "
					   "%"PRIu64", %"PRIu64")",
					   now, now,
					   w_usage->id, curr_start,
					   w_usage->a_cpu, w_usage->energy);
			} else {
				xstrfmtcat(query,
					   "insert into \"%s_%s\" "
					   "(creation_time, "
					   "mod_time, id_wckey, time_start, "
					   "alloc_cpu_secs, consumed_energy) "
					   "values "
					   "(%ld, %ld, %d, %ld, "
					   "%"PRIu64", %"PRIu64")",
					   cluster_name, wckey_hour_table,
					   now, now,
					   w_usage->id, curr_start,
					   w_usage->a_cpu, w_usage->energy);
			}
		}
		if (query) {
			xstrfmtcat(query,
				   " on duplicate key update "
				   "mod_time=%ld, "
				   "alloc_cpu_secs=VALUES(alloc_cpu_secs), "
				   "consumed_energy=VALUES(consumed_energy);",
				   now);

			if (debug_flags & DEBUG_FLAG_DB_USAGE)
				DB_DEBUG(mysql_conn->conn, "query\n%s", query);
			rc = mysql_db_query(mysql_conn, query);
			xfree(query);
			if (rc != SLURM_SUCCESS) {
				error("Couldn't add wckey hour rollup");
				_destroy_local_cluster_usage(c_usage);
				goto end_it;
			}
		}

	end_loop:
		/* drop this hour's records and move on to the next hour */
		_destroy_local_cluster_usage(c_usage);
		list_flush(assoc_usage_list);
		list_flush(cluster_down_list);
		list_flush(wckey_usage_list);
		list_flush(resv_usage_list);
		curr_start = curr_end;
		curr_end = curr_start + add_sec;
	}
end_it:
	xfree(suspend_str);
	xfree(job_str);
	xfree(resv_str);
	list_iterator_destroy(a_itr);
	list_iterator_destroy(c_itr);
	list_iterator_destroy(w_itr);
	list_iterator_destroy(r_itr);

	list_destroy(assoc_usage_list);
	list_destroy(cluster_down_list);
	list_destroy(wckey_usage_list);
	list_destroy(resv_usage_list);

	/* go check to see if we archive and purge */
	if (rc == SLURM_SUCCESS)
		rc = _process_purge(mysql_conn, cluster_name, archive_data,
				    SLURMDB_PURGE_HOURS);

	return rc;
}
extern int as_mysql_monthly_rollup(mysql_conn_t *mysql_conn, char *cluster_name, time_t start, time_t end, uint16_t archive_data) { int rc = SLURM_SUCCESS; struct tm start_tm; time_t curr_start = start; time_t curr_end; time_t now = time(NULL); char *query = NULL; uint16_t track_wckey = slurm_get_track_wckey(); if (!localtime_r(&curr_start, &start_tm)) { error("Couldn't get localtime from month start %ld", curr_start); return SLURM_ERROR; } start_tm.tm_sec = 0; start_tm.tm_min = 0; start_tm.tm_hour = 0; start_tm.tm_mday = 1; start_tm.tm_mon++; start_tm.tm_isdst = -1; curr_end = mktime(&start_tm); while (curr_start < end) { if (debug_flags & DEBUG_FLAG_DB_USAGE) DB_DEBUG(mysql_conn->conn, "curr month is now %ld-%ld", curr_start, curr_end); /* info("start %s", slurm_ctime(&curr_start)); */ /* info("end %s", slurm_ctime(&curr_end)); */ query = xstrdup_printf( "insert into \"%s_%s\" (creation_time, " "mod_time, id_assoc, " "time_start, alloc_cpu_secs, consumed_energy) select " "%ld, %ld, id_assoc, " "%ld, @ASUM:=SUM(alloc_cpu_secs), " "@ESUM:=SUM(consumed_energy) " "from \"%s_%s\" where " "(time_start < %ld && time_start >= %ld) " "group by id_assoc on duplicate key update " "mod_time=%ld, alloc_cpu_secs=@ASUM, " "consumed_energy=@ESUM;", cluster_name, assoc_month_table, now, now, curr_start, cluster_name, assoc_day_table, curr_end, curr_start, now); /* We group on deleted here so if there are no entries we don't get an error, just nothing is returned. 
Else we get a bunch of NULL's */ xstrfmtcat(query, "insert into \"%s_%s\" (creation_time, " "mod_time, time_start, cpu_count, " "alloc_cpu_secs, down_cpu_secs, pdown_cpu_secs, " "idle_cpu_secs, over_cpu_secs, resv_cpu_secs, " "consumed_energy) " "select %ld, %ld, " "%ld, @CPU:=MAX(cpu_count), " "@ASUM:=SUM(alloc_cpu_secs), " "@DSUM:=SUM(down_cpu_secs), " "@PDSUM:=SUM(pdown_cpu_secs), " "@ISUM:=SUM(idle_cpu_secs), " "@OSUM:=SUM(over_cpu_secs), " "@RSUM:=SUM(resv_cpu_secs), " "@ESUM:=SUM(consumed_energy) from \"%s_%s\" where " "(time_start < %ld && time_start >= %ld) " "group by deleted " "on duplicate key update " "mod_time=%ld, cpu_count=@CPU, " "alloc_cpu_secs=@ASUM, down_cpu_secs=@DSUM, " "pdown_cpu_secs=@PDSUM, idle_cpu_secs=@ISUM, " "over_cpu_secs=@OSUM, resv_cpu_secs=@RSUM, " "consumed_energy=@ESUM;", cluster_name, cluster_month_table, now, now, curr_start, cluster_name, cluster_day_table, curr_end, curr_start, now); if (track_wckey) { xstrfmtcat(query, "insert into \"%s_%s\" " "(creation_time, mod_time, " "id_wckey, time_start, alloc_cpu_secs, " "consumed_energy) " "select %ld, %ld, id_wckey, %ld, " "@ASUM:=SUM(alloc_cpu_secs), " "@ESUM:=SUM(consumed_energy) " "from \"%s_%s\" where (time_start < %ld && " "time_start >= %ld) " "group by id_wckey on duplicate key update " "mod_time=%ld, alloc_cpu_secs=@ASUM, " "consumed_energy=@ESUM;", cluster_name, wckey_month_table, now, now, curr_start, cluster_name, wckey_day_table, curr_end, curr_start, now); } if (debug_flags & DEBUG_FLAG_DB_USAGE) DB_DEBUG(mysql_conn->conn, "query\n%s", query); rc = mysql_db_query(mysql_conn, query); xfree(query); if (rc != SLURM_SUCCESS) { error("Couldn't add day rollup"); return SLURM_ERROR; } curr_start = curr_end; if (!localtime_r(&curr_start, &start_tm)) { error("Couldn't get localtime from month start %ld", curr_start); } start_tm.tm_sec = 0; start_tm.tm_min = 0; start_tm.tm_hour = 0; start_tm.tm_mday = 1; start_tm.tm_mon++; start_tm.tm_isdst = -1; curr_end = mktime(&start_tm); } 
/* go check to see if we archive and purge */ rc = _process_purge(mysql_conn, cluster_name, archive_data, SLURMDB_PURGE_MONTHS); return rc; }
/*
 * _layout_conf_dbd - fill a tree store with the accounting configuration
 * IN/OUT treestore: store receiving one name/value line per setting
 *
 * The accounting related values of slurm.conf are listed first, followed
 * by every key/value pair returned by slurmdb_config_get().  Nothing is
 * added for the second part when slurmdb_config_get() returns NULL.
 *
 * NOTE(review): AccountingStoragePass is displayed as returned by
 * slurm_get_accounting_storage_pass() - confirm showing it is intended.
 */
static void _layout_conf_dbd(GtkTreeStore *treestore)
{
	ListIterator itr = NULL;
	GtkTreeIter iter;
	config_key_pair_t *key_pair;
	int update = 0;
	time_t now = time(NULL);
	char tmp_str[128], *user_name = NULL;
	List dbd_config_list = NULL;

	/* first load accounting parms from slurm.conf */
	char *acct_storage_backup_host =
		slurm_get_accounting_storage_backup_host();
	char *acct_storage_host = slurm_get_accounting_storage_host();
	char *acct_storage_loc  = slurm_get_accounting_storage_loc();
	char *acct_storage_pass = slurm_get_accounting_storage_pass();
	uint32_t acct_storage_port = slurm_get_accounting_storage_port();
	char *acct_storage_type = slurm_get_accounting_storage_type();
	char *acct_storage_user = slurm_get_accounting_storage_user();
	char *auth_type = slurm_get_auth_type();
	uint16_t msg_timeout = slurm_get_msg_timeout();
	char *plugin_dir = slurm_get_plugin_dir();
	uint16_t private_data = slurm_get_private_data();
	uint32_t slurm_user_id = slurm_get_slurm_user_id();
	uint16_t track_wckey = slurm_get_track_wckey();

	slurm_make_time_str(&now, tmp_str, sizeof(tmp_str));
	add_display_treestore_line_with_font(
		update, treestore, &iter,
		"SLURM Configuration data as of", tmp_str, "bold");

	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStorageBackupHost",
				   acct_storage_backup_host);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStorageHost",
				   acct_storage_host);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStorageLoc",
				   acct_storage_loc);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStoragePass",
				   acct_storage_pass);
	snprintf(tmp_str, sizeof(tmp_str), "%u", acct_storage_port);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStoragePort", tmp_str);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStorageType",
				   acct_storage_type);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStorageUser",
				   acct_storage_user);
	add_display_treestore_line(update, treestore, &iter,
				   "AuthType", auth_type);
	snprintf(tmp_str, sizeof(tmp_str), "%u sec", msg_timeout);
	add_display_treestore_line(update, treestore, &iter,
				   "MessageTimeout", tmp_str);
	add_display_treestore_line(update, treestore, &iter,
				   "PluginDir", plugin_dir);
	private_data_string(private_data, tmp_str, sizeof(tmp_str));
	add_display_treestore_line(update, treestore, &iter,
				   "PrivateData", tmp_str);

	/* the user name has no fixed length, so the write into tmp_str
	 * has to be bounded */
	user_name = uid_to_string(slurm_user_id);
	snprintf(tmp_str, sizeof(tmp_str), "%s(%u)",
		 user_name, slurm_user_id);
	xfree(user_name);
	add_display_treestore_line(update, treestore, &iter,
				   "SlurmUserId", tmp_str);
	add_display_treestore_line(update, treestore, &iter,
				   "SLURM_CONF", default_slurm_config_file);
	add_display_treestore_line(update, treestore, &iter,
				   "SLURM_VERSION", SLURM_VERSION_STRING);
	snprintf(tmp_str, sizeof(tmp_str), "%u", track_wckey);
	add_display_treestore_line(update, treestore, &iter,
				   "TrackWCKey", tmp_str);

	xfree(acct_storage_backup_host);
	xfree(acct_storage_host);
	xfree(acct_storage_loc);
	xfree(acct_storage_pass);
	xfree(acct_storage_type);
	xfree(acct_storage_user);
	xfree(auth_type);
	xfree(plugin_dir);

	/* second load slurmdbd.conf parms */
	if (!(dbd_config_list = slurmdb_config_get(NULL)))
		return;

	add_display_treestore_line_with_font(
		update, treestore, &iter,
		"\nSlurmDBD Configuration:", NULL, "bold");

	itr = list_iterator_create(dbd_config_list);
	while ((key_pair = list_next(itr))) {
		add_display_treestore_line(update, treestore, &iter,
					   key_pair->name,
					   key_pair->value);
	}
	list_iterator_destroy(itr);

	/* The list is owned by this function once returned.  As with the
	 * strings freed above, the displayed values are assumed not to be
	 * referenced by the tree store after the call. */
	list_destroy(dbd_config_list);
}
/*
 * pgsql_monthly_rollup - run the per-month rollup SQL functions for every
 *	month in [start, end) and, if requested, archive afterwards
 * IN pg_conn: database connection
 * IN cluster: cluster name; prefixes the rollup function names in the SQL
 * IN start: time inside the first month to process
 * IN end: months starting at or after this time are not processed
 * IN archive_data: when zero the function returns right after the rollup
 * RET: SLURM_ERROR when a month boundary cannot be computed or a rollup
 *	query fails, SLURM_SUCCESS when no archiving is done, otherwise the
 *	return code of js_pg_archive()
 */
static int pgsql_monthly_rollup(pgsql_conn_t *pg_conn, char *cluster,
				time_t start, time_t end,
				uint16_t archive_data)
{
	int rc = SLURM_SUCCESS;
	/* "query" and "pg_conn" are presumably picked up by name by the
	 * argument-less DEF_QUERY_RET_RC macro, so they are not renamed. */
	char *query = NULL;
	slurmdb_archive_cond_t arch_cond;
	slurmdb_job_cond_t job_cond;
	time_t stamp = time(NULL);
	time_t month_start = start;
	time_t month_end;
	uint16_t wckey_on = slurm_get_track_wckey();

	for (;;) {
		if (_next_month(&month_start, &month_end) != SLURM_SUCCESS)
			return SLURM_ERROR;
		if (month_start >= end)
			break;

		debug3("curr month is now %ld-%ld", month_start, month_end);

		query = xstrdup_printf(
			"SELECT %s.assoc_monthly_rollup(%ld, %ld, %ld);"
			"SELECT %s.cluster_monthly_rollup(%ld, %ld, %ld);",
			cluster, stamp, month_start, month_end,
			cluster, stamp, month_start, month_end);
		if (wckey_on) {
			xstrfmtcat(query,
				   "SELECT %s.wckey_monthly_rollup(%ld, %ld, %ld);",
				   cluster, stamp, month_start, month_end);
		}

		rc = DEF_QUERY_RET_RC;
		if (rc != SLURM_SUCCESS) {
			error("Couldn't add month rollup");
			return SLURM_ERROR;
		}

		/* the next month begins where this one ended */
		month_start = month_end;
	}

	/* Just the rollup was wanted, or there is no slurmdbd
	 * configuration to take the archive settings from. */
	if (!archive_data || !slurmdbd_conf)
		return SLURM_SUCCESS;

	/* archive settings come straight from the slurmdbd configuration */
	memset(&arch_cond, 0, sizeof(arch_cond));
	memset(&job_cond, 0, sizeof(job_cond));
	arch_cond.archive_dir    = slurmdbd_conf->archive_dir;
	arch_cond.archive_script = slurmdbd_conf->archive_script;
	arch_cond.purge_event    = slurmdbd_conf->purge_event;
	arch_cond.purge_job      = slurmdbd_conf->purge_job;
	arch_cond.purge_step     = slurmdbd_conf->purge_step;
	arch_cond.purge_suspend  = slurmdbd_conf->purge_suspend;

	/* The list is created without a destructor because it only
	 * borrows the caller's cluster string. */
	job_cond.cluster_list = list_create(NULL);
	list_append(job_cond.cluster_list, cluster);
	arch_cond.job_cond = &job_cond;

	rc = js_pg_archive(pg_conn, &arch_cond);

	list_destroy(job_cond.cluster_list);
	return rc;
}
/*
 * pgsql_hourly_rollup - build and store the per-hour usage records of one
 *	cluster for every hour in [start, end)
 * IN pg_conn: database connection
 * IN cluster: cluster name; prefixes the SQL functions and record types
 * IN start: beginning of the first hour to process
 * IN end: hours starting at or after this time are not processed
 * RET: SLURM_SUCCESS, or the error code of the first helper or query
 *	that failed
 */
static int pgsql_hourly_rollup(pgsql_conn_t *pg_conn, char *cluster,
			       time_t start, time_t end)
{
	const int hour_len = 3600;
	int rc = SLURM_SUCCESS;
	time_t stamp = time(NULL);
	time_t hour_start = start;
	time_t hour_end = hour_start + hour_len;
	/* "query" and "pg_conn" are presumably picked up by name by the
	 * argument-less DEF_QUERY_RET_RC macro, so they are not renamed. */
	char *query = NULL;
	char *recs = NULL;
	List assoc_usage_list = list_create(_destroy_local_id_usage);
	List cluster_usage_list = list_create(_destroy_local_cluster_usage);
	List wckey_usage_list = list_create(_destroy_local_id_usage);
	List resv_usage_list = list_create(_destroy_local_resv_usage);
	uint16_t wckey_on = slurm_get_track_wckey();
	ListIterator a_itr = list_iterator_create(assoc_usage_list);
	ListIterator c_itr = list_iterator_create(cluster_usage_list);
	ListIterator w_itr = list_iterator_create(wckey_usage_list);
	ListIterator r_itr = list_iterator_create(resv_usage_list);

	/* Leaving the loop with "break" skips the list flushes, but the
	 * lists are destroyed right after the loop anyway. */
	for (; hour_start < end;
	     hour_start = hour_end, hour_end = hour_start + hour_len) {
		local_cluster_usage_t *cu = NULL;
		local_id_usage_t *iu = NULL;

		debug3("curr hour is now %ld-%ld", hour_start, hour_end);

		/* collect this hour's usage into the four lists */
		if ((rc = _process_event_usage(pg_conn, cluster,
					       hour_start, hour_end,
					       cluster_usage_list))
		    != SLURM_SUCCESS)
			break;

		if ((rc = _process_resv_usage(pg_conn, cluster,
					      hour_start, hour_end,
					      cluster_usage_list,
					      resv_usage_list))
		    != SLURM_SUCCESS)
			break;

		if ((rc = _process_job_usage(pg_conn, cluster,
					     hour_start, hour_end,
					     cluster_usage_list,
					     resv_usage_list,
					     assoc_usage_list,
					     wckey_usage_list))
		    != SLURM_SUCCESS)
			break;

		/* now figure out how much more to add to the
		   associations that could had run in the reservation
		*/
		if ((rc = _process_resv_idle_time(resv_usage_list,
						  assoc_usage_list))
		    != SLURM_SUCCESS)
			break;

		/* cluster records: one CAST(...) element per list entry */
		list_iterator_reset(c_itr);
		while ((cu = list_next(c_itr))) {
			_cluster_usage_sanity_check(cluster, cu,
						    hour_start, hour_end);
			if (recs)
				xstrcat(recs, ", ");
			xstrfmtcat(recs,
				   "CAST((%ld, %ld, 0, %ld, %d, "
				   "%"PRIu64", %"PRIu64", %"PRIu64", "
				   "%"PRIu64", %"PRIu64", %"PRIu64") "
				   "AS %s.%s)",
				   stamp, stamp, hour_start, cu->cpu_count,
				   cu->a_cpu, cu->d_cpu, cu->pd_cpu,
				   cu->i_cpu, cu->o_cpu, cu->r_cpu,
				   cluster, cluster_hour_table);
		}
		if (recs) {
			query = xstrdup_printf(
				"SELECT %s.add_cluster_hour_usages(ARRAY[%s]);",
				cluster, recs);
			xfree(recs);
			rc = DEF_QUERY_RET_RC;
			if (rc != SLURM_SUCCESS) {
				error("couldn't add cluster hour rollup");
				break;
			}
		}

		/* association records */
		list_iterator_reset(a_itr);
		while ((iu = list_next(a_itr))) {
			if (recs)
				xstrcat(recs, ", ");
			xstrfmtcat(recs,
				   "CAST((%ld, %ld, 0, %d, %ld, %"PRIu64") "
				   "AS %s.%s)",
				   stamp, stamp, iu->id, hour_start,
				   iu->a_cpu, cluster, assoc_hour_table);
		}
		if (recs) {
			query = xstrdup_printf(
				"SELECT %s.add_assoc_hour_usages(ARRAY[%s]);",
				cluster, recs);
			xfree(recs);
			rc = DEF_QUERY_RET_RC;
			if (rc != SLURM_SUCCESS) {
				error("Couldn't add assoc hour rollup");
				break;
			}
		}

		/* wckey records, only when wckeys are tracked */
		if (wckey_on) {
			list_iterator_reset(w_itr);
			while ((iu = list_next(w_itr))) {
				if (recs)
					xstrcat(recs, ", ");
				xstrfmtcat(recs,
					   "CAST((%ld, %ld, 0, %d, %ld, "
					   "%"PRIu64", 0, 0) AS %s.%s)",
					   stamp, stamp, iu->id, hour_start,
					   iu->a_cpu, cluster,
					   wckey_hour_table);
			}
			if (recs) {
				query = xstrdup_printf(
					"SELECT %s.add_wckey_hour_usages(ARRAY[%s]);",
					cluster, recs);
				xfree(recs);
				rc = DEF_QUERY_RET_RC;
				if (rc != SLURM_SUCCESS) {
					error("Couldn't add wckey hour rollup");
					break;
				}
			}
		}

		/* start the next hour with empty lists */
		list_flush(assoc_usage_list);
		list_flush(cluster_usage_list);
		list_flush(wckey_usage_list);
		list_flush(resv_usage_list);
	}

	list_iterator_destroy(a_itr);
	list_iterator_destroy(c_itr);
	list_iterator_destroy(w_itr);
	list_iterator_destroy(r_itr);

	list_destroy(assoc_usage_list);
	list_destroy(cluster_usage_list);
	list_destroy(wckey_usage_list);
	list_destroy(resv_usage_list);

	return rc;
}
/*
 * _process_job_usage - process job usage data
 *
 * IN pg_conn - database connection
 * IN cluster - cluster name, used as the schema qualifier of the queries
 * IN start   - start of the period being rolled up
 * IN end     - end of the period being rolled up
 * IN/OUT cu_list - cluster usage records; allocated and reserved cpu
 *                  seconds are added to the first record of the list
 * IN/OUT ru_list - reservation usage records; allocated cpu seconds are
 *                  added to every record matching the job's reservation id
 * IN/OUT au_list - association usage records, appended to as new
 *                  association ids are met (rows are ordered by id_assoc)
 * IN/OUT wu_list - wckey usage records, appended to when wckeys are tracked
 * RET SLURM_SUCCESS, or SLURM_ERROR if one of the queries failed
 */
static int _process_job_usage(pgsql_conn_t *pg_conn, char *cluster,
			      time_t start, time_t end, List cu_list,
			      List ru_list, List au_list, List wu_list)
{
	DEF_VARS;
	PGresult *result2;
	ListIterator r_itr;
	int seconds = 0, last_id = -1, last_wckeyid = -1;
	local_cluster_usage_t *c_usage = NULL;
	local_resv_usage_t *r_usage = NULL;
	local_id_usage_t *a_usage = NULL, *w_usage = NULL;
	int track_wckey = slurm_get_track_wckey();

	char *gj_fields = "job_db_inx,id_job,id_assoc,id_wckey,time_eligible,"
		"time_start,time_end,time_suspended,cpus_alloc,cpus_req,"
		"id_resv";
	enum {
		F_DB_INX,
		F_JOBID,
		F_ASSOCID,
		F_WCKEYID,
		F_ELG,
		F_START,
		F_END,
		F_SUSPENDED,
		F_ACPU,
		F_RCPU,
		F_RESVID,
		F_COUNT
	};

	query = xstrdup_printf(
		"SELECT %s FROM %s.%s WHERE (time_eligible < %ld AND "
		"(time_end >= %ld OR time_end = 0)) ORDER BY id_assoc, "
		"time_eligible", gj_fields, cluster, job_table,
		(long)end, (long)start);
	result = DEF_QUERY_RET;
	if (!result) {
		error("failed to get jobs");
		return SLURM_ERROR;
	}

	r_itr = list_iterator_create(ru_list);
	FOR_EACH_ROW {
		int job_id = atoi(ROW(F_JOBID));
		int assoc_id = atoi(ROW(F_ASSOCID));
		int wckey_id = atoi(ROW(F_WCKEYID));
		int resv_id = atoi(ROW(F_RESVID));
		int row_eligible = atoi(ROW(F_ELG));
		int row_start = atoi(ROW(F_START));
		int row_end = atoi(ROW(F_END));
		int row_acpu = atoi(ROW(F_ACPU));
		int row_rcpu = atoi(ROW(F_RCPU));
		seconds = 0;

		/* clip the job's run time to the period being processed */
		if (row_start && (row_start < start))
			row_start = start;

		if (!row_start && row_end)
			row_start = row_end;

		if (!row_end || row_end > end)
			row_end = end;

		if (!row_start || ((row_end - row_start) < 1))
			goto calc_cluster;

		seconds = (row_end - row_start);

		if (strcmp(ROW(F_SUSPENDED), "0")) {
			query = xstrdup_printf(
				"SELECT %s.get_job_suspend_time(%s, %ld, %ld);",
				cluster, ROW(F_DB_INX),
				(long)start, (long)end);
			result2 = DEF_QUERY_RET;
			if (!result2) {
				/* release the job rows too, not only the
				 * iterator, before bailing out */
				PQclear(result);
				list_iterator_destroy(r_itr);
				return SLURM_ERROR;
			}
			seconds -= atoi(PQgetvalue(result2, 0, 0));
			PQclear(result2);
		}
		if (seconds < 1) {
			debug4("This job (%d) was suspended "
			       "the entire hour", job_id);
			/* TODO: how about resv usage? */
			continue;
		}

		if (last_id != assoc_id) { /* ORDER BY associd */
			a_usage = xmalloc(sizeof(local_id_usage_t));
			a_usage->id = assoc_id;
			list_append(au_list, a_usage);
			last_id = assoc_id;
		}
		a_usage->a_cpu += seconds * row_acpu;

		if (!track_wckey)
			goto calc_cluster;

		/* do the wckey calculation */
		if (last_wckeyid != wckey_id) {
			w_usage = list_find_first(wu_list, _cmp_local_id,
						  &wckey_id);
			if (!w_usage) {
				w_usage = xmalloc(sizeof(local_id_usage_t));
				w_usage->id = wckey_id;
				list_append(wu_list, w_usage);
			}
			last_wckeyid = wckey_id;
		}
		w_usage->a_cpu += seconds * row_acpu;

		/* do the cluster allocated calculation */
	calc_cluster:

		/* first figure out the reservation */
		if (resv_id) {
			if (seconds <= 0)
				continue;
			/* Since we have already added the entire
			   reservation as used time on the cluster we
			   only need to calculate the used time for the
			   reservation and then divy up the unused time
			   over the associations able to run in the
			   reservation. Since the job was to run, or ran
			   a reservation we don't care about eligible time
			   since that could totally skew the clusters
			   reserved time since the job may be able to run
			   outside of the reservation. */
			list_iterator_reset(r_itr);
			while ((r_usage = list_next(r_itr))) {
				/* since the reservation could have
				   changed in some way, thus making a
				   new reservation record in the
				   database, we have to make sure all
				   the reservations are checked to see
				   if such a thing has happened */
				if (r_usage->id == resv_id) {
					int temp_end = row_end;
					int temp_start = row_start;
					if (r_usage->start > temp_start)
						temp_start = r_usage->start;
					if (r_usage->end < temp_end)
						temp_end = r_usage->end;

					if ((temp_end - temp_start) > 0) {
						r_usage->a_cpu += row_acpu *
							(temp_end - temp_start);
					}
				}
			}
			/* entire resv already added to cluster usage */
			continue;
		}

		c_usage = list_peek(cu_list);
		/* only record time for the clusters that have
		   registered. This continue should rarely if ever
		   happen.
		*/
		if (!c_usage)
			continue;

		if (row_start && (seconds > 0))
			c_usage->a_cpu += seconds * row_acpu;

		/* now reserved time */
		/*
		 * job requesting for rcpu processors has been delayed
		 * by (start_time - eligible_time) seconds
		 * large r_cpu means cluster overload or bad scheduling?
		 */
		if (!row_start || (row_start >= c_usage->start)) {
			row_end = row_start;
			row_start = row_eligible;
			if (c_usage->start > row_start)
				row_start = c_usage->start;
			if (c_usage->end < row_end)
				row_end = c_usage->end;
			if ((row_end - row_start) > 0) {
				seconds = (row_end - row_start) * row_rcpu;
				c_usage->r_cpu += seconds;
			}
		}
	} END_EACH_ROW;
	PQclear(result);
	list_iterator_destroy(r_itr);
	return SLURM_SUCCESS;
}
/*
 * as_mysql_daily_rollup - roll the hourly usage tables up into the
 *	daily usage tables, one local-time day at a time
 *
 * IN mysql_conn   - database connection
 * IN cluster_name - cluster whose tables are rolled up
 * IN start        - start of the first day to roll up
 * IN end          - the rollup stops once the day start reaches this time
 * IN archive_data - passed on to _process_purge()
 * RET SLURM_ERROR if a day boundary or a query failed, else the return
 *	code of _process_purge()
 */
extern int as_mysql_daily_rollup(mysql_conn_t *mysql_conn,
				 char *cluster_name,
				 time_t start, time_t end,
				 uint16_t archive_data)
{
	struct tm day_tm;
	time_t day_begin = start;
	time_t day_finish;
	time_t now = time(NULL);
	char *query = NULL;
	uint16_t track_wckey = slurm_get_track_wckey();
	int rc = SLURM_SUCCESS;

	for (;;) {
		/* can't just add 86400 since daylight savings starts
		 * and ends every once in a while, so the end of the day
		 * is midnight of the following calendar day */
		if (!localtime_r(&day_begin, &day_tm)) {
			error("Couldn't get localtime from day start %ld",
			      day_begin);
			return SLURM_ERROR;
		}
		day_tm.tm_sec = 0;
		day_tm.tm_min = 0;
		day_tm.tm_hour = 0;
		day_tm.tm_mday++;
		day_tm.tm_isdst = -1;
		day_finish = mktime(&day_tm);

		if (day_begin >= end)
			break;

		debug3("curr day is now %ld-%ld", day_begin, day_finish);

		/* association usage */
		query = xstrdup_printf(
			"insert into \"%s_%s\" (creation_time, mod_time, "
			"id_assoc, "
			"time_start, alloc_cpu_secs) select %ld, %ld, "
			"id_assoc, "
			"%ld, @ASUM:=SUM(alloc_cpu_secs) from \"%s_%s\" where "
			"(time_start < %ld && time_start >= %ld) "
			"group by id_assoc on duplicate key update "
			"mod_time=%ld, alloc_cpu_secs=@ASUM;",
			cluster_name, assoc_day_table, now, now, day_begin,
			cluster_name, assoc_hour_table,
			day_finish, day_begin, now);

		/* cluster usage: grouping on deleted means that no
		   entries gives no rows back instead of a row of NULLs */
		xstrfmtcat(query,
			   "insert into \"%s_%s\" (creation_time, "
			   "mod_time, time_start, cpu_count, "
			   "alloc_cpu_secs, down_cpu_secs, pdown_cpu_secs, "
			   "idle_cpu_secs, over_cpu_secs, resv_cpu_secs) "
			   "select %ld, %ld, "
			   "%ld, @CPU:=MAX(cpu_count), "
			   "@ASUM:=SUM(alloc_cpu_secs), "
			   "@DSUM:=SUM(down_cpu_secs), "
			   "@PDSUM:=SUM(pdown_cpu_secs), "
			   "@ISUM:=SUM(idle_cpu_secs), "
			   "@OSUM:=SUM(over_cpu_secs), "
			   "@RSUM:=SUM(resv_cpu_secs) from \"%s_%s\" where "
			   "(time_start < %ld && time_start >= %ld) "
			   "group by deleted "
			   "on duplicate key update "
			   "mod_time=%ld, cpu_count=@CPU, "
			   "alloc_cpu_secs=@ASUM, down_cpu_secs=@DSUM, "
			   "pdown_cpu_secs=@PDSUM, idle_cpu_secs=@ISUM, "
			   "over_cpu_secs=@OSUM, resv_cpu_secs=@RSUM;",
			   cluster_name, cluster_day_table,
			   now, now, day_begin,
			   cluster_name, cluster_hour_table,
			   day_finish, day_begin, now);

		/* wckey usage, only when wckeys are tracked */
		if (track_wckey) {
			xstrfmtcat(query,
				   "insert into \"%s_%s\" (creation_time, "
				   "mod_time, id_wckey, time_start, "
				   "alloc_cpu_secs) select %ld, %ld, "
				   "id_wckey, %ld, @ASUM:=SUM(alloc_cpu_secs) "
				   "from \"%s_%s\" where (time_start < %ld && "
				   "time_start >= %ld) "
				   "group by id_wckey on duplicate key update "
				   "mod_time=%ld, alloc_cpu_secs=@ASUM;",
				   cluster_name, wckey_day_table,
				   now, now, day_begin,
				   cluster_name, wckey_hour_table,
				   day_finish, day_begin, now);
		}

		debug3("%d(%s:%d) query\n%s",
		       mysql_conn->conn, THIS_FILE, __LINE__, query);
		rc = mysql_db_query(mysql_conn, query);
		xfree(query);
		if (rc != SLURM_SUCCESS) {
			error("Couldn't add day rollup");
			return SLURM_ERROR;
		}

		day_begin = day_finish;
	}

	/* go check to see if we archive and purge */
	rc = _process_purge(mysql_conn, cluster_name, archive_data,
			    SLURMDB_PURGE_DAYS);
	return rc;
}
/*
 * as_mysql_nonhour_rollup - roll usage up into the daily tables (from the
 *	hourly ones) or, when run_month is set, into the monthly tables
 *	(from the daily ones)
 *
 * IN mysql_conn   - database connection
 * IN run_month    - true for a monthly rollup, false for a daily one
 * IN cluster_name - cluster whose tables are rolled up
 * IN start        - start of the first period to roll up
 * IN end          - the rollup stops once the period start reaches this
 * IN archive_data - passed on to _process_purge()
 * RET SLURM_ERROR if a period boundary or a query failed, else the
 *	return code of _process_purge()
 */
extern int as_mysql_nonhour_rollup(mysql_conn_t *mysql_conn,
				   bool run_month,
				   char *cluster_name,
				   time_t start, time_t end,
				   uint16_t archive_data)
{
	/* everything that only depends on run_month is picked once */
	const char *unit_name = run_month ? "month" : "day";
	const char *assoc_dst = run_month ?
		assoc_month_table : assoc_day_table;
	const char *assoc_src = run_month ?
		assoc_day_table : assoc_hour_table;
	const char *cluster_dst = run_month ?
		cluster_month_table : cluster_day_table;
	const char *cluster_src = run_month ?
		cluster_day_table : cluster_hour_table;
	const char *wckey_dst = run_month ?
		wckey_month_table : wckey_day_table;
	const char *wckey_src = run_month ?
		wckey_day_table : wckey_hour_table;
	uint16_t track_wckey = slurm_get_track_wckey();
	time_t now = time(NULL);
	time_t period_begin = start;
	time_t period_finish;
	struct tm period_tm;
	char *query = NULL;
	int rc = SLURM_SUCCESS;

	for (; period_begin < end; period_begin = period_finish) {
		/* can't just add 86400 since daylight savings starts
		 * and ends every once in a while */
		if (!slurm_localtime_r(&period_begin, &period_tm)) {
			error("Couldn't get localtime from start %ld",
			      period_begin);
			return SLURM_ERROR;
		}
		period_tm.tm_sec = 0;
		period_tm.tm_min = 0;
		period_tm.tm_hour = 0;
		period_tm.tm_isdst = -1;
		if (run_month) {
			period_tm.tm_mday = 1;
			period_tm.tm_mon++;
		} else {
			period_tm.tm_mday++;
		}
		period_finish = slurm_mktime(&period_tm);

		if (debug_flags & DEBUG_FLAG_DB_USAGE) {
			DB_DEBUG(mysql_conn->conn,
				 "curr %s is now %ld-%ld",
				 unit_name, period_begin, period_finish);
		}

		/* association usage */
		query = xstrdup_printf(
			"insert into \"%s_%s\" (creation_time, mod_time, id, "
			"id_tres, time_start, alloc_secs) "
			"select %ld, %ld, id, id_tres, "
			"%ld, @ASUM:=SUM(alloc_secs) from \"%s_%s\" where "
			"(time_start < %ld && time_start >= %ld) "
			"group by id, id_tres on duplicate key update "
			"mod_time=%ld, alloc_secs=@ASUM;",
			cluster_name, assoc_dst,
			now, now, period_begin,
			cluster_name, assoc_src,
			period_finish, period_begin, now);

		/* cluster usage: grouping on deleted means that no
		   entries gives no rows back instead of a row of NULLs */
		xstrfmtcat(query,
			   "insert into \"%s_%s\" (creation_time, "
			   "mod_time, time_start, id_tres, count, "
			   "alloc_secs, down_secs, pdown_secs, "
			   "idle_secs, over_secs, resv_secs) "
			   "select %ld, %ld, "
			   "%ld, id_tres, @CPU:=MAX(count), "
			   "@ASUM:=SUM(alloc_secs), "
			   "@DSUM:=SUM(down_secs), "
			   "@PDSUM:=SUM(pdown_secs), "
			   "@ISUM:=SUM(idle_secs), "
			   "@OSUM:=SUM(over_secs), "
			   "@RSUM:=SUM(resv_secs) from \"%s_%s\" where "
			   "(time_start < %ld && time_start >= %ld) "
			   "group by deleted, id_tres "
			   "on duplicate key update "
			   "mod_time=%ld, count=@CPU, "
			   "alloc_secs=@ASUM, down_secs=@DSUM, "
			   "pdown_secs=@PDSUM, idle_secs=@ISUM, "
			   "over_secs=@OSUM, resv_secs=@RSUM;",
			   cluster_name, cluster_dst,
			   now, now, period_begin,
			   cluster_name, cluster_src,
			   period_finish, period_begin, now);

		/* wckey usage, only when wckeys are tracked */
		if (track_wckey) {
			xstrfmtcat(query,
				   "insert into \"%s_%s\" (creation_time, "
				   "mod_time, id, id_tres, time_start, "
				   "alloc_secs) "
				   "select %ld, %ld, "
				   "id, id_tres, %ld, @ASUM:=SUM(alloc_secs) "
				   "from \"%s_%s\" where (time_start < %ld && "
				   "time_start >= %ld) group by id, id_tres "
				   "on duplicate key update "
				   "mod_time=%ld, alloc_secs=@ASUM;",
				   cluster_name, wckey_dst,
				   now, now, period_begin,
				   cluster_name, wckey_src,
				   period_finish, period_begin, now);
		}

		if (debug_flags & DEBUG_FLAG_DB_USAGE) {
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
		}
		rc = mysql_db_query(mysql_conn, query);
		xfree(query);
		if (rc != SLURM_SUCCESS) {
			error("Couldn't add %s rollup", unit_name);
			return SLURM_ERROR;
		}
	}

	/* go check to see if we archive and purge */
	rc = _process_purge(mysql_conn, cluster_name, archive_data,
			    run_month ? SLURMDB_PURGE_MONTHS :
			    SLURMDB_PURGE_DAYS);
	return rc;
}
/*
 * slurm_sprint_job_info - output information about a specific Slurm
 *	job based upon message as loaded using slurm_load_jobs
 * IN job_ptr - an individual job information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *	   NULL is returned on failure.
 *
 * NOTE: white space in job_ptr->state_desc is replaced by underscores in
 *	place. No other field of job_ptr is modified, so the function may
 *	be called repeatedly on the same record.
 */
extern char *
slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner )
{
	int i, j;
	char time_str[32], *group_name, *user_name;
	char tmp1[128], tmp2[128], tmp3[128], tmp4[128], tmp5[128], *tmp6_ptr;
	char tmp_line[512];
	char *ionodes = NULL;
	uint16_t exit_status = 0, term_sig = 0;
	job_resources_t *job_resrcs = job_ptr->job_resrcs;
	char *out = NULL;
	time_t run_time;
	uint32_t min_nodes, max_nodes = 0;
	char *nodelist = "NodeList";
	bitstr_t *core_bitmap;
	char *host;
	int sock_inx, sock_reps, last;
	int abs_node_inx, rel_node_inx;
	int bit_inx, bit_reps;
	uint32_t *last_mem_alloc_ptr = NULL;
	uint32_t last_mem_alloc = NO_VAL;
	char *last_hosts;
	hostlist_t hl, hl_last;
	char select_buf[122];
	float min_memory;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	/* separator written between two logical output lines */
	const char *line_end = one_liner ? " " : "\n ";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		nodelist = "MidplaneList";
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &ionodes);
	}

	/****** Line 1 ******/
	snprintf(tmp_line, sizeof(tmp_line), "JobId=%u ", job_ptr->job_id);
	out = xstrdup(tmp_line);
	if (job_ptr->array_job_id) {
		snprintf(tmp_line, sizeof(tmp_line),
			 "ArrayJobId=%u ArrayTaskId=%u ",
			 job_ptr->array_job_id, job_ptr->array_task_id);
		xstrcat(out, tmp_line);
	}
	snprintf(tmp_line, sizeof(tmp_line), "Name=%s", job_ptr->name);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 2 ******/
	user_name = uid_to_string((uid_t) job_ptr->user_id);
	group_name = gid_to_string((gid_t) job_ptr->group_id);
	snprintf(tmp_line, sizeof(tmp_line),
		 "UserId=%s(%u) GroupId=%s(%u)",
		 user_name, job_ptr->user_id, group_name, job_ptr->group_id);
	xfree(user_name);
	xfree(group_name);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 3 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "Priority=%u Account=%s QOS=%s",
		 job_ptr->priority, job_ptr->account, job_ptr->qos);
	xstrcat(out, tmp_line);
	if (slurm_get_track_wckey()) {
		snprintf(tmp_line, sizeof(tmp_line),
			 " WCKey=%s", job_ptr->wckey);
		xstrcat(out, tmp_line);
	}
	xstrcat(out, line_end);

	/****** Line 4 ******/
	if (job_ptr->state_desc) {
		/* Replace white space with underscore for easier parsing.
		 * <ctype.h> functions need an unsigned char value. */
		for (j=0; job_ptr->state_desc[j]; j++) {
			if (isspace((unsigned char)job_ptr->state_desc[j]))
				job_ptr->state_desc[j] = '_';
		}
		tmp6_ptr = job_ptr->state_desc;
	} else
		tmp6_ptr = job_reason_string(job_ptr->state_reason);
	snprintf(tmp_line, sizeof(tmp_line),
		 "JobState=%s Reason=%s Dependency=%s",
		 job_state_string(job_ptr->job_state), tmp6_ptr,
		 job_ptr->dependency);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 5 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "Requeue=%u Restarts=%u BatchFlag=%u ",
		 job_ptr->requeue, job_ptr->restart_cnt, job_ptr->batch_flag);
	xstrcat(out, tmp_line);
	if (WIFSIGNALED(job_ptr->exit_code))
		term_sig = WTERMSIG(job_ptr->exit_code);
	exit_status = WEXITSTATUS(job_ptr->exit_code);
	snprintf(tmp_line, sizeof(tmp_line),
		 "ExitCode=%u:%u", exit_status, term_sig);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 5a (optional) ******/
	if (job_ptr->show_flags & SHOW_DETAIL) {
		if (WIFSIGNALED(job_ptr->derived_ec))
			term_sig = WTERMSIG(job_ptr->derived_ec);
		else
			term_sig = 0;
		exit_status = WEXITSTATUS(job_ptr->derived_ec);
		snprintf(tmp_line, sizeof(tmp_line),
			 "DerivedExitCode=%u:%u", exit_status, term_sig);
		xstrcat(out, tmp_line);
		xstrcat(out, line_end);
	}

	/****** Line 6 ******/
	snprintf(tmp_line, sizeof(tmp_line), "RunTime=");
	xstrcat(out, tmp_line);
	if (IS_JOB_PENDING(job_ptr))
		run_time = 0;
	else if (IS_JOB_SUSPENDED(job_ptr))
		run_time = job_ptr->pre_sus_time;
	else {
		time_t end_time;
		if (IS_JOB_RUNNING(job_ptr) || (job_ptr->end_time == 0))
			end_time = time(NULL);
		else
			end_time = job_ptr->end_time;
		if (job_ptr->suspend_time) {
			run_time = (time_t)
				(difftime(end_time, job_ptr->suspend_time)
				 + job_ptr->pre_sus_time);
		} else
			run_time = (time_t)
				difftime(end_time, job_ptr->start_time);
	}
	secs2time_str(run_time, tmp1, sizeof(tmp1));
	snprintf(tmp_line, sizeof(tmp_line), "%s ", tmp1);
	xstrcat(out, tmp_line);

	snprintf(tmp_line, sizeof(tmp_line), "TimeLimit=");
	xstrcat(out, tmp_line);
	if (job_ptr->time_limit == NO_VAL)
		snprintf(tmp_line, sizeof(tmp_line), "Partition_Limit");
	else {
		mins2time_str(job_ptr->time_limit, tmp_line,
			      sizeof(tmp_line));
	}
	xstrcat(out, tmp_line);
	snprintf(tmp_line, sizeof(tmp_line), " TimeMin=");
	xstrcat(out, tmp_line);
	if (job_ptr->time_min == 0)
		snprintf(tmp_line, sizeof(tmp_line), "N/A");
	else {
		mins2time_str(job_ptr->time_min, tmp_line,
			      sizeof(tmp_line));
	}
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 7 ******/
	slurm_make_time_str((time_t *)&job_ptr->submit_time, time_str,
			    sizeof(time_str));
	snprintf(tmp_line, sizeof(tmp_line), "SubmitTime=%s ", time_str);
	xstrcat(out, tmp_line);

	slurm_make_time_str((time_t *)&job_ptr->eligible_time, time_str,
			    sizeof(time_str));
	snprintf(tmp_line, sizeof(tmp_line), "EligibleTime=%s", time_str);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 8 (optional) ******/
	if (job_ptr->resize_time) {
		slurm_make_time_str((time_t *)&job_ptr->resize_time, time_str,
				    sizeof(time_str));
		snprintf(tmp_line, sizeof(tmp_line), "ResizeTime=%s",
			 time_str);
		xstrcat(out, tmp_line);
		xstrcat(out, line_end);
	}

	/****** Line 9 ******/
	slurm_make_time_str((time_t *)&job_ptr->start_time, time_str,
			    sizeof(time_str));
	snprintf(tmp_line, sizeof(tmp_line), "StartTime=%s ", time_str);
	xstrcat(out, tmp_line);

	snprintf(tmp_line, sizeof(tmp_line), "EndTime=");
	xstrcat(out, tmp_line);
	if ((job_ptr->time_limit == INFINITE) &&
	    (job_ptr->end_time > time(NULL)))
		snprintf(tmp_line, sizeof(tmp_line), "Unknown");
	else {
		slurm_make_time_str ((time_t *)&job_ptr->end_time, time_str,
				     sizeof(time_str));
		snprintf(tmp_line, sizeof(tmp_line), "%s", time_str);
	}
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 10 ******/
	if (job_ptr->preempt_time == 0)
		snprintf(tmp_line, sizeof(tmp_line), "PreemptTime=None ");
	else {
		slurm_make_time_str((time_t *)&job_ptr->preempt_time,
				    time_str, sizeof(time_str));
		snprintf(tmp_line, sizeof(tmp_line), "PreemptTime=%s ",
			 time_str);
	}
	xstrcat(out, tmp_line);
	if (job_ptr->suspend_time) {
		slurm_make_time_str ((time_t *)&job_ptr->suspend_time,
				     time_str, sizeof(time_str));
	} else {
		snprintf(time_str, sizeof(time_str), "None");
	}
	snprintf(tmp_line, sizeof(tmp_line),
		 "SuspendTime=%s SecsPreSuspend=%ld",
		 time_str, (long int)job_ptr->pre_sus_time);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 11 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "Partition=%s AllocNode:Sid=%s:%u",
		 job_ptr->partition, job_ptr->alloc_node, job_ptr->alloc_sid);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 12 ******/
	snprintf(tmp_line, sizeof(tmp_line), "Req%s=%s Exc%s=%s",
		 nodelist, job_ptr->req_nodes, nodelist, job_ptr->exc_nodes);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 13 ******/
	xstrfmtcat(out, "%s=", nodelist);
	xstrcat(out, job_ptr->nodes);
	if (job_ptr->nodes && ionodes) {
		snprintf(tmp_line, sizeof(tmp_line), "[%s]", ionodes);
		xstrcat(out, tmp_line);
	}
	/* ionodes is not used past this point; release it even when it
	 * was not printed */
	xfree(ionodes);
	xstrcat(out, line_end);

	/****** Line 14 (optional) ******/
	if (job_ptr->batch_host) {
		snprintf(tmp_line, sizeof(tmp_line), "BatchHost=%s",
			 job_ptr->batch_host);
		xstrcat(out, tmp_line);
		xstrcat(out, line_end);
	}

	/****** Line 15 ******/
	if (cluster_flags & CLUSTER_FLAG_BG) {
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &min_nodes);
		if ((min_nodes == 0) || (min_nodes == NO_VAL)) {
			min_nodes = job_ptr->num_nodes;
			max_nodes = job_ptr->max_nodes;
		} else if (job_ptr->max_nodes)
			max_nodes = min_nodes;
	} else {
		min_nodes = job_ptr->num_nodes;
		max_nodes = job_ptr->max_nodes;
	}

	_sprint_range(tmp1, sizeof(tmp1), job_ptr->num_cpus,
		      job_ptr->max_cpus);
	_sprint_range(tmp2, sizeof(tmp2), min_nodes, max_nodes);
	if (job_ptr->sockets_per_node == (uint16_t) NO_VAL)
		strcpy(tmp3, "*");
	else
		snprintf(tmp3, sizeof(tmp3), "%u",
			 job_ptr->sockets_per_node);
	if (job_ptr->cores_per_socket == (uint16_t) NO_VAL)
		strcpy(tmp4, "*");
	else
		snprintf(tmp4, sizeof(tmp4), "%u",
			 job_ptr->cores_per_socket);
	if (job_ptr->threads_per_core == (uint16_t) NO_VAL)
		strcpy(tmp5, "*");
	else
		snprintf(tmp5, sizeof(tmp5), "%u",
			 job_ptr->threads_per_core);
	snprintf(tmp_line, sizeof(tmp_line),
		 "NumNodes=%s NumCPUs=%s CPUs/Task=%u ReqS:C:T=%s:%s:%s",
		 tmp2, tmp1, job_ptr->cpus_per_task, tmp3, tmp4, tmp5);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Allocated resources (optional) ******/
	if (!job_resrcs)
		goto line15;

	if (cluster_flags & CLUSTER_FLAG_BG) {
		if ((job_resrcs->cpu_array_cnt > 0) &&
		    (job_resrcs->cpu_array_value) &&
		    (job_resrcs->cpu_array_reps)) {
			int length = 0;
			xstrcat(out, "CPUs=");
			length += 10;
			for (i = 0; i < job_resrcs->cpu_array_cnt; i++) {
				if (length > 70) {
					/* skip to last CPU group entry */
					if (i < job_resrcs->cpu_array_cnt - 1) {
						continue;
					}
					/* add ellipsis before last entry */
					xstrcat(out, "...,");
					length += 4;
				}

				snprintf(tmp_line, sizeof(tmp_line), "%d",
					 job_resrcs->cpus[i]);
				xstrcat(out, tmp_line);
				length += strlen(tmp_line);
				if (job_resrcs->cpu_array_reps[i] > 1) {
					snprintf(tmp_line, sizeof(tmp_line),
						 "*%d",
						 job_resrcs->cpu_array_reps[i]);
					xstrcat(out, tmp_line);
					length += strlen(tmp_line);
				}
				if (i < job_resrcs->cpu_array_cnt - 1) {
					xstrcat(out, ",");
					length++;
				}
			}
			xstrcat(out, line_end);
		}
	} else {
		if (!job_resrcs->core_bitmap)
			goto line15;

		last = bit_fls(job_resrcs->core_bitmap);
		if (last == -1)
			goto line15;

		hl = hostlist_create(job_ptr->nodes);
		if (!hl) {
			error("slurm_sprint_job_info: hostlist_create: %s",
			      job_ptr->nodes);
			/* do not leak the partially built output */
			xfree(out);
			return NULL;
		}
		hl_last = hostlist_create(NULL);
		if (!hl_last) {
			error("slurm_sprint_job_info: hostlist_create: NULL");
			hostlist_destroy(hl);
			xfree(out);
			return NULL;
		}

		bit_inx = 0;
		i = sock_inx = sock_reps = 0;
		abs_node_inx = job_ptr->node_inx[i];

		/* tmp1[] stores the current cpu(s) allocated */
		tmp2[0] = '\0';	/* stores last cpu(s) allocated */
		for (rel_node_inx=0; rel_node_inx < job_resrcs->nhosts;
		     rel_node_inx++) {

			if (sock_reps >=
			    job_resrcs->sock_core_rep_count[sock_inx]) {
				sock_inx++;
				sock_reps = 0;
			}
			sock_reps++;

			bit_reps = job_resrcs->sockets_per_node[sock_inx] *
				job_resrcs->cores_per_socket[sock_inx];

			/* copy this node's slice of the job core bitmap */
			core_bitmap = bit_alloc(bit_reps);
			for (j=0; j < bit_reps; j++) {
				if (bit_test(job_resrcs->core_bitmap, bit_inx))
					bit_set(core_bitmap, j);
				bit_inx++;
			}

			bit_fmt(tmp1, sizeof(tmp1), core_bitmap);
			FREE_NULL_BITMAP(core_bitmap);
			host = hostlist_shift(hl);
			/*
			 * If the allocation values for this host are not the
			 * same as the last host, print the report of the last
			 * group of hosts that had identical allocation values.
			 */
			if (strcmp(tmp1, tmp2) ||
			    (last_mem_alloc_ptr !=
			     job_resrcs->memory_allocated) ||
			    (job_resrcs->memory_allocated &&
			     (last_mem_alloc !=
			      job_resrcs->memory_allocated[rel_node_inx]))) {
				if (hostlist_count(hl_last)) {
					last_hosts =
						hostlist_ranged_string_xmalloc(
							hl_last);
					snprintf(tmp_line, sizeof(tmp_line),
						 " Nodes=%s CPU_IDs=%s Mem=%u",
						 last_hosts, tmp2,
						 last_mem_alloc_ptr ?
						 last_mem_alloc : 0);
					xfree(last_hosts);
					xstrcat(out, tmp_line);
					xstrcat(out, line_end);

					hostlist_destroy(hl_last);
					hl_last = hostlist_create(NULL);
				}
				strcpy(tmp2, tmp1);
				last_mem_alloc_ptr =
					job_resrcs->memory_allocated;
				if (last_mem_alloc_ptr)
					last_mem_alloc = job_resrcs->
						memory_allocated[rel_node_inx];
				else
					last_mem_alloc = NO_VAL;
			}
			hostlist_push_host(hl_last, host);
			free(host);

			if (bit_inx > last)
				break;

			if (abs_node_inx > job_ptr->node_inx[i+1]) {
				i += 2;
				abs_node_inx = job_ptr->node_inx[i];
			} else {
				abs_node_inx++;
			}
		}

		/* report the final group of hosts */
		if (hostlist_count(hl_last)) {
			last_hosts = hostlist_ranged_string_xmalloc(hl_last);
			snprintf(tmp_line, sizeof(tmp_line),
				 " Nodes=%s CPU_IDs=%s Mem=%u",
				 last_hosts, tmp2,
				 last_mem_alloc_ptr ? last_mem_alloc : 0);
			xfree(last_hosts);
			xstrcat(out, tmp_line);
			xstrcat(out, line_end);
		}
		hostlist_destroy(hl);
		hostlist_destroy(hl_last);
	}

	/****** Minimum per-node resources ******/
line15:
	/* Work on a copy of the value so the MEM_PER_CPU flag stays set in
	 * the caller's record */
	if (job_ptr->pn_min_memory & MEM_PER_CPU) {
		min_memory = (float)(job_ptr->pn_min_memory & (~MEM_PER_CPU));
		tmp6_ptr = "CPU";
	} else {
		min_memory = (float)job_ptr->pn_min_memory;
		tmp6_ptr = "Node";
	}

	if (cluster_flags & CLUSTER_FLAG_BG) {
		convert_num_unit((float)job_ptr->pn_min_cpus,
				 tmp1, sizeof(tmp1), UNIT_NONE);
		snprintf(tmp_line, sizeof(tmp_line), "MinCPUsNode=%s", tmp1);
	} else {
		snprintf(tmp_line, sizeof(tmp_line), "MinCPUsNode=%u",
			 job_ptr->pn_min_cpus);
	}
	xstrcat(out, tmp_line);

	convert_num_unit(min_memory, tmp1, sizeof(tmp1), UNIT_MEGA);
	convert_num_unit((float)job_ptr->pn_min_tmp_disk, tmp2, sizeof(tmp2),
			 UNIT_MEGA);
	snprintf(tmp_line, sizeof(tmp_line),
		 " MinMemory%s=%s MinTmpDiskNode=%s",
		 tmp6_ptr, tmp1, tmp2);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 16 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "Features=%s Gres=%s Reservation=%s",
		 job_ptr->features, job_ptr->gres, job_ptr->resv_name);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 17 ******/
	snprintf(tmp_line, sizeof(tmp_line),
		 "Shared=%s Contiguous=%d Licenses=%s Network=%s",
		 (job_ptr->shared == 0 ? "0" :
		  job_ptr->shared == 1 ? "1" : "OK"),
		 job_ptr->contiguous, job_ptr->licenses, job_ptr->network);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 18 ******/
	snprintf(tmp_line, sizeof(tmp_line), "Command=%s",
		 job_ptr->command);
	xstrcat(out, tmp_line);
	xstrcat(out, line_end);

	/****** Line 19 ******/
	snprintf(tmp_line, sizeof(tmp_line), "WorkDir=%s",
		 job_ptr->work_dir);
	xstrcat(out, tmp_line);

	if (cluster_flags & CLUSTER_FLAG_BG) {
		/****** Line 20 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_BG_ID);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			snprintf(tmp_line, sizeof(tmp_line),
				 "Block_ID=%s", select_buf);
			xstrcat(out, tmp_line);
		}

		/****** Line 21 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MIXED_SHORT);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrcat(out, select_buf);
		}

		if (cluster_flags & CLUSTER_FLAG_BGL) {
			/****** Line 22 (optional) ******/
			select_g_select_jobinfo_sprint(
				job_ptr->select_jobinfo,
				select_buf, sizeof(select_buf),
				SELECT_PRINT_BLRTS_IMAGE);
			if (select_buf[0] != '\0') {
				xstrcat(out, line_end);
				snprintf(tmp_line, sizeof(tmp_line),
					 "BlrtsImage=%s", select_buf);
				xstrcat(out, tmp_line);
			}
		}

		/****** Line 23 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_LINUX_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			if (cluster_flags & CLUSTER_FLAG_BGL)
				snprintf(tmp_line, sizeof(tmp_line),
					 "LinuxImage=%s", select_buf);
			else
				snprintf(tmp_line, sizeof(tmp_line),
					 "CnloadImage=%s", select_buf);
			xstrcat(out, tmp_line);
		}

		/****** Line 24 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MLOADER_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			snprintf(tmp_line, sizeof(tmp_line),
				 "MloaderImage=%s", select_buf);
			xstrcat(out, tmp_line);
		}

		/****** Line 25 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_RAMDISK_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			if (cluster_flags & CLUSTER_FLAG_BGL)
				snprintf(tmp_line, sizeof(tmp_line),
					 "RamDiskImage=%s", select_buf);
			else
				snprintf(tmp_line, sizeof(tmp_line),
					 "IoloadImage=%s", select_buf);
			xstrcat(out, tmp_line);
		}
	}

	/****** Line 26 (optional) ******/
	if (job_ptr->comment) {
		xstrcat(out, line_end);
		snprintf(tmp_line, sizeof(tmp_line), "Comment=%s ",
			 job_ptr->comment);
		xstrcat(out, tmp_line);
	}

	/****** Line 27 (optional) ******/
	if (job_ptr->batch_script) {
		xstrcat(out, line_end);
		xstrcat(out, "BatchScript=\n");
		xstrcat(out, job_ptr->batch_script);
	}

	/****** Line 28 (optional) ******/
	if (job_ptr->req_switch) {
		char time_buf[32];
		xstrcat(out, line_end);
		secs2time_str((time_t) job_ptr->wait4switch, time_buf,
			      sizeof(time_buf));
		snprintf(tmp_line, sizeof(tmp_line), "Switches=%u@%s\n",
			 job_ptr->req_switch, time_buf);
		xstrcat(out, tmp_line);
	}

	/****** Line 29 (optional) ******/
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}
/*
 * _get_wckeyid - resolve the wckey id to record for a job, adding the
 *	wckey to the database when it is not known yet.
 * IN mysql_conn - database connection handed to the assoc_mgr and
 *	as_mysql helpers
 * IN/OUT name - wckey name; when *name is NULL it is replaced with a newly
 *	allocated "*<default_wckey>" (or "*" when the user has no default)
 * IN uid - not used by this function
 * IN cluster - cluster name stored in the wckey record
 * IN associd - association id used to look up the owning user name
 * RET the wckey id, or 0 when wckey tracking is disabled or the user
 *	cannot be resolved
 */
static uint32_t _get_wckeyid(mysql_conn_t *mysql_conn, char **name,
			     uid_t uid, char *cluster, uint32_t associd)
{
	slurmdb_wckey_rec_t lookup_rec;
	char *owner = NULL;

	/* Nothing to resolve unless wckeys are being tracked. */
	if (!slurm_get_track_wckey())
		return 0;

	/*
	 * The uid may not exist on this system yet, so the user name is
	 * derived from the association id rather than from the uid.
	 */
	owner = _get_user_from_associd(mysql_conn, cluster, associd);
	if (!owner) {
		error("No user for associd %u", associd);
		return 0;
	}

	/* No wckey supplied: fall back to the user's default one. */
	if (*name == NULL) {
		slurmdb_user_rec_t owner_rec;

		memset(&owner_rec, 0, sizeof(owner_rec));
		owner_rec.uid = NO_VAL;
		owner_rec.name = owner;
		if (assoc_mgr_fill_in_user(mysql_conn, &owner_rec,
					   1, NULL) != SLURM_SUCCESS) {
			error("No user by name of %s assoc %u",
			      owner, associd);
			xfree(owner);
			return 0;
		}

		if (owner_rec.default_wckey) {
			*name = xstrdup_printf("*%s",
					       owner_rec.default_wckey);
		} else {
			*name = xstrdup_printf("*");
		}
	}

	memset(&lookup_rec, 0, sizeof(lookup_rec));
	lookup_rec.name = *name;
	lookup_rec.uid = NO_VAL;
	lookup_rec.user = owner;
	lookup_rec.cluster = cluster;

	if (assoc_mgr_fill_in_wckey(mysql_conn, &lookup_rec,
				    ACCOUNTING_ENFORCE_WCKEYS,
				    NULL) != SLURM_SUCCESS) {
		/*
		 * Unknown wckey: create it here.  This is done at job
		 * start rather than at submit so that returning the
		 * db_index to the controller is not slowed down.
		 */
		List new_wckeys = list_create(slurmdb_destroy_wckey_rec);
		slurmdb_wckey_rec_t *new_rec =
			xmalloc(sizeof(slurmdb_wckey_rec_t));

		new_rec->name = xstrdup(*name);
		new_rec->user = xstrdup(owner);
		new_rec->cluster = xstrdup(cluster);
		list_append(new_wckeys, new_rec);

		/*
		 * The caller has already verified that the request comes
		 * from the slurm user, so the add is done as that user.
		 */
		if (as_mysql_add_wckeys(mysql_conn,
					slurm_get_slurm_user_id(),
					new_wckeys) == SLURM_SUCCESS) {
			acct_storage_p_commit(mysql_conn, 1);
		}

		/* Look it up again to pick up the id if the add worked. */
		assoc_mgr_fill_in_wckey(mysql_conn, &lookup_rec,
					ACCOUNTING_ENFORCE_WCKEYS, NULL);

		list_destroy(new_wckeys);
	}

	xfree(owner);
	return lookup_rec.id;
}
/*
 * slurm_sprint_job_info - output information about a specific Slurm
 *	job based upon message as loaded using slurm_load_jobs
 * IN job_ptr - an individual job information record pointer
 * IN one_liner - print as a single line if true
 * RET out - char * containing formatted output (must be freed after call)
 *	   NULL is returned on failure (a hostlist could not be created).
 *
 * NOTE: this function modifies the record it is given:
 *	- white space in job_ptr->state_desc is replaced by '_'
 *	- the MEM_PER_CPU flag is cleared from job_ptr->pn_min_memory
 */
extern char *
slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner )
{
	int i, j, k;
	char time_str[32], *group_name, *user_name;
	char *gres_last = "", tmp1[128], tmp2[128];
	char *tmp6_ptr;
	char tmp_line[1024 * 128];
	char tmp_path[MAXPATHLEN];
	char *ionodes = NULL;
	uint16_t exit_status = 0, term_sig = 0;
	job_resources_t *job_resrcs = job_ptr->job_resrcs;
	char *out = NULL;
	time_t run_time;
	uint32_t min_nodes, max_nodes = 0;
	char *nodelist = "NodeList";
	bitstr_t *cpu_bitmap;
	char *host;
	int sock_inx, sock_reps, last;
	int abs_node_inx, rel_node_inx;
	int64_t nice;
	int bit_inx, bit_reps;
	uint64_t *last_mem_alloc_ptr = NULL;
	uint64_t last_mem_alloc = NO_VAL64;
	char *last_hosts;
	hostlist_t hl, hl_last;
	char select_buf[122];
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	uint32_t threads;
	/* Separator written between the logical "lines" of the record */
	char *line_end = (one_liner) ? " " : "\n ";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		nodelist = "MidplaneList";
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_IONODES,
					    &ionodes);
	}

	/****** Line 1 ******/
	xstrfmtcat(out, "JobId=%u ", job_ptr->job_id);

	if (job_ptr->array_job_id) {
		if (job_ptr->array_task_str) {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%s ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_str);
		} else {
			xstrfmtcat(out, "ArrayJobId=%u ArrayTaskId=%u ",
				   job_ptr->array_job_id,
				   job_ptr->array_task_id);
		}
	}
	xstrfmtcat(out, "JobName=%s", job_ptr->name);
	xstrcat(out, line_end);

	/****** Line 2 ******/
	user_name = uid_to_string((uid_t) job_ptr->user_id);
	group_name = gid_to_string((gid_t) job_ptr->group_id);
	xstrfmtcat(out, "UserId=%s(%u) GroupId=%s(%u) MCS_label=%s",
		   user_name, job_ptr->user_id, group_name, job_ptr->group_id,
		   (job_ptr->mcs_label==NULL) ? "N/A" : job_ptr->mcs_label);
	xfree(user_name);
	xfree(group_name);
	xstrcat(out, line_end);

	/****** Line 3 ******/
	nice = ((int64_t)job_ptr->nice) - NICE_OFFSET;
	xstrfmtcat(out, "Priority=%u Nice=%"PRIi64" Account=%s QOS=%s",
		   job_ptr->priority, nice, job_ptr->account, job_ptr->qos);
	if (slurm_get_track_wckey())
		xstrfmtcat(out, " WCKey=%s", job_ptr->wckey);
	xstrcat(out, line_end);

	/****** Line 4 ******/
	xstrfmtcat(out, "JobState=%s ", job_state_string(job_ptr->job_state));

	if (job_ptr->state_desc) {
		/*
		 * Replace white space with underscore for easier parsing.
		 * The cast to unsigned char keeps isspace() well defined
		 * for bytes with the high bit set.
		 */
		for (j=0; job_ptr->state_desc[j]; j++) {
			if (isspace((unsigned char)job_ptr->state_desc[j]))
				job_ptr->state_desc[j] = '_';
		}
		xstrfmtcat(out, "Reason=%s ", job_ptr->state_desc);
	} else
		xstrfmtcat(out, "Reason=%s ",
			   job_reason_string(job_ptr->state_reason));

	xstrfmtcat(out, "Dependency=%s", job_ptr->dependency);
	xstrcat(out, line_end);

	/****** Line 5 ******/
	xstrfmtcat(out, "Requeue=%u Restarts=%u BatchFlag=%u Reboot=%u ",
		   job_ptr->requeue, job_ptr->restart_cnt,
		   job_ptr->batch_flag, job_ptr->reboot);
	if (WIFSIGNALED(job_ptr->exit_code))
		term_sig = WTERMSIG(job_ptr->exit_code);
	exit_status = WEXITSTATUS(job_ptr->exit_code);
	xstrfmtcat(out, "ExitCode=%u:%u", exit_status, term_sig);
	xstrcat(out, line_end);

	/****** Line 5a (optional) ******/
	if (job_ptr->show_flags & SHOW_DETAIL) {
		if (WIFSIGNALED(job_ptr->derived_ec))
			term_sig = WTERMSIG(job_ptr->derived_ec);
		else
			term_sig = 0;
		exit_status = WEXITSTATUS(job_ptr->derived_ec);
		xstrfmtcat(out, "DerivedExitCode=%u:%u",
			   exit_status, term_sig);
		xstrcat(out, line_end);
	}

	/****** Line 6 ******/
	if (IS_JOB_PENDING(job_ptr))
		run_time = 0;
	else if (IS_JOB_SUSPENDED(job_ptr))
		run_time = job_ptr->pre_sus_time;
	else {
		time_t end_time;
		if (IS_JOB_RUNNING(job_ptr) || (job_ptr->end_time == 0))
			end_time = time(NULL);
		else
			end_time = job_ptr->end_time;
		if (job_ptr->suspend_time) {
			run_time = (time_t)
				(difftime(end_time, job_ptr->suspend_time)
				 + job_ptr->pre_sus_time);
		} else
			run_time = (time_t)
				difftime(end_time, job_ptr->start_time);
	}
	secs2time_str(run_time, time_str, sizeof(time_str));
	xstrfmtcat(out, "RunTime=%s ", time_str);

	if (job_ptr->time_limit == NO_VAL)
		xstrcat(out, "TimeLimit=Partition_Limit ");
	else {
		mins2time_str(job_ptr->time_limit, time_str,
			      sizeof(time_str));
		xstrfmtcat(out, "TimeLimit=%s ", time_str);
	}

	if (job_ptr->time_min == 0)
		xstrcat(out, "TimeMin=N/A");
	else {
		mins2time_str(job_ptr->time_min, time_str,
			      sizeof(time_str));
		xstrfmtcat(out, "TimeMin=%s", time_str);
	}
	xstrcat(out, line_end);

	/****** Line 7 ******/
	slurm_make_time_str(&job_ptr->submit_time, time_str,
			    sizeof(time_str));
	xstrfmtcat(out, "SubmitTime=%s ", time_str);

	slurm_make_time_str(&job_ptr->eligible_time, time_str,
			    sizeof(time_str));
	xstrfmtcat(out, "EligibleTime=%s", time_str);

	xstrcat(out, line_end);

	/****** Line 8 (optional) ******/
	if (job_ptr->resize_time) {
		slurm_make_time_str(&job_ptr->resize_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "ResizeTime=%s", time_str);
		xstrcat(out, line_end);
	}

	/****** Line 9 ******/
	slurm_make_time_str(&job_ptr->start_time, time_str,
			    sizeof(time_str));
	xstrfmtcat(out, "StartTime=%s ", time_str);

	if ((job_ptr->time_limit == INFINITE) &&
	    (job_ptr->end_time > time(NULL)))
		xstrcat(out, "EndTime=Unknown ");
	else {
		slurm_make_time_str(&job_ptr->end_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "EndTime=%s ", time_str);
	}

	if (job_ptr->deadline) {
		slurm_make_time_str(&job_ptr->deadline, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "Deadline=%s", time_str);
	} else {
		xstrcat(out, "Deadline=N/A");
	}

	xstrcat(out, line_end);

	/****** Line 10 ******/
	if (job_ptr->preempt_time == 0)
		xstrcat(out, "PreemptTime=None ");
	else {
		slurm_make_time_str(&job_ptr->preempt_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "PreemptTime=%s ", time_str);
	}

	if (job_ptr->suspend_time) {
		slurm_make_time_str(&job_ptr->suspend_time, time_str,
				    sizeof(time_str));
		xstrfmtcat(out, "SuspendTime=%s ", time_str);
	} else
		xstrcat(out, "SuspendTime=None ");

	xstrfmtcat(out, "SecsPreSuspend=%ld",
		   (long int)job_ptr->pre_sus_time);
	xstrcat(out, line_end);

	/****** Line 11 ******/
	xstrfmtcat(out, "Partition=%s AllocNode:Sid=%s:%u",
		   job_ptr->partition, job_ptr->alloc_node,
		   job_ptr->alloc_sid);
	xstrcat(out, line_end);

	/****** Line 12 ******/
	xstrfmtcat(out, "Req%s=%s Exc%s=%s", nodelist, job_ptr->req_nodes,
		   nodelist, job_ptr->exc_nodes);
	xstrcat(out, line_end);

	/****** Line 13 ******/
	xstrfmtcat(out, "%s=%s", nodelist, job_ptr->nodes);
	if (job_ptr->nodes && ionodes)
		xstrfmtcat(out, "[%s]", ionodes);
	/* Release ionodes whether or not it was printed, so it cannot leak */
	xfree(ionodes);
	if (job_ptr->sched_nodes)
		xstrfmtcat(out, " Sched%s=%s", nodelist,
			   job_ptr->sched_nodes);

	xstrcat(out, line_end);

	/****** Line 14 (optional) ******/
	if (job_ptr->batch_host) {
		xstrfmtcat(out, "BatchHost=%s", job_ptr->batch_host);
		xstrcat(out, line_end);
	}

	/****** Line 14a (optional) ******/
	if (job_ptr->fed_siblings) {
		xstrfmtcat(out, "FedOrigin=%s FedSiblings=%s",
			   job_ptr->fed_origin_str,
			   job_ptr->fed_siblings_str);
		xstrcat(out, line_end);
	}

	/****** Line 15 ******/
	if (cluster_flags & CLUSTER_FLAG_BG) {
		select_g_select_jobinfo_get(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &min_nodes);
		if ((min_nodes == 0) || (min_nodes == NO_VAL)) {
			min_nodes = job_ptr->num_nodes;
			max_nodes = job_ptr->max_nodes;
		} else if (job_ptr->max_nodes)
			max_nodes = min_nodes;
	} else if (IS_JOB_PENDING(job_ptr)) {
		min_nodes = job_ptr->num_nodes;
		max_nodes = job_ptr->max_nodes;
		if (max_nodes && (max_nodes < min_nodes))
			min_nodes = max_nodes;
	} else {
		min_nodes = job_ptr->num_nodes;
		max_nodes = 0;
	}

	_sprint_range(tmp_line, sizeof(tmp_line), min_nodes, max_nodes);
	xstrfmtcat(out, "NumNodes=%s ", tmp_line);
	_sprint_range(tmp_line, sizeof(tmp_line), job_ptr->num_cpus,
		      job_ptr->max_cpus);
	xstrfmtcat(out, "NumCPUs=%s ", tmp_line);

	xstrfmtcat(out, "NumTasks=%u ", job_ptr->num_tasks);
	xstrfmtcat(out, "CPUs/Task=%u ", job_ptr->cpus_per_task);

	if (job_ptr->boards_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "ReqB:S:C:T=*:");
	else
		xstrfmtcat(out, "ReqB:S:C:T=%u:", job_ptr->boards_per_node);

	if (job_ptr->sockets_per_board == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->sockets_per_board);

	if (job_ptr->cores_per_socket == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->cores_per_socket);

	if (job_ptr->threads_per_core == (uint16_t) NO_VAL)
		xstrcat(out, "*");
	else
		xstrfmtcat(out, "%u", job_ptr->threads_per_core);

	xstrcat(out, line_end);

	/****** Line 16 ******/
	/* Tres should already of been converted at this point from simple */
	xstrfmtcat(out, "TRES=%s",
		   job_ptr->tres_alloc_str ?
		   job_ptr->tres_alloc_str : job_ptr->tres_req_str);
	xstrcat(out, line_end);

	/****** Line 17 ******/
	if (job_ptr->sockets_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "Socks/Node=* ");
	else
		xstrfmtcat(out, "Socks/Node=%u ", job_ptr->sockets_per_node);

	if (job_ptr->ntasks_per_node == (uint16_t) NO_VAL)
		xstrcat(out, "NtasksPerN:B:S:C=*:");
	else
		xstrfmtcat(out, "NtasksPerN:B:S:C=%u:",
			   job_ptr->ntasks_per_node);

	if (job_ptr->ntasks_per_board == (uint16_t) NO_VAL)
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_board);

	if ((job_ptr->ntasks_per_socket == (uint16_t) NO_VAL) ||
	    (job_ptr->ntasks_per_socket == (uint16_t) INFINITE))
		xstrcat(out, "*:");
	else
		xstrfmtcat(out, "%u:", job_ptr->ntasks_per_socket);

	if ((job_ptr->ntasks_per_core == (uint16_t) NO_VAL) ||
	    (job_ptr->ntasks_per_core == (uint16_t) INFINITE))
		xstrcat(out, "* ");
	else
		xstrfmtcat(out, "%u ", job_ptr->ntasks_per_core);

	if (job_ptr->core_spec == (uint16_t) NO_VAL)
		xstrcat(out, "CoreSpec=*");
	else if (job_ptr->core_spec & CORE_SPEC_THREAD)
		xstrfmtcat(out, "ThreadSpec=%d",
			   (job_ptr->core_spec & (~CORE_SPEC_THREAD)));
	else
		xstrfmtcat(out, "CoreSpec=%u", job_ptr->core_spec);

	xstrcat(out, line_end);

	if (job_resrcs && cluster_flags & CLUSTER_FLAG_BG) {
		if ((job_resrcs->cpu_array_cnt > 0) &&
		    (job_resrcs->cpu_array_value) &&
		    (job_resrcs->cpu_array_reps)) {
			int length = 0;
			xstrcat(out, "CPUs=");
			for (i = 0; i < job_resrcs->cpu_array_cnt; i++) {
				/* only print 60 characters worth of this record */
				if (length > 60) {
					/* skip to last CPU group entry */
					if (i < job_resrcs->cpu_array_cnt - 1) {
						continue;
					}
					/* add ellipsis before last entry */
					xstrcat(out, "...,");
				}

				/*
				 * Print the value that belongs to this
				 * repetition group: cpu_array_value[] is the
				 * array validated above and sized by
				 * cpu_array_cnt together with
				 * cpu_array_reps[].
				 */
				length += xstrfmtcat(out, "%d",
					job_resrcs->cpu_array_value[i]);
				if (job_resrcs->cpu_array_reps[i] > 1) {
					length += xstrfmtcat(out, "*%d",
						job_resrcs->cpu_array_reps[i]);
				}
				if (i < job_resrcs->cpu_array_cnt - 1) {
					xstrcat(out, ",");
					length++;
				}
			}
			xstrcat(out, line_end);
		}
	} else if (job_resrcs && job_resrcs->core_bitmap &&
		   ((last = bit_fls(job_resrcs->core_bitmap)) != -1)) {
		hl = hostlist_create(job_resrcs->nodes);
		if (!hl) {
			error("slurm_sprint_job_info: hostlist_create: %s",
			      job_resrcs->nodes);
			/* do not leak the partially built record */
			xfree(out);
			return NULL;
		}
		hl_last = hostlist_create(NULL);
		if (!hl_last) {
			error("slurm_sprint_job_info: hostlist_create: NULL");
			hostlist_destroy(hl);
			/* do not leak the partially built record */
			xfree(out);
			return NULL;
		}

		bit_inx = 0;
		i = sock_inx = sock_reps = 0;
		abs_node_inx = job_ptr->node_inx[i];

		gres_last = "";
		/* tmp1[] stores the current cpu(s) allocated */
		tmp2[0] = '\0';	/* stores last cpu(s) allocated */
		for (rel_node_inx=0; rel_node_inx < job_resrcs->nhosts;
		     rel_node_inx++) {

			if (sock_reps >=
			    job_resrcs->sock_core_rep_count[sock_inx]) {
				sock_inx++;
				sock_reps = 0;
			}
			sock_reps++;

			bit_reps = job_resrcs->sockets_per_node[sock_inx] *
				   job_resrcs->cores_per_socket[sock_inx];
			host = hostlist_shift(hl);
			threads = _threads_per_core(host);
			/* expand each allocated core into its threads */
			cpu_bitmap = bit_alloc(bit_reps * threads);
			for (j = 0; j < bit_reps; j++) {
				if (bit_test(job_resrcs->core_bitmap, bit_inx)){
					for (k = 0; k < threads; k++)
						bit_set(cpu_bitmap,
							(j * threads) + k);
				}
				bit_inx++;
			}
			bit_fmt(tmp1, sizeof(tmp1), cpu_bitmap);
			FREE_NULL_BITMAP(cpu_bitmap);
			/*
			 * If the allocation values for this host are not the
			 * same as the last host, print the report of the last
			 * group of hosts that had identical allocation values.
			 */
			if (xstrcmp(tmp1, tmp2) ||
			    ((rel_node_inx < job_ptr->gres_detail_cnt) &&
			     xstrcmp(job_ptr->gres_detail_str[rel_node_inx],
				     gres_last)) ||
			    (last_mem_alloc_ptr != job_resrcs->memory_allocated) ||
			    (job_resrcs->memory_allocated &&
			     (last_mem_alloc !=
			      job_resrcs->memory_allocated[rel_node_inx]))) {
				if (hostlist_count(hl_last)) {
					last_hosts =
						hostlist_ranged_string_xmalloc(
						hl_last);
					xstrfmtcat(out,
						   " Nodes=%s CPU_IDs=%s "
						   "Mem=%"PRIu64" GRES_IDX=%s",
						   last_hosts, tmp2,
						   last_mem_alloc_ptr ?
						   last_mem_alloc : 0,
						   gres_last);
					xfree(last_hosts);
					xstrcat(out, line_end);

					hostlist_destroy(hl_last);
					hl_last = hostlist_create(NULL);
				}
				strcpy(tmp2, tmp1);
				if (rel_node_inx < job_ptr->gres_detail_cnt) {
					gres_last = job_ptr->
						    gres_detail_str[rel_node_inx];
				} else {
					gres_last = "";
				}
				last_mem_alloc_ptr = job_resrcs->memory_allocated;
				if (last_mem_alloc_ptr)
					last_mem_alloc = job_resrcs->
						memory_allocated[rel_node_inx];
				else
					last_mem_alloc = NO_VAL64;
			}
			hostlist_push_host(hl_last, host);
			free(host);

			if (bit_inx > last)
				break;

			if (abs_node_inx > job_ptr->node_inx[i+1]) {
				i += 2;
				abs_node_inx = job_ptr->node_inx[i];
			} else {
				abs_node_inx++;
			}
		}

		/* flush the final group of hosts */
		if (hostlist_count(hl_last)) {
			last_hosts = hostlist_ranged_string_xmalloc(hl_last);
			xstrfmtcat(out,
				   " Nodes=%s CPU_IDs=%s Mem=%"PRIu64" GRES_IDX=%s",
				   last_hosts, tmp2,
				   last_mem_alloc_ptr ? last_mem_alloc : 0,
				   gres_last);
			xfree(last_hosts);
			xstrcat(out, line_end);
		}
		hostlist_destroy(hl);
		hostlist_destroy(hl_last);
	}

	/****** Line 18 ******/
	if (job_ptr->pn_min_memory & MEM_PER_CPU) {
		/* NOTE: clears the flag in the caller's record */
		job_ptr->pn_min_memory &= (~MEM_PER_CPU);
		tmp6_ptr = "CPU";
	} else
		tmp6_ptr = "Node";

	if (cluster_flags & CLUSTER_FLAG_BG) {
		convert_num_unit((float)job_ptr->pn_min_cpus, tmp1,
				 sizeof(tmp1), UNIT_NONE, NO_VAL,
				 CONVERT_NUM_UNIT_EXACT);
		xstrfmtcat(out, "MinCPUsNode=%s ", tmp1);
	} else {
		xstrfmtcat(out, "MinCPUsNode=%u ", job_ptr->pn_min_cpus);
	}

	convert_num_unit((float)job_ptr->pn_min_memory, tmp1, sizeof(tmp1),
			 UNIT_MEGA, NO_VAL, CONVERT_NUM_UNIT_EXACT);
	convert_num_unit((float)job_ptr->pn_min_tmp_disk, tmp2, sizeof(tmp2),
			 UNIT_MEGA, NO_VAL, CONVERT_NUM_UNIT_EXACT);
	xstrfmtcat(out, "MinMemory%s=%s MinTmpDiskNode=%s",
		   tmp6_ptr, tmp1, tmp2);
	xstrcat(out, line_end);

	/****** Line ******/
	secs2time_str((time_t)job_ptr->delay_boot, tmp1, sizeof(tmp1));
	xstrfmtcat(out, "Features=%s DelayBoot=%s", job_ptr->features, tmp1);
	xstrcat(out, line_end);

	/****** Line ******/
	xstrfmtcat(out, "Gres=%s Reservation=%s",
		   job_ptr->gres, job_ptr->resv_name);
	xstrcat(out, line_end);

	/****** Line 20 ******/
	xstrfmtcat(out,
		   "OverSubscribe=%s Contiguous=%d Licenses=%s Network=%s",
		   job_share_string(job_ptr->shared), job_ptr->contiguous,
		   job_ptr->licenses, job_ptr->network);
	xstrcat(out, line_end);

	/****** Line 21 ******/
	xstrfmtcat(out, "Command=%s", job_ptr->command);
	xstrcat(out, line_end);

	/****** Line 22 ******/
	xstrfmtcat(out, "WorkDir=%s", job_ptr->work_dir);

	if (cluster_flags & CLUSTER_FLAG_BG) {
		/****** Line 23 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_BG_ID);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "Block_ID=%s", select_buf);
		}

		/****** Line 24 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MIXED_SHORT);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrcat(out, select_buf);
		}

		/****** Line 26 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_LINUX_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "CnloadImage=%s", select_buf);
		}

		/****** Line 27 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_MLOADER_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "MloaderImage=%s", select_buf);
		}

		/****** Line 28 (optional) ******/
		select_g_select_jobinfo_sprint(job_ptr->select_jobinfo,
					       select_buf, sizeof(select_buf),
					       SELECT_PRINT_RAMDISK_IMAGE);
		if (select_buf[0] != '\0') {
			xstrcat(out, line_end);
			xstrfmtcat(out, "IoloadImage=%s", select_buf);
		}
	}

	/****** Line (optional) ******/
	if (job_ptr->admin_comment) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "AdminComment=%s ", job_ptr->admin_comment);
	}

	/****** Line (optional) ******/
	if (job_ptr->comment) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "Comment=%s ", job_ptr->comment);
	}

	/****** Line 30 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stderr(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdErr=%s", tmp_path);
	}

	/****** Line 31 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stdin(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdIn=%s", tmp_path);
	}

	/****** Line 32 (optional) ******/
	if (job_ptr->batch_flag) {
		xstrcat(out, line_end);
		slurm_get_job_stdout(tmp_path, sizeof(tmp_path), job_ptr);
		xstrfmtcat(out, "StdOut=%s", tmp_path);
	}

	/****** Line 33 (optional) ******/
	if (job_ptr->batch_script) {
		xstrcat(out, line_end);
		xstrcat(out, "BatchScript=\n");
		xstrcat(out, job_ptr->batch_script);
	}

	/****** Line 34 (optional) ******/
	if (job_ptr->req_switch) {
		char time_buf[32];
		xstrcat(out, line_end);
		secs2time_str((time_t) job_ptr->wait4switch, time_buf,
			      sizeof(time_buf));
		/*
		 * No trailing newline here: the following fields supply
		 * their own line_end, and one_liner output must stay on a
		 * single line.
		 */
		xstrfmtcat(out, "Switches=%u@%s", job_ptr->req_switch,
			   time_buf);
	}

	/****** Line 35 (optional) ******/
	if (job_ptr->burst_buffer) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "BurstBuffer=%s", job_ptr->burst_buffer);
	}

	/****** Line (optional) ******/
	if (job_ptr->burst_buffer_state) {
		xstrcat(out, line_end);
		xstrfmtcat(out, "BurstBufferState=%s",
			   job_ptr->burst_buffer_state);
	}

	/****** Line 36 (optional) ******/
	if (cpu_freq_debug(NULL, NULL, tmp1, sizeof(tmp1),
			   job_ptr->cpu_freq_gov, job_ptr->cpu_freq_min,
			   job_ptr->cpu_freq_max, NO_VAL) != 0) {
		xstrcat(out, line_end);
		xstrcat(out, tmp1);
	}

	/****** Line 37 ******/
	xstrcat(out, line_end);
	xstrfmtcat(out, "Power=%s", power_flags_str(job_ptr->power_flags));

	/****** Line 38 (optional) ******/
	/*
	 * Only start this line when a flag that is actually printed is set,
	 * and separate multiple flags with a space so they stay parseable.
	 */
	if (job_ptr->bitflags &
	    (GRES_ENFORCE_BIND | KILL_INV_DEP | NO_KILL_INV_DEP | SPREAD_JOB)) {
		char *flag_sep = "";

		xstrcat(out, line_end);
		if (job_ptr->bitflags & GRES_ENFORCE_BIND) {
			xstrfmtcat(out, "%sGresEnforceBind=Yes", flag_sep);
			flag_sep = " ";
		}
		if (job_ptr->bitflags & KILL_INV_DEP) {
			xstrfmtcat(out, "%sKillOInInvalidDependent=Yes",
				   flag_sep);
			flag_sep = " ";
		}
		if (job_ptr->bitflags & NO_KILL_INV_DEP) {
			xstrfmtcat(out, "%sKillOInInvalidDependent=No",
				   flag_sep);
			flag_sep = " ";
		}
		if (job_ptr->bitflags & SPREAD_JOB) {
			xstrfmtcat(out, "%sSpreadJob=Yes", flag_sep);
			flag_sep = " ";
		}
	}

	/****** END OF JOB RECORD ******/
	if (one_liner)
		xstrcat(out, "\n");
	else
		xstrcat(out, "\n\n");

	return out;
}