/*
 * Parse a comma separated "id=count" TRES string and, for every entry,
 * charge (count * seconds) of time of the given type to tres_list via
 * _add_time_tres().  Suspended time is subtracted for TRES that sit idle
 * while a job is suspended (currently only CPUs).
 *
 * tres_list       IN/OUT - list of local_tres_usage_t to update
 * tres_str        IN     - "id=count[,id=count...]" string, may be NULL
 * type            IN     - time classification (e.g. TIME_ALLOC)
 * seconds         IN     - base number of seconds for each TRES
 * suspend_seconds IN     - seconds the job spent suspended this period
 * times_count     IN     - passed through to _add_time_tres()
 */
static void _add_tres_time_2_list(List tres_list, char *tres_str, int type,
				  int seconds, int suspend_seconds,
				  bool times_count)
{
	char *pos = tres_str;
	local_tres_usage_t *usage_rec;

	xassert(tres_list);

	if (!tres_str || !tres_str[0])
		return;

	while (pos) {
		int tres_seconds = seconds;
		int id = atoi(pos);
		uint64_t cnt, tres_time;

		if (id < 1) {
			error("_add_tres_time_2_list: no id "
			      "found at %s", pos);
			break;
		}

		pos = strchr(pos, '=');
		if (!pos) {
			error("_add_tres_time_2_list: no value found for "
			      "id %d '%s'", id, tres_str);
			xassert(0);
			break;
		}

		/* Take away suspended time from TRES that are idle when the
		 * job was suspended, currently only CPU's fill that bill.
		 */
		if (suspend_seconds && (id == TRES_CPU)) {
			tres_seconds -= suspend_seconds;
			if (tres_seconds < 1)
				tres_seconds = 0;
		}

		cnt = slurm_atoull(++pos);
		tres_time = cnt * tres_seconds;
		usage_rec = _add_time_tres(tres_list, type, id,
					   tres_time, times_count);
		/* Record the raw count the first time this TRES is seen */
		if (usage_rec && !usage_rec->count)
			usage_rec->count = cnt;

		pos = strchr(pos, ',');
		if (!pos)
			break;
		pos++;
	}
}
/*
 * For every TRES usage record in tres_list_in, add time of the given type
 * to the matching record in tres_list_out via _add_time_tres().  When
 * time_in is non-zero it overrides each record's own total_time.
 *
 * tres_list_out IN/OUT - destination list of local_tres_usage_t
 * tres_list_in  IN     - source list of local_tres_usage_t
 * type          IN     - time classification (e.g. TIME_ALLOC, TIME_PDOWN)
 * time_in       IN     - override time, or 0 to use each record's total_time
 * times_count   IN     - passed through to _add_time_tres()
 */
static void _add_time_tres_list(List tres_list_out, List tres_list_in,
				int type, uint64_t time_in, bool times_count)
{
	ListIterator iter;
	local_tres_usage_t *usage_rec;

	xassert(tres_list_in);
	xassert(tres_list_out);

	iter = list_iterator_create(tres_list_in);
	while ((usage_rec = list_next(iter))) {
		uint64_t add_time =
			time_in ? time_in : usage_rec->total_time;
		_add_time_tres(tres_list_out, type, usage_rec->id,
			       add_time, times_count);
	}
	list_iterator_destroy(iter);
}
/*
 * Parse a comma separated "id=count" TRES string and, for every entry,
 * charge (count * seconds) of time of the given type to tres_list via
 * _add_time_tres().  If a usage record had no count yet, the parsed
 * count is stored on it.
 *
 * NOTE(review): this source also contains a 5-argument variant of
 * _add_tres_time_2_list() (taking an extra suspend_seconds argument).
 * Two static definitions of the same identifier cannot coexist in one
 * translation unit, so this file looks like two versions concatenated —
 * confirm which definition is the intended one.
 *
 * tres_list   IN/OUT - list of local_tres_usage_t to update
 * tres_str    IN     - "id=count[,id=count...]" string, may be NULL
 * type        IN     - time classification (e.g. TIME_ALLOC)
 * seconds     IN     - number of seconds to multiply each TRES count by
 * times_count IN     - passed through to _add_time_tres()
 */
static void _add_tres_time_2_list(List tres_list, char *tres_str, int type,
				  int seconds, bool times_count)
{
	char *tmp_str = tres_str;
	int id;
	uint64_t time, count;
	local_tres_usage_t *loc_tres;

	xassert(tres_list);

	if (!tres_str || !tres_str[0])
		return;

	while (tmp_str) {
		/* atoi returns 0 on a non-numeric lead, caught below */
		id = atoi(tmp_str);
		if (id < 1) {
			error("_add_tres_time_2_list: no id "
			      "found at %s", tmp_str);
			break;
		}
		if (!(tmp_str = strchr(tmp_str, '='))) {
			error("_add_tres_time_2_list: no value found for "
			      "id %d '%s'", id, tres_str);
			xassert(0);
			break;
		}
		count = slurm_atoull(++tmp_str);
		time = count * seconds;
		loc_tres = _add_time_tres(tres_list, type, id,
					  time, times_count);
		/* Record the raw count the first time this TRES is seen */
		if (loc_tres && !loc_tres->count)
			loc_tres->count = count;

		if (!(tmp_str = strchr(tmp_str, ',')))
			break;
		tmp_str++;
	}

	return;
}
/*
 * as_mysql_hourly_rollup - roll the job, reservation, suspend and cluster
 * event records for one cluster up into hourly usage rows.
 *
 * Walks [start, end) in one-hour steps.  For each hour it:
 *   1. builds per-cluster usage (including down time) via
 *      _setup_cluster_usage(),
 *   2. charges reservation time (maintenance reservations count as planned
 *      down time, others as allocated time),
 *   3. charges each job's allocated TRES time (minus suspended time) to
 *      the cluster, association, wckey and reservation it belongs to,
 *   4. distributes a reservation's unused (idle) time over the
 *      associations allowed to run in it,
 *   5. inserts the accumulated association/wckey usage rows.
 * On success the transaction is committed and old records may be purged.
 *
 * mysql_conn   IN - database connection
 * cluster_name IN - cluster whose tables ("<cluster>_<table>") are rolled up
 * start        IN - beginning of the period to roll up
 * end          IN - end of the period to roll up (exclusive)
 * archive_data IN - passed to _process_purge() for archival
 * RET SLURM_SUCCESS or SLURM_ERROR
 */
extern int as_mysql_hourly_rollup(mysql_conn_t *mysql_conn,
				  char *cluster_name,
				  time_t start, time_t end,
				  uint16_t archive_data)
{
	int rc = SLURM_SUCCESS;
	int add_sec = 3600;	/* hourly granularity */
	int i=0;
	time_t now = time(NULL);
	time_t curr_start = start;
	time_t curr_end = curr_start + add_sec;
	char *query = NULL;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	ListIterator a_itr = NULL;
	ListIterator c_itr = NULL;
	ListIterator w_itr = NULL;
	ListIterator r_itr = NULL;
	List assoc_usage_list = list_create(_destroy_local_id_usage);
	List cluster_down_list = list_create(_destroy_local_cluster_usage);
	List wckey_usage_list = list_create(_destroy_local_id_usage);
	List resv_usage_list = list_create(_destroy_local_resv_usage);
	uint16_t track_wckey = slurm_get_track_wckey();
	local_cluster_usage_t *loc_c_usage = NULL;
	local_cluster_usage_t *c_usage = NULL;
	local_resv_usage_t *r_usage = NULL;
	local_id_usage_t *a_usage = NULL;
	local_id_usage_t *w_usage = NULL;
	/* char start_char[20], end_char[20]; */

	/* Columns selected from the job (+ joined step) tables; must stay
	 * in sync with the JOB_REQ_* enum below. */
	char *job_req_inx[] = {
		"job.job_db_inx",
//		"job.id_job",
		"job.id_assoc",
		"job.id_wckey",
		"job.array_task_pending",
		"job.time_eligible",
		"job.time_start",
		"job.time_end",
		"job.time_suspended",
		"job.cpus_req",
		"job.id_resv",
		"job.tres_alloc",
		"SUM(step.consumed_energy)"
	};
	char *job_str = NULL;
	enum {
		JOB_REQ_DB_INX,
//		JOB_REQ_JOBID,
		JOB_REQ_ASSOCID,
		JOB_REQ_WCKEYID,
		JOB_REQ_ARRAY_PENDING,
		JOB_REQ_ELG,
		JOB_REQ_START,
		JOB_REQ_END,
		JOB_REQ_SUSPENDED,
		JOB_REQ_RCPU,
		JOB_REQ_RESVID,
		JOB_REQ_TRES,
		JOB_REQ_ENERGY,
		JOB_REQ_COUNT
	};

	/* Columns selected from the suspend table; must stay in sync with
	 * the SUSPEND_REQ_* enum below. */
	char *suspend_req_inx[] = {
		"time_start",
		"time_end"
	};
	char *suspend_str = NULL;
	enum {
		SUSPEND_REQ_START,
		SUSPEND_REQ_END,
		SUSPEND_REQ_COUNT
	};

	/* Columns selected from the reservation table; must stay in sync
	 * with the RESV_REQ_* enum below. */
	char *resv_req_inx[] = {
		"id_resv",
		"assoclist",
		"flags",
		"tres",
		"time_start",
		"time_end"
	};
	char *resv_str = NULL;
	enum {
		RESV_REQ_ID,
		RESV_REQ_ASSOCS,
		RESV_REQ_FLAGS,
		RESV_REQ_TRES,
		RESV_REQ_START,
		RESV_REQ_END,
		RESV_REQ_COUNT
	};

	/* Build the comma separated column lists used in the queries */
	i=0;
	xstrfmtcat(job_str, "%s", job_req_inx[i]);
	for(i=1; i<JOB_REQ_COUNT; i++) {
		xstrfmtcat(job_str, ", %s", job_req_inx[i]);
	}

	i=0;
	xstrfmtcat(suspend_str, "%s", suspend_req_inx[i]);
	for(i=1; i<SUSPEND_REQ_COUNT; i++) {
		xstrfmtcat(suspend_str, ", %s", suspend_req_inx[i]);
	}

	i=0;
	xstrfmtcat(resv_str, "%s", resv_req_inx[i]);
	for(i=1; i<RESV_REQ_COUNT; i++) {
		xstrfmtcat(resv_str, ", %s", resv_req_inx[i]);
	}

/* 	info("begin start %s", slurm_ctime2(&curr_start)); */
/* 	info("begin end %s", slurm_ctime2(&curr_end)); */
	a_itr = list_iterator_create(assoc_usage_list);
	c_itr = list_iterator_create(cluster_down_list);
	w_itr = list_iterator_create(wckey_usage_list);
	r_itr = list_iterator_create(resv_usage_list);

	/* Process one hour per iteration */
	while (curr_start < end) {
		int last_id = -1;
		int last_wckeyid = -1;

		if (debug_flags & DEBUG_FLAG_DB_USAGE)
			DB_DEBUG(mysql_conn->conn,
				 "%s curr hour is now %ld-%ld",
				 cluster_name, curr_start, curr_end);
/* 		info("start %s", slurm_ctime2(&curr_start)); */
/* 		info("end %s", slurm_ctime2(&curr_end)); */

		/* May return NULL (e.g. cluster not registered); every
		 * use of c_usage below is NULL-guarded. */
		c_usage = _setup_cluster_usage(mysql_conn, cluster_name,
					       curr_start, curr_end,
					       cluster_down_list);

		// now get the reservations during this time
		/* If a reservation has the IGNORE_JOBS flag we don't
		 * have an easy way to distinguish the cpus a job not
		 * running in the reservation, but on it's cpus.
		 * So we will just ignore these reservations for
		 * accounting purposes.
		 */
		query = xstrdup_printf("select %s from \"%s_%s\" where "
				       "(time_start < %ld && time_end >= %ld) "
				       "&& !(flags & %u)"
				       "order by time_start",
				       resv_str, cluster_name, resv_table,
				       curr_end, curr_start,
				       RESERVE_FLAG_IGN_JOBS);

		if (debug_flags & DEBUG_FLAG_DB_USAGE)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
		if (!(result = mysql_db_query_ret(
			      mysql_conn, query, 0))) {
			rc = SLURM_ERROR;
			goto end_it;
		}
		xfree(query);

		if (c_usage)
			xassert(c_usage->loc_tres);

		/* If a reservation overlaps another reservation we
		   total up everything here as if they didn't but when
		   calculating the total time for a cluster we will
		   remove the extra time received.  This may result in
		   unexpected results with association based reports
		   since the association is given the total amount of
		   time of each reservation, thus equaling more time
		   than is available.  Job/Cluster/Reservation reports
		   should be fine though since we really don't over
		   allocate resources.

		   The issue with us not being able to handle
		   overlapping reservations here is unless the
		   reservation completely overlaps the other
		   reservation we have no idea how many cpus should be
		   removed since this could be a heterogeneous system.
		   This same problem exists when a reservation is
		   created with the ignore_jobs option which will
		   allow jobs to continue to run in the reservation
		   that aren't suppose to.
		*/
		while ((row = mysql_fetch_row(result))) {
			time_t row_start = slurm_atoul(row[RESV_REQ_START]);
			time_t row_end = slurm_atoul(row[RESV_REQ_END]);
			uint32_t row_flags = slurm_atoul(row[RESV_REQ_FLAGS]);
			int resv_seconds;

			/* Clamp the reservation to the current hour */
			if (row_start < curr_start)
				row_start = curr_start;

			if (!row_end || row_end > curr_end)
				row_end = curr_end;

			/* Don't worry about it if the time is less
			 * than 1 second.
			 */
			if ((resv_seconds = (row_end - row_start)) < 1)
				continue;

			r_usage = xmalloc(sizeof(local_resv_usage_t));
			r_usage->id = slurm_atoul(row[RESV_REQ_ID]);

			r_usage->local_assocs = list_create(slurm_destroy_char);
			slurm_addto_char_list(r_usage->local_assocs,
					      row[RESV_REQ_ASSOCS]);
			r_usage->loc_tres =
				list_create(_destroy_local_tres_usage);

			_add_tres_2_list(r_usage->loc_tres,
					 row[RESV_REQ_TRES], resv_seconds);

			r_usage->start = row_start;
			r_usage->end = row_end;
			list_append(resv_usage_list, r_usage);

			/* Since this reservation was added to the
			   cluster and only certain people could run
			   there we will use this as allocated time on
			   the system.  If the reservation was a
			   maintenance then we add the time to planned
			   down time.
			*/

			/* only record time for the clusters that have
			   registered.  This continue should rarely if
			   ever happen.
			*/
			if (!c_usage)
				continue;

			_add_time_tres_list(c_usage->loc_tres,
					    r_usage->loc_tres,
					    (row_flags & RESERVE_FLAG_MAINT) ?
					    TIME_PDOWN : TIME_ALLOC, 0, 0);

			/* slurm_make_time_str(&r_usage->start, start_char, */
			/* 		    sizeof(start_char)); */
			/* slurm_make_time_str(&r_usage->end, end_char, */
			/* 		    sizeof(end_char)); */
			/* info("adding this much %lld to cluster %s " */
			/*      "%d %d %s - %s", */
			/*      r_usage->total_time, c_usage->name, */
			/*      (row_flags & RESERVE_FLAG_MAINT), */
			/*      r_usage->id, start_char, end_char); */
		}
		mysql_free_result(result);

		/* now get the jobs during this time only  */
		query = xstrdup_printf("select %s from \"%s_%s\" as job "
				       "left outer join \"%s_%s\" as step on "
				       "job.job_db_inx=step.job_db_inx "
				       "and (step.id_step>=0) "
				       "where (job.time_eligible && "
				       "job.time_eligible < %ld && "
				       "(job.time_end >= %ld || "
				       "job.time_end = 0)) "
				       "group by job.job_db_inx "
				       "order by job.id_assoc, "
				       "job.time_eligible",
				       job_str, cluster_name, job_table,
				       cluster_name, step_table,
				       curr_end, curr_start);

		if (debug_flags & DEBUG_FLAG_DB_USAGE)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
		if (!(result = mysql_db_query_ret(
			      mysql_conn, query, 0))) {
			rc = SLURM_ERROR;
			goto end_it;
		}
		xfree(query);

		while ((row = mysql_fetch_row(result))) {
			//uint32_t job_id = slurm_atoul(row[JOB_REQ_JOBID]);
			uint32_t assoc_id = slurm_atoul(row[JOB_REQ_ASSOCID]);
			uint32_t wckey_id = slurm_atoul(row[JOB_REQ_WCKEYID]);
			uint32_t array_pending =
				slurm_atoul(row[JOB_REQ_ARRAY_PENDING]);
			uint32_t resv_id = slurm_atoul(row[JOB_REQ_RESVID]);
			time_t row_eligible = slurm_atoul(row[JOB_REQ_ELG]);
			time_t row_start = slurm_atoul(row[JOB_REQ_START]);
			time_t row_end = slurm_atoul(row[JOB_REQ_END]);
			uint32_t row_rcpu = slurm_atoul(row[JOB_REQ_RCPU]);
			List loc_tres = NULL;
			uint64_t row_energy = 0;
			int loc_seconds = 0;
			int seconds = 0, suspend_seconds = 0;

			if (row[JOB_REQ_ENERGY])
				row_energy = slurm_atoull(row[JOB_REQ_ENERGY]);

			/* Clamp the job's run window to the current hour */
			if (row_start && (row_start < curr_start))
				row_start = curr_start;

			if (!row_start && row_end)
				row_start = row_end;

			if (!row_end || row_end > curr_end)
				row_end = curr_end;

			/* No run time this hour: skip straight to the
			 * cluster/reserved-time accounting below. */
			if (!row_start || ((row_end - row_start) < 1))
				goto calc_cluster;

			seconds = (row_end - row_start);

			if (slurm_atoul(row[JOB_REQ_SUSPENDED])) {
				MYSQL_RES *result2 = NULL;
				MYSQL_ROW row2;
				/* get the suspended time for this job */
				query = xstrdup_printf(
					"select %s from \"%s_%s\" where "
					"(time_start < %ld && (time_end >= %ld "
					"|| time_end = 0)) && job_db_inx=%s "
					"order by time_start",
					suspend_str, cluster_name,
					suspend_table,
					curr_end, curr_start,
					row[JOB_REQ_DB_INX]);

				debug4("%d(%s:%d) query\n%s",
				       mysql_conn->conn, THIS_FILE,
				       __LINE__, query);
				if (!(result2 = mysql_db_query_ret(
					      mysql_conn,
					      query, 0))) {
					rc = SLURM_ERROR;
					mysql_free_result(result);
					goto end_it;
				}
				xfree(query);

				/* Sum the suspended intervals that overlap
				 * the job's clamped run window */
				while ((row2 = mysql_fetch_row(result2))) {
					int tot_time = 0;
					time_t local_start = slurm_atoul(
						row2[SUSPEND_REQ_START]);
					time_t local_end = slurm_atoul(
						row2[SUSPEND_REQ_END]);

					if (!local_start)
						continue;

					if (row_start > local_start)
						local_start = row_start;
					if (!local_end || row_end < local_end)
						local_end = row_end;
					tot_time = (local_end - local_start);

					if (tot_time > 0)
						suspend_seconds += tot_time;
				}
				mysql_free_result(result2);
			}

			/* Rows are ordered by id_assoc, so a new id means
			 * a new association record */
			if (last_id != assoc_id) {
				a_usage = xmalloc(sizeof(local_id_usage_t));
				a_usage->id = assoc_id;
				list_append(assoc_usage_list, a_usage);
				last_id = assoc_id;
				/* a_usage->loc_tres is made later,
				   don't do it here.
				*/
			}

			/* Short circuit this so so we don't get a
			 * pointer. */
			if (!track_wckey)
				last_wckeyid = wckey_id;

			/* do the wckey calculation */
			if (last_wckeyid != wckey_id) {
				list_iterator_reset(w_itr);
				while ((w_usage = list_next(w_itr)))
					if (w_usage->id == wckey_id)
						break;

				if (!w_usage) {
					w_usage = xmalloc(
						sizeof(local_id_usage_t));
					w_usage->id = wckey_id;
					list_append(wckey_usage_list,
						    w_usage);
					w_usage->loc_tres = list_create(
						_destroy_local_tres_usage);
				}
				last_wckeyid = wckey_id;
			}

			/* do the cluster allocated calculation */
		calc_cluster:

			/* We need to have this clean for each job
			 * since we add the time to the cluster
			 * individually.
			 */
			loc_tres = list_create(_destroy_local_tres_usage);

			_add_tres_time_2_list(loc_tres, row[JOB_REQ_TRES],
					      TIME_ALLOC, seconds,
					      suspend_seconds, 0);
			if (w_usage)
				_add_tres_time_2_list(w_usage->loc_tres,
						      row[JOB_REQ_TRES],
						      TIME_ALLOC, seconds,
						      suspend_seconds, 0);

			/* Energy is accumulated as a plain total, not a
			 * count * seconds product */
			_add_time_tres(loc_tres, TIME_ALLOC, TRES_ENERGY,
				       row_energy, 0);
			if (w_usage)
				_add_time_tres(
					w_usage->loc_tres,
					TIME_ALLOC, TRES_ENERGY,
					row_energy, 0);

			/* Now figure out there was a disconnected
			   slurmctld durning this job.
			*/
			list_iterator_reset(c_itr);
			while ((loc_c_usage = list_next(c_itr))) {
				int temp_end = row_end;
				int temp_start = row_start;
				if (loc_c_usage->start > temp_start)
					temp_start = loc_c_usage->start;
				if (loc_c_usage->end < temp_end)
					temp_end = loc_c_usage->end;
				loc_seconds = (temp_end - temp_start);
				if (loc_seconds < 1)
					continue;

				_remove_job_tres_time_from_cluster(
					loc_c_usage->loc_tres,
					loc_tres,
					loc_seconds);
				/* info("Job %u was running for " */
				/*      "%d seconds while " */
				/*      "cluster %s's slurmctld " */
				/*      "wasn't responding", */
				/*      job_id, loc_seconds, cluster_name); */
			}

			/* first figure out the reservation */
			if (resv_id) {
				if (seconds <= 0) {
					_transfer_loc_tres(&loc_tres, a_usage);
					continue;
				}
				/* Since we have already added the
				   entire reservation as used time on
				   the cluster we only need to
				   calculate the used time for the
				   reservation and then divy up the
				   unused time over the associations
				   able to run in the reservation.
				   Since the job was to run, or ran a
				   reservation we don't care about
				   eligible time since that could
				   totally skew the clusters reserved
				   time since the job may be able to
				   run outside of the reservation. */
				list_iterator_reset(r_itr);
				while ((r_usage = list_next(r_itr))) {
					int temp_end, temp_start;
					/* since the reservation could
					   have changed in some way,
					   thus making a new
					   reservation record in the
					   database, we have to make
					   sure all the reservations
					   are checked to see if such
					   a thing has happened */
					if (r_usage->id != resv_id)
						continue;

					temp_end = row_end;
					temp_start = row_start;
					if (r_usage->start > temp_start)
						temp_start = r_usage->start;

					if (r_usage->end < temp_end)
						temp_end = r_usage->end;
					loc_seconds = (temp_end - temp_start);

					if (loc_seconds > 0)
						_add_time_tres_list(
							r_usage->loc_tres,
							loc_tres, TIME_ALLOC,
							loc_seconds, 1);
				}
				_transfer_loc_tres(&loc_tres, a_usage);
				continue;
			}

			/* only record time for the clusters that have
			   registered.  This continue should rarely if
			   ever happen.
			*/
			if (!c_usage) {
				_transfer_loc_tres(&loc_tres, a_usage);
				continue;
			}

			if (row_start && (seconds > 0)) {
				/* info("%d assoc %d adds " */
				/*      "(%d)(%d-%d) * %d = %d " */
				/*      "to %d", */
				/*      job_id, */
				/*      a_usage->id, */
				/*      seconds, */
				/*      row_end, row_start, */
				/*      row_acpu, */
				/*      seconds * row_acpu, */
				/*      row_acpu); */
				_add_job_alloc_time_to_cluster(
					c_usage->loc_tres,
					loc_tres);
			}

			/* The loc_tres isn't needed after this so
			 * transfer to the association and go on our
			 * merry way. */
			_transfer_loc_tres(&loc_tres, a_usage);

			/* now reserved time */
			if (!row_start || (row_start >= c_usage->start)) {
				int temp_end = row_start;
				int temp_start = row_eligible;
				if (c_usage->start > temp_start)
					temp_start = c_usage->start;
				if (c_usage->end < temp_end)
					temp_end = c_usage->end;
				loc_seconds = (temp_end - temp_start);
				if (loc_seconds > 0) {
					/* If we have pending jobs in
					   an array they haven't been
					   inserted into the database
					   yet as proper job records,
					   so handle them here. */
					if (array_pending)
						loc_seconds *= array_pending;
					/* info("%d assoc %d reserved " */
					/*      "(%d)(%d-%d) * %d * %d = %d " */
					/*      "to %d", */
					/*      job_id, */
					/*      assoc_id, */
					/*      temp_end - temp_start, */
					/*      temp_end, temp_start, */
					/*      row_rcpu, */
					/*      array_pending, */
					/*      loc_seconds, */
					/*      row_rcpu); */
					_add_time_tres(c_usage->loc_tres,
						       TIME_RESV, TRES_CPU,
						       loc_seconds *
						       row_rcpu, 0);
				}
			}
		}
		mysql_free_result(result);

		/* now figure out how much more to add to the
		   associations that could had run in the reservation
		*/
		list_iterator_reset(r_itr);
		while ((r_usage = list_next(r_itr))) {
			ListIterator t_itr;
			local_tres_usage_t *loc_tres;

			if (!r_usage->loc_tres ||
			    !list_count(r_usage->loc_tres))
				continue;

			t_itr = list_iterator_create(r_usage->loc_tres);
			while ((loc_tres = list_next(t_itr))) {
				int64_t idle = loc_tres->total_time -
					loc_tres->time_alloc;
				char *assoc = NULL;
				ListIterator tmp_itr = NULL;
				int assoc_cnt, resv_unused_secs;

				if (idle <= 0)
					break; /* since this will be
						* the same for all TRES */

				/* now divide that time by the number of
				   associations in the reservation and
				   add them to each association */
				resv_unused_secs = idle;
				assoc_cnt = list_count(
					r_usage->local_assocs);
				if (assoc_cnt)
					resv_unused_secs /= assoc_cnt;

				/* info("resv %d got %d seconds for TRES %u " */
				/*      "for %d assocs", */
				/*      r_usage->id, resv_unused_secs, */
				/*      loc_tres->id, */
				/*      list_count(r_usage->local_assocs)); */
				tmp_itr = list_iterator_create(
					r_usage->local_assocs);
				while ((assoc = list_next(tmp_itr))) {
					uint32_t associd =
						slurm_atoul(assoc);
					/* Reuse an existing usage record
					 * for this association or create
					 * one on the fly */
					if ((last_id != associd) &&
					    !(a_usage = list_find_first(
						      assoc_usage_list,
						      _find_id_usage,
						      &associd))) {
						a_usage = xmalloc(
							sizeof(local_id_usage_t));
						a_usage->id = associd;
						list_append(assoc_usage_list,
							    a_usage);
						last_id = associd;
						a_usage->loc_tres =
							list_create(
							_destroy_local_tres_usage);
					}

					_add_time_tres(a_usage->loc_tres,
						       TIME_ALLOC,
						       loc_tres->id,
						       resv_unused_secs, 0);
				}
				list_iterator_destroy(tmp_itr);
			}
			list_iterator_destroy(t_itr);
		}

		/* now apply the down time from the slurmctld disconnects */
		if (c_usage) {
			list_iterator_reset(c_itr);
			while ((loc_c_usage = list_next(c_itr))) {
				local_tres_usage_t *loc_tres;
				ListIterator tmp_itr = list_iterator_create(
					loc_c_usage->loc_tres);
				while ((loc_tres = list_next(tmp_itr)))
					_add_time_tres(c_usage->loc_tres,
						       TIME_DOWN,
						       loc_tres->id,
						       loc_tres->total_time,
						       0);
				list_iterator_destroy(tmp_itr);
			}

			if ((rc = _process_cluster_usage(
				     mysql_conn, cluster_name, curr_start,
				     curr_end, now, c_usage))
			    != SLURM_SUCCESS) {
				goto end_it;
			}
		}

		/* Insert the accumulated association usage rows */
		list_iterator_reset(a_itr);
		while ((a_usage = list_next(a_itr)))
			_create_id_usage_insert(cluster_name, ASSOC_TABLES,
						curr_start, now,
						a_usage, &query);
		if (query) {
			if (debug_flags & DEBUG_FLAG_DB_USAGE)
				DB_DEBUG(mysql_conn->conn,
					 "query\n%s", query);
			rc = mysql_db_query(mysql_conn, query);
			xfree(query);
			if (rc != SLURM_SUCCESS) {
				error("Couldn't add assoc hour rollup");
				goto end_it;
			}
		}

		if (!track_wckey)
			goto end_loop;

		/* Insert the accumulated wckey usage rows */
		list_iterator_reset(w_itr);
		while ((w_usage = list_next(w_itr)))
			_create_id_usage_insert(cluster_name, WCKEY_TABLES,
						curr_start, now,
						w_usage, &query);
		if (query) {
			if (debug_flags & DEBUG_FLAG_DB_USAGE)
				DB_DEBUG(mysql_conn->conn,
					 "query\n%s", query);
			rc = mysql_db_query(mysql_conn, query);
			xfree(query);
			if (rc != SLURM_SUCCESS) {
				error("Couldn't add wckey hour rollup");
				goto end_it;
			}
		}

	end_loop:
		/* Reset all per-hour state before the next iteration */
		_destroy_local_cluster_usage(c_usage);
		_destroy_local_id_usage(a_usage);
		_destroy_local_id_usage(w_usage);
		_destroy_local_resv_usage(r_usage);
		c_usage = NULL;
		r_usage = NULL;
		a_usage = NULL;
		w_usage = NULL;

		list_flush(assoc_usage_list);
		list_flush(cluster_down_list);
		list_flush(wckey_usage_list);
		list_flush(resv_usage_list);
		curr_start = curr_end;
		curr_end = curr_start + add_sec;
	}
end_it:
	xfree(query);
	xfree(suspend_str);
	xfree(job_str);
	xfree(resv_str);
	_destroy_local_cluster_usage(c_usage);
	_destroy_local_id_usage(a_usage);
	_destroy_local_id_usage(w_usage);
	_destroy_local_resv_usage(r_usage);

	if (a_itr)
		list_iterator_destroy(a_itr);
	if (c_itr)
		list_iterator_destroy(c_itr);
	if (w_itr)
		list_iterator_destroy(w_itr);
	if (r_itr)
		list_iterator_destroy(r_itr);

	FREE_NULL_LIST(assoc_usage_list);
	FREE_NULL_LIST(cluster_down_list);
	FREE_NULL_LIST(wckey_usage_list);
	FREE_NULL_LIST(resv_usage_list);

/* 	info("stop start %s", slurm_ctime2(&curr_start)); */
/* 	info("stop end %s", slurm_ctime2(&curr_end)); */

	/* go check to see if we archive and purge */

	if (rc == SLURM_SUCCESS) {
		if (mysql_db_commit(mysql_conn)) {
			char start[25], end[25];
			error("Couldn't commit cluster (%s) "
			      "hour rollup for %s - %s",
			      cluster_name,
			      slurm_ctime2_r(&curr_start, start),
			      slurm_ctime2_r(&curr_end, end));
			rc = SLURM_ERROR;
		} else
			rc = _process_purge(mysql_conn, cluster_name,
					    archive_data,
					    SLURMDB_PURGE_HOURS);
	}

	return rc;
}