/*
 * Convert a filetxt step record into a freshly allocated slurmdb step
 * record.  The caller owns the returned structure and must free it.
 */
static slurmdb_step_rec_t *_slurmdb_create_step_rec(
	filetxt_step_rec_t *filetxt_step)
{
	slurmdb_step_rec_t *slurmdb_step = slurmdb_create_step_rec();

	/* Timing: the text format carries end + elapsed, so the start
	 * time is derived rather than copied. */
	slurmdb_step->elapsed = filetxt_step->elapsed;
	slurmdb_step->end = filetxt_step->end;
	slurmdb_step->start = slurmdb_step->end - slurmdb_step->elapsed;

	/* Identity and completion status */
	slurmdb_step->exitcode = filetxt_step->exitcode;
	slurmdb_step->requid = filetxt_step->requid;
	slurmdb_step->state = filetxt_step->status;
	slurmdb_step->stepid = filetxt_step->stepnum;
	slurmdb_step->stepname = xstrdup(filetxt_step->stepname);

	/* Allocated resources; node count is derived by expanding the
	 * hostlist expression. */
	slurmdb_step->ncpus = filetxt_step->ncpus;
	if (filetxt_step->nodes) {
		hostlist_t hosts = hostlist_create(filetxt_step->nodes);
		slurmdb_step->nnodes = hostlist_count(hosts);
		hostlist_destroy(hosts);
	}
	slurmdb_step->nodes = xstrdup(filetxt_step->nodes);

	/* Accounting statistics and CPU usage from rusage */
	memcpy(&slurmdb_step->stats, &filetxt_step->stats,
	       sizeof(slurmdb_stats_t));
	slurmdb_step->sys_cpu_sec = filetxt_step->rusage.ru_stime.tv_sec;
	slurmdb_step->sys_cpu_usec = filetxt_step->rusage.ru_stime.tv_usec;
	slurmdb_step->user_cpu_sec = filetxt_step->rusage.ru_utime.tv_sec;
	slurmdb_step->user_cpu_usec = filetxt_step->rusage.ru_utime.tv_usec;
	slurmdb_step->tot_cpu_sec = filetxt_step->tot_cpu_sec;
	slurmdb_step->tot_cpu_usec = filetxt_step->tot_cpu_usec;

	return slurmdb_step;
}
static int _cluster_get_jobs(mysql_conn_t *mysql_conn, slurmdb_user_rec_t *user, slurmdb_job_cond_t *job_cond, char *cluster_name, char *job_fields, char *step_fields, char *sent_extra, bool is_admin, int only_pending, List sent_list) { char *query = NULL; char *extra = xstrdup(sent_extra); uint16_t private_data = slurm_get_private_data(); slurmdb_selected_step_t *selected_step = NULL; MYSQL_RES *result = NULL, *step_result = NULL; MYSQL_ROW row, step_row; slurmdb_job_rec_t *job = NULL; slurmdb_step_rec_t *step = NULL; time_t now = time(NULL); List job_list = list_create(slurmdb_destroy_job_rec); ListIterator itr = NULL, itr2 = NULL; List local_cluster_list = NULL; int set = 0; char *prefix="t2"; int rc = SLURM_SUCCESS; int last_id = -1, curr_id = -1; local_cluster_t *curr_cluster = NULL; /* This is here to make sure we are looking at only this user * if this flag is set. We also include any accounts they may be * coordinator of. */ if (!is_admin && (private_data & PRIVATE_DATA_JOBS)) { query = xstrdup_printf("select lft from \"%s_%s\" " "where user='******'", cluster_name, assoc_table, user->name); if (user->coord_accts) { slurmdb_coord_rec_t *coord = NULL; itr = list_iterator_create(user->coord_accts); while ((coord = list_next(itr))) { xstrfmtcat(query, " || acct='%s'", coord->name); } list_iterator_destroy(itr); } if (debug_flags & DEBUG_FLAG_DB_JOB) DB_DEBUG(mysql_conn->conn, "query\n%s", query); if (!(result = mysql_db_query_ret( mysql_conn, query, 0))) { xfree(extra); xfree(query); rc = SLURM_ERROR; goto end_it; } xfree(query); set = 0; while ((row = mysql_fetch_row(result))) { if (set) { xstrfmtcat(extra, " || (%s between %s.lft and %s.rgt)", row[0], prefix, prefix); } else { set = 1; if (extra) xstrfmtcat(extra, " && ((%s between %s.lft " "and %s.rgt)", row[0], prefix, prefix); else xstrfmtcat(extra, " where ((%s between %s.lft " "and %s.rgt)", row[0], prefix, prefix); } } mysql_free_result(result); if (set) xstrcat(extra, ")"); else { xfree(extra); 
debug("User %s has no assocations, and is not admin, " "so not returning any jobs.", user->name); /* This user has no valid associations, so * they will not have any jobs. */ goto end_it; } } setup_job_cluster_cond_limits(mysql_conn, job_cond, cluster_name, &extra); query = xstrdup_printf("select %s from \"%s_%s\" as t1 " "left join \"%s_%s\" as t2 " "on t1.id_assoc=t2.id_assoc " "left join \"%s_%s\" as t3 " "on t1.id_resv=t3.id_resv && " "((t1.time_start && " "(t3.time_start < t1.time_start && " "(t3.time_end >= t1.time_start || " "t3.time_end = 0))) || " "((t3.time_start < t1.time_submit && " "(t3.time_end >= t1.time_submit || " "t3.time_end = 0)) || " "(t3.time_start > t1.time_submit)))", job_fields, cluster_name, job_table, cluster_name, assoc_table, cluster_name, resv_table); if (extra) { xstrcat(query, extra); xfree(extra); } /* Here we want to order them this way in such a way so it is easy to look for duplicates, it is also easy to sort the resized jobs. */ xstrcat(query, " group by id_job, time_submit desc"); if (debug_flags & DEBUG_FLAG_DB_JOB) DB_DEBUG(mysql_conn->conn, "query\n%s", query); if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) { xfree(query); rc = SLURM_ERROR; goto end_it; } xfree(query); /* Here we set up environment to check used nodes of jobs. Since we store the bitmap of the entire cluster we can use that to set up a hostlist and set up the bitmap to make things work. This should go before the setup of conds since we could update the start/end time. 
*/ if (job_cond && job_cond->used_nodes) { local_cluster_list = setup_cluster_list_with_inx( mysql_conn, job_cond, (void **)&curr_cluster); if (!local_cluster_list) { rc = SLURM_ERROR; goto end_it; } } while ((row = mysql_fetch_row(result))) { char *id = row[JOB_REQ_ID]; bool job_ended = 0; int start = slurm_atoul(row[JOB_REQ_START]); curr_id = slurm_atoul(row[JOB_REQ_JOBID]); if (job_cond && !job_cond->duplicates && (curr_id == last_id) && (slurm_atoul(row[JOB_REQ_STATE]) != JOB_RESIZING)) continue; /* check the bitmap to see if this is one of the jobs we are looking for */ /* Use start time instead of submit time because node * indexes are determined at start time and not submit. */ if (!good_nodes_from_inx(local_cluster_list, (void **)&curr_cluster, row[JOB_REQ_NODE_INX], start)) { last_id = curr_id; continue; } job = slurmdb_create_job_rec(); job->state = slurm_atoul(row[JOB_REQ_STATE]); if (curr_id == last_id) /* put in reverse so we order by the submit getting larger which it is given to us in reverse order from the database */ list_prepend(job_list, job); else list_append(job_list, job); last_id = curr_id; if (row[JOB_REQ_GRES_ALLOC]) job->alloc_gres = xstrdup(row[JOB_REQ_GRES_ALLOC]); else job->alloc_gres = xstrdup(""); job->alloc_nodes = slurm_atoul(row[JOB_REQ_ALLOC_NODES]); job->associd = slurm_atoul(row[JOB_REQ_ASSOCID]); job->array_job_id = slurm_atoul(row[JOB_REQ_ARRAYJOBID]); job->array_task_id = slurm_atoul(row[JOB_REQ_ARRAYTASKID]); job->resvid = slurm_atoul(row[JOB_REQ_RESVID]); /* This shouldn't happen with new jobs, but older jobs * could of been added without a start and so the * array_task_id would be 0 instead of it's real value */ if (!job->array_job_id && !job->array_task_id) job->array_task_id = NO_VAL; if (row[JOB_REQ_RESV_NAME] && row[JOB_REQ_RESV_NAME][0]) job->resv_name = xstrdup(row[JOB_REQ_RESV_NAME]); job->cluster = xstrdup(cluster_name); /* we want a blank wckey if the name is null */ if (row[JOB_REQ_WCKEY]) job->wckey = 
xstrdup(row[JOB_REQ_WCKEY]); else job->wckey = xstrdup(""); job->wckeyid = slurm_atoul(row[JOB_REQ_WCKEYID]); if (row[JOB_REQ_USER_NAME]) job->user = xstrdup(row[JOB_REQ_USER_NAME]); else job->uid = slurm_atoul(row[JOB_REQ_UID]); if (row[JOB_REQ_LFT]) job->lft = slurm_atoul(row[JOB_REQ_LFT]); if (row[JOB_REQ_ACCOUNT] && row[JOB_REQ_ACCOUNT][0]) job->account = xstrdup(row[JOB_REQ_ACCOUNT]); else if (row[JOB_REQ_ACCOUNT1] && row[JOB_REQ_ACCOUNT1][0]) job->account = xstrdup(row[JOB_REQ_ACCOUNT1]); if (row[JOB_REQ_ARRAY_STR] && row[JOB_REQ_ARRAY_STR][0]) job->array_task_str = xstrdup(row[JOB_REQ_ARRAY_STR]); if (row[JOB_REQ_ARRAY_MAX]) job->array_max_tasks = slurm_atoul(row[JOB_REQ_ARRAY_MAX]); if (row[JOB_REQ_BLOCKID]) job->blockid = xstrdup(row[JOB_REQ_BLOCKID]); job->eligible = slurm_atoul(row[JOB_REQ_ELIGIBLE]); job->submit = slurm_atoul(row[JOB_REQ_SUBMIT]); job->start = start; job->end = slurm_atoul(row[JOB_REQ_END]); job->timelimit = slurm_atoul(row[JOB_REQ_TIMELIMIT]); /* since the job->end could be set later end it here */ if (job->end) { job_ended = 1; if (!job->start || (job->start > job->end)) job->start = job->end; } if (job_cond && !job_cond->without_usage_truncation && job_cond->usage_start) { if (job->start && (job->start < job_cond->usage_start)) job->start = job_cond->usage_start; if (!job->end || job->end > job_cond->usage_end) job->end = job_cond->usage_end; if (!job->start) job->start = job->end; job->elapsed = job->end - job->start; if (row[JOB_REQ_SUSPENDED]) { MYSQL_RES *result2 = NULL; MYSQL_ROW row2; /* get the suspended time for this job */ query = xstrdup_printf( "select time_start, time_end from " "\"%s_%s\" where " "(time_start < %ld && (time_end >= %ld " "|| time_end = 0)) && job_db_inx=%s " "order by time_start", cluster_name, suspend_table, job_cond->usage_end, job_cond->usage_start, id); debug4("%d(%s:%d) query\n%s", mysql_conn->conn, THIS_FILE, __LINE__, query); if (!(result2 = mysql_db_query_ret( mysql_conn, query, 0))) { 
FREE_NULL_LIST(job_list); job_list = NULL; break; } xfree(query); while ((row2 = mysql_fetch_row(result2))) { time_t local_start = slurm_atoul(row2[0]); time_t local_end = slurm_atoul(row2[1]); if (!local_start) continue; if (job->start > local_start) local_start = job->start; if (job->end < local_end) local_end = job->end; if ((local_end - local_start) < 1) continue; job->elapsed -= (local_end - local_start); job->suspended += (local_end - local_start); } mysql_free_result(result2); } } else { job->suspended = slurm_atoul(row[JOB_REQ_SUSPENDED]); /* fix the suspended number to be correct */ if (job->state == JOB_SUSPENDED) job->suspended = now - job->suspended; if (!job->start) { job->elapsed = 0; } else if (!job->end) { job->elapsed = now - job->start; } else { job->elapsed = job->end - job->start; } job->elapsed -= job->suspended; } if ((int)job->elapsed < 0) job->elapsed = 0; job->jobid = curr_id; job->jobname = xstrdup(row[JOB_REQ_NAME]); job->gid = slurm_atoul(row[JOB_REQ_GID]); job->exitcode = slurm_atoul(row[JOB_REQ_EXIT_CODE]); job->derived_ec = slurm_atoul(row[JOB_REQ_DERIVED_EC]); job->derived_es = xstrdup(row[JOB_REQ_DERIVED_ES]); if (row[JOB_REQ_PARTITION]) job->partition = xstrdup(row[JOB_REQ_PARTITION]); if (row[JOB_REQ_NODELIST]) job->nodes = xstrdup(row[JOB_REQ_NODELIST]); if (!job->nodes || !xstrcmp(job->nodes, "(null)")) { xfree(job->nodes); job->nodes = xstrdup("(unknown)"); } job->track_steps = slurm_atoul(row[JOB_REQ_TRACKSTEPS]); job->priority = slurm_atoul(row[JOB_REQ_PRIORITY]); job->req_cpus = slurm_atoul(row[JOB_REQ_REQ_CPUS]); if (row[JOB_REQ_GRES_REQ]) job->req_gres = xstrdup(row[JOB_REQ_GRES_REQ]); else job->req_gres = xstrdup(""); job->req_mem = slurm_atoul(row[JOB_REQ_REQ_MEM]); job->requid = slurm_atoul(row[JOB_REQ_KILL_REQUID]); job->qosid = slurm_atoul(row[JOB_REQ_QOS]); job->show_full = 1; if (row[JOB_REQ_TRESA]) job->tres_alloc_str = xstrdup(row[JOB_REQ_TRESA]); if (row[JOB_REQ_TRESR]) job->tres_req_str = 
xstrdup(row[JOB_REQ_TRESR]); if (only_pending || (job_cond && job_cond->without_steps)) goto skip_steps; if (job_cond && job_cond->step_list && list_count(job_cond->step_list)) { set = 0; itr = list_iterator_create(job_cond->step_list); while ((selected_step = list_next(itr))) { if ((selected_step->jobid != job->jobid) && (selected_step->jobid != job->array_job_id)) { continue; } else if ((selected_step->array_task_id != INFINITE) && (selected_step->array_task_id != job->array_task_id)) continue; else if (selected_step->stepid == NO_VAL) { job->show_full = 1; break; } else if (selected_step->stepid == INFINITE) selected_step->stepid = SLURM_BATCH_SCRIPT; if (set) xstrcat(extra, " || "); else xstrcat(extra, " && ("); /* The stepid could be -2 so use %d not %u */ xstrfmtcat(extra, "t1.id_step=%d", selected_step->stepid); set = 1; job->show_full = 0; /* Set it back just in case we are looking at a job array. */ if (selected_step->stepid == SLURM_BATCH_SCRIPT) selected_step->stepid = INFINITE; } list_iterator_destroy(itr); if (set) xstrcat(extra, ")"); } query = xstrdup_printf("select %s from \"%s_%s\" as t1 " "where t1.job_db_inx=%s", step_fields, cluster_name, step_table, id); if (extra) { xstrcat(query, extra); xfree(extra); } if (debug_flags & DEBUG_FLAG_DB_STEP) DB_DEBUG(mysql_conn->conn, "query\n%s", query); if (!(step_result = mysql_db_query_ret( mysql_conn, query, 0))) { xfree(query); rc = SLURM_ERROR; goto end_it; } xfree(query); /* Querying the steps in the fashion was faster than doing only 1 query and then matching the steps up later with the job. 
*/ while ((step_row = mysql_fetch_row(step_result))) { /* check the bitmap to see if this is one of the steps we are looking for */ if (!good_nodes_from_inx(local_cluster_list, (void **)&curr_cluster, step_row[STEP_REQ_NODE_INX], start)) continue; step = slurmdb_create_step_rec(); step->tot_cpu_sec = 0; step->tot_cpu_usec = 0; step->job_ptr = job; if (!job->first_step_ptr) job->first_step_ptr = step; list_append(job->steps, step); step->stepid = slurm_atoul(step_row[STEP_REQ_STEPID]); /* info("got step %u.%u", */ /* job->header.jobnum, step->stepnum); */ step->state = slurm_atoul(step_row[STEP_REQ_STATE]); step->exitcode = slurm_atoul(step_row[STEP_REQ_EXIT_CODE]); step->nnodes = slurm_atoul(step_row[STEP_REQ_NODES]); step->ntasks = slurm_atoul(step_row[STEP_REQ_TASKS]); step->task_dist = slurm_atoul(step_row[STEP_REQ_TASKDIST]); step->start = slurm_atoul(step_row[STEP_REQ_START]); step->end = slurm_atoul(step_row[STEP_REQ_END]); /* if the job has ended end the step also */ if (!step->end && job_ended) { step->end = job->end; step->state = job->state; } if (job_cond && !job_cond->without_usage_truncation && job_cond->usage_start) { if (step->start && (step->start < job_cond->usage_start)) step->start = job_cond->usage_start; if (!step->start && step->end) step->start = step->end; if (!step->end || (step->end > job_cond->usage_end)) step->end = job_cond->usage_end; } /* figure this out by start stop */ step->suspended = slurm_atoul(step_row[STEP_REQ_SUSPENDED]); if (!step->start) { step->elapsed = 0; } else if (!step->end) { step->elapsed = now - step->start; } else { step->elapsed = step->end - step->start; } step->elapsed -= step->suspended; if ((int)step->elapsed < 0) step->elapsed = 0; step->req_cpufreq_min = slurm_atoul( step_row[STEP_REQ_REQ_CPUFREQ_MIN]); step->req_cpufreq_max = slurm_atoul( step_row[STEP_REQ_REQ_CPUFREQ_MAX]); step->req_cpufreq_gov = slurm_atoul( step_row[STEP_REQ_REQ_CPUFREQ_GOV]); step->stepname = xstrdup(step_row[STEP_REQ_NAME]); 
step->nodes = xstrdup(step_row[STEP_REQ_NODELIST]); step->requid = slurm_atoul(step_row[STEP_REQ_KILL_REQUID]); step->stats.cpu_min = slurm_atoul( step_row[STEP_REQ_MIN_CPU]); if (step->stats.cpu_min != NO_VAL) { step->user_cpu_sec = slurm_atoul( step_row[STEP_REQ_USER_SEC]); step->user_cpu_usec = slurm_atoul( step_row[STEP_REQ_USER_USEC]); step->sys_cpu_sec = slurm_atoul(step_row[STEP_REQ_SYS_SEC]); step->sys_cpu_usec = slurm_atoul( step_row[STEP_REQ_SYS_USEC]); step->tot_cpu_sec += step->user_cpu_sec + step->sys_cpu_sec; step->tot_cpu_usec += step->user_cpu_usec + step->sys_cpu_usec; step->stats.disk_read_max = atof(step_row[STEP_REQ_MAX_DISK_READ]); step->stats.disk_read_max_taskid = slurm_atoul( step_row[STEP_REQ_MAX_DISK_READ_TASK]); step->stats.disk_read_ave = atof(step_row[STEP_REQ_AVE_DISK_READ]); step->stats.disk_write_max = atof(step_row[STEP_REQ_MAX_DISK_WRITE]); step->stats.disk_write_max_taskid = slurm_atoul( step_row[STEP_REQ_MAX_DISK_WRITE_TASK]); step->stats.disk_write_ave = atof(step_row[STEP_REQ_AVE_DISK_WRITE]); step->stats.vsize_max = slurm_atoul( step_row[STEP_REQ_MAX_VSIZE]); step->stats.vsize_max_taskid = slurm_atoul( step_row[STEP_REQ_MAX_VSIZE_TASK]); step->stats.vsize_ave = atof(step_row[STEP_REQ_AVE_VSIZE]); step->stats.rss_max = slurm_atoul(step_row[STEP_REQ_MAX_RSS]); step->stats.rss_max_taskid = slurm_atoul( step_row[STEP_REQ_MAX_RSS_TASK]); step->stats.rss_ave = atof(step_row[STEP_REQ_AVE_RSS]); step->stats.pages_max = slurm_atoul( step_row[STEP_REQ_MAX_PAGES]); step->stats.pages_max_taskid = slurm_atoul( step_row[STEP_REQ_MAX_PAGES_TASK]); step->stats.pages_ave = atof(step_row[STEP_REQ_AVE_PAGES]); step->stats.cpu_min_taskid = slurm_atoul( step_row[STEP_REQ_MIN_CPU_TASK]); step->stats.cpu_ave = atof(step_row[STEP_REQ_AVE_CPU]); step->stats.act_cpufreq = atof(step_row[STEP_REQ_ACT_CPUFREQ]); step->stats.consumed_energy = atof( step_row[STEP_REQ_CONSUMED_ENERGY]); step->stats.vsize_max_nodeid = slurm_atoul( 
step_row[STEP_REQ_MAX_VSIZE_NODE]); step->stats.rss_max_nodeid = slurm_atoul( step_row[STEP_REQ_MAX_RSS_NODE]); step->stats.pages_max_nodeid = slurm_atoul( step_row[STEP_REQ_MAX_PAGES_NODE]); step->stats.cpu_min_nodeid = slurm_atoul( step_row[STEP_REQ_MIN_CPU_NODE]); } if (step_row[STEP_REQ_TRES]) step->tres_alloc_str = xstrdup(step_row[STEP_REQ_TRES]); } mysql_free_result(step_result); if (!job->track_steps) { uint64_t j_cpus, s_cpus; /* If we don't have track_steps we want to see if we have multiple steps. If we only have 1 step check the job name against the step name in most all cases it will be different. If it is different print out the step separate. It could also be a single step/allocation where the job was allocated more than the step requested (eg. CR_Socket). */ if (list_count(job->steps) > 1) job->track_steps = 1; else if (step && (xstrcmp(step->stepname, job->jobname) || (((j_cpus = slurmdb_find_tres_count_in_string( job->tres_alloc_str, TRES_CPU)) != INFINITE64) && ((s_cpus = slurmdb_find_tres_count_in_string( step->tres_alloc_str, TRES_CPU)) != INFINITE64) && j_cpus != s_cpus))) job->track_steps = 1; } skip_steps: /* need to reset here to make the above test valid */ step = NULL; } mysql_free_result(result); end_it: if (itr2) list_iterator_destroy(itr2); FREE_NULL_LIST(local_cluster_list); if (rc == SLURM_SUCCESS) list_transfer(sent_list, job_list); FREE_NULL_LIST(job_list); return rc; }