/* glp_write_lp - write problem data in CPLEX LP format.
 *
 * P     - problem object to write
 * parm  - control parameters (NULL means defaults via glp_init_cpxcp)
 * fname - output file name
 *
 * Returns 0 on success, non-zero on failure (file cannot be created or
 * a write error occurred).  `count` tracks the number of text lines
 * emitted; output statements use comma expressions to bump it inline. */
int glp_write_lp(glp_prob *P, const glp_cpxcp *parm, const char *fname)
{     /* write problem data in CPLEX LP format */
      glp_cpxcp _parm;
      struct csa _csa, *csa = &_csa;
      glp_file *fp;
      GLPROW *row;
      GLPCOL *col;
      GLPAIJ *aij;
      int i, j, len, flag, count, ret;
      /* line is the output line being accumulated; term is the next
         piece to append; name receives row/column names */
      char line[1000+1], term[500+1], name[255+1];
      xprintf("Writing problem data to '%s'...\n", fname);
      if (parm == NULL)
         glp_init_cpxcp(&_parm), parm = &_parm;
      /* check control parameters */
      check_parm("glp_write_lp", parm);
      /* initialize common storage area */
      csa->P = P;
      csa->parm = parm;
      /* create output CPLEX LP file */
      fp = glp_open(fname, "w"), count = 0;
      if (fp == NULL)
      {  xprintf("Unable to create '%s' - %s\n", fname, get_err_msg());
         ret = 1;
         goto done;
      }
      /* write problem name (LP-format comments are \* ... *\) */
      xfprintf(fp, "\\* Problem: %s *\\\n",
         P->name == NULL ? "Unknown" : P->name), count++;
      xfprintf(fp, "\n"), count++;
      /* the problem should contain at least one row and one column */
      if (!(P->m > 0 && P->n > 0))
      {  xprintf("Warning: problem has no rows/columns\n");
         xfprintf(fp, "\\* WARNING: PROBLEM HAS NO ROWS/COLUMNS *\\\n"),
            count++;
         xfprintf(fp, "\n"), count++;
         goto skip;
      }
      /* write the objective function definition */
      if (P->dir == GLP_MIN)
         xfprintf(fp, "Minimize\n"), count++;
      else if (P->dir == GLP_MAX)
         xfprintf(fp, "Maximize\n"), count++;
      else
         xassert(P != P);  /* unreachable: invalid optimization dir */
      row_name(csa, 0, name);
      sprintf(line, " %s:", name);
      len = 0;  /* number of objective terms written */
      for (j = 1; j <= P->n; j++)
      {  col = P->col[j];
         /* a column appears in the objective if its coefficient is
            non-zero, or if the column is empty (so it shows up at
            least once in the file) */
         if (col->coef != 0.0 || col->ptr == NULL)
         {  len++;
            col_name(csa, j, name);
            if (col->coef == 0.0)
               sprintf(term, " + 0 %s", name); /* empty column */
            else if (col->coef == +1.0)
               sprintf(term, " + %s", name);
            else if (col->coef == -1.0)
               sprintf(term, " - %s", name);
            else if (col->coef > 0.0)
               sprintf(term, " + %.*g %s", DBL_DIG, +col->coef, name);
            else
               sprintf(term, " - %.*g %s", DBL_DIG, -col->coef, name);
            /* flush the accumulated line if appending would exceed
               the 72-column limit */
            if (strlen(line) + strlen(term) > 72)
               xfprintf(fp, "%s\n", line), line[0] = '\0', count++;
            strcat(line, term);
         }
      }
      if (len == 0)
      {  /* empty objective */
         sprintf(term, " 0 %s", col_name(csa, 1, name));
         strcat(line, term);
      }
      xfprintf(fp, "%s\n", line), count++;
      if (P->c0 != 0.0)
         xfprintf(fp, "\\* constant term = %.*g *\\\n", DBL_DIG, P->c0),
            count++;
      xfprintf(fp, "\n"), count++;
      /* write the constraints section */
      xfprintf(fp, "Subject To\n"), count++;
      for (i = 1; i <= P->m; i++)
      {  row = P->row[i];
         if (row->type == GLP_FR)
            continue; /* skip free row */
         row_name(csa, i, name);
         sprintf(line, " %s:", name);
         /* linear form */
         for (aij = row->ptr; aij != NULL; aij = aij->r_next)
         {  col_name(csa, aij->col->j, name);
            if (aij->val == +1.0)
               sprintf(term, " + %s", name);
            else if (aij->val == -1.0)
               sprintf(term, " - %s", name);
            else if (aij->val > 0.0)
               sprintf(term, " + %.*g %s", DBL_DIG, +aij->val, name);
            else
               sprintf(term, " - %.*g %s", DBL_DIG, -aij->val, name);
            if (strlen(line) + strlen(term) > 72)
               xfprintf(fp, "%s\n", line), line[0] = '\0', count++;
            strcat(line, term);
         }
         if (row->type == GLP_DB)
         {  /* double-bounded (ranged) constraint: subtract an
               auxiliary range variable ~r_i, bounded below */
            sprintf(term, " - ~r_%d", i);
            if (strlen(line) + strlen(term) > 72)
               xfprintf(fp, "%s\n", line), line[0] = '\0', count++;
            strcat(line, term);
         }
         else if (row->ptr == NULL)
         {  /* empty constraint */
            sprintf(term, " 0 %s", col_name(csa, 1, name));
            strcat(line, term);
         }
         /* right hand-side */
         if (row->type == GLP_LO)
            sprintf(term, " >= %.*g", DBL_DIG, row->lb);
         else if (row->type == GLP_UP)
            sprintf(term, " <= %.*g", DBL_DIG, row->ub);
         else if (row->type == GLP_DB || row->type == GLP_FX)
            sprintf(term, " = %.*g", DBL_DIG, row->lb);
         else
            xassert(row != row);  /* unreachable: invalid row type */
         if (strlen(line) + strlen(term) > 72)
            xfprintf(fp, "%s\n", line), line[0] = '\0', count++;
         strcat(line, term);
         xfprintf(fp, "%s\n", line), count++;
      }
      xfprintf(fp, "\n"), count++;
      /* write the bounds section */
      flag = 0;  /* set once the "Bounds" header has been written */
      for (i = 1; i <= P->m; i++)
      {  row = P->row[i];
         if (row->type != GLP_DB) continue;
         if (!flag)
            xfprintf(fp, "Bounds\n"), flag = 1, count++;
         /* range variable for row i: 0 <= ~r_i <= ub - lb */
         xfprintf(fp, " 0 <= ~r_%d <= %.*g\n",
            i, DBL_DIG, row->ub - row->lb), count++;
      }
      for (j = 1; j <= P->n; j++)
      {  col = P->col[j];
         /* x >= 0 is the LP-format default; no need to write it */
         if (col->type == GLP_LO && col->lb == 0.0) continue;
         if (!flag)
            xfprintf(fp, "Bounds\n"), flag = 1, count++;
         col_name(csa, j, name);
         if (col->type == GLP_FR)
            xfprintf(fp, " %s free\n", name), count++;
         else if (col->type == GLP_LO)
            xfprintf(fp, " %s >= %.*g\n", name, DBL_DIG, col->lb),
               count++;
         else if (col->type == GLP_UP)
            xfprintf(fp, " -Inf <= %s <= %.*g\n", name, DBL_DIG,
               col->ub), count++;
         else if (col->type == GLP_DB)
            xfprintf(fp, " %.*g <= %s <= %.*g\n", DBL_DIG, col->lb,
               name, DBL_DIG, col->ub), count++;
         else if (col->type == GLP_FX)
            xfprintf(fp, " %s = %.*g\n", name, DBL_DIG, col->lb),
               count++;
         else
            xassert(col != col);  /* unreachable: invalid col type */
      }
      if (flag) xfprintf(fp, "\n"), count++;
      /* write the integer section */
      flag = 0;  /* set once the "Generals" header has been written */
      for (j = 1; j <= P->n; j++)
      {  col = P->col[j];
         if (col->kind == GLP_CV) continue;
         xassert(col->kind == GLP_IV);
         if (!flag)
            xfprintf(fp, "Generals\n"), flag = 1, count++;
         xfprintf(fp, " %s\n", col_name(csa, j, name)), count++;
      }
      if (flag) xfprintf(fp, "\n"), count++;
skip: /* write the end keyword */
      xfprintf(fp, "End\n"), count++;
#if 0 /* FIXME */
      xfflush(fp);
#endif
      if (glp_ioerr(fp))
      {  xprintf("Write error on '%s' - %s\n", fname, get_err_msg());
         ret = 1;
         goto done;
      }
      /* problem data has been successfully written */
      xprintf("%d lines were written\n", count);
      ret = 0;
done: if (fp != NULL) glp_close(fp);
      return ret;
}
/* _track_freeing_blocks - pthread entry point that waits for every
 * block in bg_free_list->track_list to reach the FREE (or ERROR)
 * state, retrying up to MAX_FREE_RETRIES with FREE_SLEEP_INTERVAL
 * sleeps, then finalizes each block via _post_block_free().
 *
 * args - heap-allocated bg_free_block_list_t; ownership transfers to
 *        this thread (track_list and args are destroyed/freed here).
 * Returns NULL always (pthread signature). */
static void *_track_freeing_blocks(void *args)
{
	bg_free_block_list_t *bg_free_list = (bg_free_block_list_t *)args;
	List track_list = bg_free_list->track_list;
	bool destroy = bg_free_list->destroy;
	uint32_t job_id = bg_free_list->job_id;
	int retry_cnt = 0;
	int free_cnt = 0, track_cnt = list_count(track_list);
	ListIterator itr = list_iterator_create(track_list);
	bg_record_t *bg_record;
	bool restore = true;

	debug("_track_freeing_blocks: Going to free %d for job %u",
	      track_cnt, job_id);
	while (retry_cnt < MAX_FREE_RETRIES) {
		free_cnt = 0;
		slurm_mutex_lock(&block_state_mutex);

		/* just to make sure state is updated */
		bridge_status_update_block_list_state(track_list);

		list_iterator_reset(itr);
		/* just in case this changes from the update function */
		track_cnt = list_count(track_list);
		while ((bg_record = list_next(itr))) {
			if (bg_record->magic != BLOCK_MAGIC) {
				/* update_block_list_state should remove
				   this already from the list so we
				   shouldn't ever have this. */
				error("_track_freeing_blocks: block was "
				      "already destroyed %p", bg_record);
				xassert(0);
				free_cnt++;
				continue;
			}
#ifndef HAVE_BG_FILES
			/* Fake a free since we are n deallocating
			   state before this.  (NOTE(review): upstream
			   comment garbled; presumably "now
			   deallocating".) */
			if (!(bg_record->state & BG_BLOCK_ERROR_FLAG)
			    && (retry_cnt >= 3))
				bg_record->state = BG_BLOCK_FREE;
#endif
			/* FREE or ERROR counts as done; anything still
			   terminating is left alone, anything else gets
			   another free request */
			if ((bg_record->state == BG_BLOCK_FREE)
			    || (bg_record->state & BG_BLOCK_ERROR_FLAG))
				free_cnt++;
			else if (bg_record->state != BG_BLOCK_TERM)
				bg_free_block(bg_record, 0, 1);
		}
		slurm_mutex_unlock(&block_state_mutex);
		if (free_cnt == track_cnt)
			break;
		debug("_track_freeing_blocks: freed %d of %d for job %u",
		      free_cnt, track_cnt, job_id);
		sleep(FREE_SLEEP_INTERVAL);
		retry_cnt++;
	}
	debug("_track_freeing_blocks: Freed them all for job %u", job_id);

	if (destroy)
		restore = false;

	/* If there is a block in error state we need to keep all
	 * these blocks around. */
	slurm_mutex_lock(&block_state_mutex);
	list_iterator_reset(itr);
	while ((bg_record = list_next(itr))) {
		/* block no longer exists */
		if (bg_record->magic != BLOCK_MAGIC)
			continue;
		if (bg_record->state != BG_BLOCK_FREE) {
			restore = true;
			break;
		}
	}

	/* second pass: finalize every surviving block under the same
	   lock hold */
	list_iterator_reset(itr);
	while ((bg_record = list_next(itr)))
		_post_block_free(bg_record, restore);
	slurm_mutex_unlock(&block_state_mutex);
	last_bg_update = time(NULL);

	list_iterator_destroy(itr);
	list_destroy(track_list);
	xfree(bg_free_list);
	return NULL;
}
/* _post_block_free - finalize a block after a free attempt: decrement
 * the free reference count and, when this caller is the last freer,
 * either flag a non-free block as errored or remove/destroy it.
 *
 * block_state_mutex should be locked before calling this.  NOTE: the
 * mutex is temporarily RELEASED around select_g_update_block() below.
 *
 * bg_record - block being finalized (magic-checked)
 * restore   - if true (and the block has mp_count), keep the record
 *             instead of removing it from the main list / database
 * Returns SLURM_SUCCESS, or SLURM_ERROR on a bad magic. */
static int _post_block_free(bg_record_t *bg_record, bool restore)
{
	int rc = SLURM_SUCCESS;

	if (bg_record->magic != BLOCK_MAGIC) {
		error("block already destroyed %p", bg_record);
		xassert(0);
		return SLURM_ERROR;
	}

	bg_record->free_cnt--;
	if (bg_record->free_cnt == -1) {
		/* underflow: more _post_block_free calls than free
		   requests */
		info("we got a negative 1 here for %s",
		     bg_record->bg_block_id);
		xassert(0);
		return SLURM_SUCCESS;
	} else if (bg_record->modifying) {
		info("others are modifing this block %s, don't clear it up",
		     bg_record->bg_block_id);
		return SLURM_SUCCESS;
	} else if (bg_record->free_cnt) {
		/* other freers still pending; the last one does the
		   cleanup below */
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("%d others are trying to destroy this "
			     "block %s", bg_record->free_cnt,
			     bg_record->bg_block_id);
		return SLURM_SUCCESS;
	}

	/* Even if the block is already in error state we need to do
	   this to avoid any overlapping blocks that may have been
	   created due to bad hardware. */
	if ((bg_record->state & (~BG_BLOCK_ERROR_FLAG)) != BG_BLOCK_FREE) {
		/* Something isn't right, go mark this one in an error
		   state. */
		update_block_msg_t block_msg;
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("_post_block_free: block %s is not in state "
			     "free (%s), putting it in error state.",
			     bg_record->bg_block_id,
			     bg_block_state_string(bg_record->state));
		slurm_init_update_block_msg(&block_msg);
		block_msg.bg_block_id = bg_record->bg_block_id;
		block_msg.state = BG_BLOCK_ERROR_FLAG;
		block_msg.reason = "Block would not deallocate";
		/* select_g_update_block() needs the lock released;
		   bg_record may have been removed from the main list
		   by the time we re-acquire it, hence the existence
		   check */
		slurm_mutex_unlock(&block_state_mutex);
		select_g_update_block(&block_msg);
		slurm_mutex_lock(&block_state_mutex);
		if (block_ptr_exist_in_list(bg_lists->main, bg_record))
			bg_record->destroy = 0;
		return SLURM_SUCCESS;
	}

	/* If we are here we are done with the destroy so just reset
	   it. */
	bg_record->destroy = 0;

	/* A bit of a sanity check to make sure blocks are being
	   removed out of all the lists. */
	remove_from_bg_list(bg_lists->booted, bg_record);
	if (remove_from_bg_list(bg_lists->job_running, bg_record)
	    == SLURM_SUCCESS) {
		debug2("_post_block_free: we are freeing block %s and "
		       "it was in the job_running list. This can happen "
		       "if a block is removed while waiting for mmcs to "
		       "finish removing the job from the block.",
		       bg_record->bg_block_id);
		num_unused_cpus += bg_record->cpu_cnt;
	}

	/* If we don't have any mp_counts force block removal */
	if (restore && bg_record->mp_count)
		return SLURM_SUCCESS;

	if (remove_from_bg_list(bg_lists->main, bg_record)
	    != SLURM_SUCCESS) {
		/* This should only happen if called from
		 * bg_job_place.c where the block was never added to
		 * the list. */
		debug("_post_block_free: It appears this block %s isn't "
		      "in the main list anymore.",
		      bg_record->bg_block_id);
	}

	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
		info("_post_block_free: removing %s from database",
		     bg_record->bg_block_id);

	rc = bridge_block_remove(bg_record);
	if (rc != SLURM_SUCCESS) {
		if (rc == BG_ERROR_BLOCK_NOT_FOUND) {
			debug("_post_block_free: block %s is not found",
			      bg_record->bg_block_id);
		} else {
			error("_post_block_free: "
			      "bridge_block_remove(%s): %s",
			      bg_record->bg_block_id, bg_err_str(rc));
		}
	} else if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
		info("_post_block_free: done %s(%p)",
		     bg_record->bg_block_id, bg_record);

	destroy_bg_record(bg_record);
	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
		info("_post_block_free: destroyed");

	return SLURM_SUCCESS;
}
/* _get_precs - gather process statistics for accounting.
 *
 * Builds a list of jag_prec_t records (one per process examined) by
 * reading /proc/<pid>/{stat,io,smaps} via _handle_stats().  Two modes:
 *   - !pgid_plugin: ask proctrack for the exact pids in cont_id;
 *   - pgid_plugin:  scan all numeric entries of /proc (directory
 *     stream is kept open across calls in static slash_proc).
 * Also refreshes consumed-energy totals on the first jobacct record
 * when the container has no pids.
 * Returns the (possibly empty) prec_list; caller owns it. */
static List _get_precs(List task_list, bool pgid_plugin, uint64_t cont_id,
		       jag_callbacks_t *callbacks)
{
	List prec_list = list_create(destroy_jag_prec);
	char proc_stat_file[256];	/* Allow ~20x extra length */
	char proc_io_file[256];		/* Allow ~20x extra length */
	char proc_smaps_file[256];	/* Allow ~20x extra length */
	static int slash_proc_open = 0;
	int i;
	struct jobacctinfo *jobacct = NULL;

	xassert(task_list);

	jobacct = list_peek(task_list);

	if (!pgid_plugin) {
		pid_t *pids = NULL;
		int npids = 0;
		/* get only the processes in the proctrack container */
		proctrack_g_get_pids(cont_id, &pids, &npids);
		if (!npids) {
			/* update consumed energy even if pids do not
			   exist */
			if (jobacct) {
				acct_gather_energy_g_get_data(
					energy_profile, &jobacct->energy);
				jobacct->tres_usage_in_tot[
					TRES_ARRAY_ENERGY] =
					jobacct->energy.consumed_energy;
				jobacct->tres_usage_out_tot[
					TRES_ARRAY_ENERGY] =
					jobacct->energy.current_watts;
				debug2("%s: energy = %"PRIu64
				       " watts = %"PRIu64,
				       __func__,
				       jobacct->tres_usage_in_tot[
					       TRES_ARRAY_ENERGY],
				       jobacct->tres_usage_out_tot[
					       TRES_ARRAY_ENERGY]);
			}
			debug4("no pids in this container %"PRIu64"",
			       cont_id);
			goto finished;
		}
		for (i = 0; i < npids; i++) {
			snprintf(proc_stat_file, 256,
				 "/proc/%d/stat", pids[i]);
			snprintf(proc_io_file, 256,
				 "/proc/%d/io", pids[i]);
			snprintf(proc_smaps_file, 256,
				 "/proc/%d/smaps", pids[i]);
			_handle_stats(prec_list, proc_stat_file,
				      proc_io_file, proc_smaps_file,
				      callbacks,
				      jobacct ? jobacct->tres_count : 0);
		}
		xfree(pids);
	} else {
		struct dirent *slash_proc_entry;
		char *iptr = NULL, *optr = NULL, *optr2 = NULL;

		if (slash_proc_open) {
			rewinddir(slash_proc);
		} else {
			slash_proc = opendir("/proc");
			if (slash_proc == NULL) {
				perror("opening /proc");
				goto finished;
			}
			slash_proc_open = 1;
		}
		strcpy(proc_stat_file, "/proc/");
		strcpy(proc_io_file, "/proc/");
		strcpy(proc_smaps_file, "/proc/");

		while ((slash_proc_entry = readdir(slash_proc))) {
			/* Save a few cycles by simulating
			 * strcat(statFileName, slash_proc_entry->d_name);
			 * strcat(statFileName, "/stat");
			 * while checking for a numeric filename (which
			 * really should be a pid). Then do the same for
			 * the /proc/<pid>/io file name.
			 *
			 * sizeof("/proc") == 6, which lands exactly
			 * after the trailing '/' written above. */
			optr = proc_stat_file + sizeof("/proc");
			iptr = slash_proc_entry->d_name;
			i = 0;
			do {
				/* copy while verifying every char is a
				   digit; i == -1 marks a non-pid entry */
				if ((*iptr < '0') ||
				    ((*optr++ = *iptr++) > '9')) {
					i = -1;
					break;
				}
			} while (*iptr);
			if (i == -1)
				continue;
			iptr = (char*)"/stat";
			do {
				*optr++ = *iptr++;
			} while (*iptr);
			*optr = 0;

			/* same digit-checked copy for the io path */
			optr2 = proc_io_file + sizeof("/proc");
			iptr = slash_proc_entry->d_name;
			i = 0;
			do {
				if ((*iptr < '0') ||
				    ((*optr2++ = *iptr++) > '9')) {
					i = -1;
					break;
				}
			} while (*iptr);
			if (i == -1)
				continue;
			iptr = (char*)"/io";
			do {
				*optr2++ = *iptr++;
			} while (*iptr);
			*optr2 = 0;

			/* and once more for the smaps path */
			optr2 = proc_smaps_file + sizeof("/proc");
			iptr = slash_proc_entry->d_name;
			i = 0;
			do {
				if ((*iptr < '0') ||
				    ((*optr2++ = *iptr++) > '9')) {
					i = -1;
					break;
				}
			} while (*iptr);
			if (i == -1)
				continue;
			iptr = (char*)"/smaps";
			do {
				*optr2++ = *iptr++;
			} while (*iptr);
			*optr2 = 0;

			_handle_stats(prec_list, proc_stat_file,
				      proc_io_file, proc_smaps_file,
				      callbacks,
				      jobacct ? jobacct->tres_count : 0);
		}
	}

finished:
	return prec_list;
}
/*
 * addto_update_list - add object updated to list
 * IN/OUT update_list: list of updated objects
 * IN type: update type
 * IN object: object updated
 * RET: error code
 *
 * NOTE: This function will take the object given and free it later so it
 *       needed to be removed from a list if in one before.
 */
extern int addto_update_list(List update_list, slurmdb_update_type_t type,
			     void *object)
{
	slurmdb_update_object_t *update_object = NULL;
	slurmdb_association_rec_t *assoc = object;
	slurmdb_qos_rec_t *qos = object;
	ListIterator itr = NULL;

	if (!update_list) {
		error("no update list given");
		return SLURM_ERROR;
	}

	/* reuse an existing container of the same type if present */
	itr = list_iterator_create(update_list);
	while ((update_object = list_next(itr))) {
		if (update_object->type == type)
			break;
	}
	list_iterator_destroy(itr);

	if (update_object) {
		/* here we prepend primarily for remove association
		   since parents need to be removed last, and they
		   are removed first in the calling code */
		list_prepend(update_object->objects, object);
		return SLURM_SUCCESS;
	}

	/* BUGFIX: build and validate the new container fully BEFORE
	 * publishing it in update_list.  Previously it was appended
	 * (and the list sorted) before the switch below, so an unknown
	 * type left a half-initialized object (NULL ->objects) in the
	 * caller's list while returning SLURM_ERROR. */
	update_object = xmalloc(sizeof(slurmdb_update_object_t));
	update_object->type = type;

	switch (type) {
	case SLURMDB_MODIFY_USER:
	case SLURMDB_ADD_USER:
	case SLURMDB_REMOVE_USER:
	case SLURMDB_ADD_COORD:
	case SLURMDB_REMOVE_COORD:
		update_object->objects = list_create(
			slurmdb_destroy_user_rec);
		break;
	case SLURMDB_ADD_ASSOC:
		/* We are going to send these to the slurmctld's so
		   lets set up the correct limits to INFINITE instead
		   of NO_VAL */
		if (assoc->grp_cpu_mins == (uint64_t)NO_VAL)
			assoc->grp_cpu_mins = (uint64_t)INFINITE;
		if (assoc->grp_cpu_run_mins == (uint64_t)NO_VAL)
			assoc->grp_cpu_run_mins = (uint64_t)INFINITE;
		if (assoc->grp_cpus == NO_VAL)
			assoc->grp_cpus = INFINITE;
		if (assoc->grp_jobs == NO_VAL)
			assoc->grp_jobs = INFINITE;
		if (assoc->grp_nodes == NO_VAL)
			assoc->grp_nodes = INFINITE;
		if (assoc->grp_submit_jobs == NO_VAL)
			assoc->grp_submit_jobs = INFINITE;
		if (assoc->grp_wall == NO_VAL)
			assoc->grp_wall = INFINITE;
		if (assoc->max_cpu_mins_pj == (uint64_t)NO_VAL)
			assoc->max_cpu_mins_pj = (uint64_t)INFINITE;
		if (assoc->max_cpu_run_mins == (uint64_t)NO_VAL)
			assoc->max_cpu_run_mins = (uint64_t)INFINITE;
		if (assoc->max_cpus_pj == NO_VAL)
			assoc->max_cpus_pj = INFINITE;
		if (assoc->max_jobs == NO_VAL)
			assoc->max_jobs = INFINITE;
		if (assoc->max_nodes_pj == NO_VAL)
			assoc->max_nodes_pj = INFINITE;
		if (assoc->max_submit_jobs == NO_VAL)
			assoc->max_submit_jobs = INFINITE;
		if (assoc->max_wall_pj == NO_VAL)
			assoc->max_wall_pj = INFINITE;
		/* fall through */
	case SLURMDB_MODIFY_ASSOC:
	case SLURMDB_REMOVE_ASSOC:
		xassert(((slurmdb_association_rec_t *)object)->cluster);
		update_object->objects = list_create(
			slurmdb_destroy_association_rec);
		break;
	case SLURMDB_ADD_QOS:
		/* We are going to send these to the slurmctld's so
		   lets set up the correct limits to INFINITE instead
		   of NO_VAL */
		if (qos->grp_cpu_mins == (uint64_t)NO_VAL)
			qos->grp_cpu_mins = (uint64_t)INFINITE;
		if (qos->grp_cpu_run_mins == (uint64_t)NO_VAL)
			qos->grp_cpu_run_mins = (uint64_t)INFINITE;
		if (qos->grp_cpus == NO_VAL)
			qos->grp_cpus = INFINITE;
		if (qos->grp_jobs == NO_VAL)
			qos->grp_jobs = INFINITE;
		if (qos->grp_nodes == NO_VAL)
			qos->grp_nodes = INFINITE;
		if (qos->grp_submit_jobs == NO_VAL)
			qos->grp_submit_jobs = INFINITE;
		if (qos->grp_wall == NO_VAL)
			qos->grp_wall = INFINITE;
		if (qos->max_cpu_mins_pj == (uint64_t)NO_VAL)
			qos->max_cpu_mins_pj = (uint64_t)INFINITE;
		if (qos->max_cpu_run_mins_pu == (uint64_t)NO_VAL)
			qos->max_cpu_run_mins_pu = (uint64_t)INFINITE;
		if (qos->max_cpus_pj == NO_VAL)
			qos->max_cpus_pj = INFINITE;
		if (qos->max_cpus_pu == NO_VAL)
			qos->max_cpus_pu = INFINITE;
		if (qos->max_jobs_pu == NO_VAL)
			qos->max_jobs_pu = INFINITE;
		if (qos->max_nodes_pj == NO_VAL)
			qos->max_nodes_pj = INFINITE;
		if (qos->max_nodes_pu == NO_VAL)
			qos->max_nodes_pu = INFINITE;
		if (qos->max_submit_jobs_pu == NO_VAL)
			qos->max_submit_jobs_pu = INFINITE;
		if (qos->max_wall_pj == NO_VAL)
			qos->max_wall_pj = INFINITE;
		/* fall through */
	case SLURMDB_MODIFY_QOS:
	case SLURMDB_REMOVE_QOS:
		update_object->objects = list_create(
			slurmdb_destroy_qos_rec);
		break;
	case SLURMDB_ADD_WCKEY:
	case SLURMDB_MODIFY_WCKEY:
	case SLURMDB_REMOVE_WCKEY:
		xassert(((slurmdb_wckey_rec_t *)object)->cluster);
		update_object->objects = list_create(
			slurmdb_destroy_wckey_rec);
		break;
	case SLURMDB_ADD_CLUSTER:
	case SLURMDB_REMOVE_CLUSTER:
		/* This should only be the name of the cluster, and is
		   only used in the plugin for rollback purposes. */
		update_object->objects = list_create(slurm_destroy_char);
		break;
	case SLURMDB_UPDATE_NOTSET:
	default:
		error("unknown type set in update_object: %d", type);
		xfree(update_object);	/* never published, safe to free */
		return SLURM_ERROR;
	}

	/* publish the fully-initialized container */
	list_append(update_list, update_object);
	list_sort(update_list, (ListCmpF)_sort_update_object_dec);

	debug4("XXX: update object with type %d added", type);
	list_append(update_object->objects, object);
	return SLURM_SUCCESS;
}
/* glp_asnprob_okalg - solve assignment problem with out-of-kilter
 * algorithm.
 *
 * form   - GLP_ASN_MIN / GLP_ASN_MAX / GLP_ASN_MMP (perfect matching)
 * G      - bipartite graph; v_set/a_cost/a_x are byte offsets into
 *          vertex/arc data blocks (negative offset means "not used")
 * sol    - if non-NULL, receives the total cost of the solution
 * a_x    - if >= 0, each arc's flow (0 or 1) is stored at this offset
 *
 * Returns 0 on success, or GLP_EDATA / GLP_ENOPFS / GLP_ERANGE /
 * GLP_EFAIL on failure.  The problem is reduced to a minimal-cost
 * circulation solved by okalg(). */
int glp_asnprob_okalg(int form, glp_graph *G, int v_set, int a_cost,
      double *sol, int a_x)
{     /* solve assignment problem with out-of-kilter algorithm */
      glp_vertex *v;
      glp_arc *a;
      int nv, na, i, k, *tail, *head, *low, *cap, *cost, *x, *pi, ret;
      double temp;
      /* validate parameters and data offsets */
      if (!(form == GLP_ASN_MIN || form == GLP_ASN_MAX ||
            form == GLP_ASN_MMP))
         xerror("glp_asnprob_okalg: form = %d; invalid parameter\n",
            form);
      if (v_set >= 0 && v_set > G->v_size - (int)sizeof(int))
         xerror("glp_asnprob_okalg: v_set = %d; invalid offset\n",
            v_set);
      if (a_cost >= 0 && a_cost > G->a_size - (int)sizeof(double))
         xerror("glp_asnprob_okalg: a_cost = %d; invalid offset\n",
            a_cost);
      if (a_x >= 0 && a_x > G->a_size - (int)sizeof(int))
         xerror("glp_asnprob_okalg: a_x = %d; invalid offset\n", a_x);
      if (glp_check_asnprob(G, v_set))
         return GLP_EDATA;
      /* nv is the total number of nodes in the resulting network
         (one extra "collector" node) */
      nv = G->nv + 1;
      /* na is the total number of arcs in the resulting network
         (one artificial arc per original vertex) */
      na = G->na + G->nv;
      /* allocate working arrays (1-based indexing) */
      tail = xcalloc(1+na, sizeof(int));
      head = xcalloc(1+na, sizeof(int));
      low = xcalloc(1+na, sizeof(int));
      cap = xcalloc(1+na, sizeof(int));
      cost = xcalloc(1+na, sizeof(int));
      x = xcalloc(1+na, sizeof(int));
      pi = xcalloc(1+nv, sizeof(int));
      /* construct the resulting network */
      k = 0;
      /* (original arcs: unit capacity, integer cost; maximization is
         handled by negating costs) */
      for (i = 1; i <= G->nv; i++)
      {  v = G->v[i];
         for (a = v->out; a != NULL; a = a->t_next)
         {  k++;
            tail[k] = a->tail->i;
            head[k] = a->head->i;
            low[k] = 0;
            cap[k] = 1;
            if (a_cost >= 0)
               memcpy(&temp, (char *)a->data + a_cost, sizeof(double));
            else
               temp = 1.0;
            /* costs must be exact integers representable in int */
            if (!(fabs(temp) <= (double)INT_MAX && temp ==
               floor(temp)))
            {  ret = GLP_EDATA;
               goto done;
            }
            cost[k] = (int)temp;
            if (form != GLP_ASN_MIN)
               cost[k] = - cost[k];
         }
      }
      /* (artificial arcs: connect each vertex to the extra node; the
         lower bound of 1 forces a perfect assignment unless MMP) */
      for (i = 1; i <= G->nv; i++)
      {  v = G->v[i];
         k++;
         if (v->out == NULL)
            tail[k] = i, head[k] = nv;
         else if (v->in == NULL)
            tail[k] = nv, head[k] = i;
         else
            xassert(v != v);  /* unreachable for a bipartite graph */
         low[k] = (form == GLP_ASN_MMP ? 0 : 1);
         cap[k] = 1;
         cost[k] = 0;
      }
      xassert(k == na);
      /* find minimal-cost circulation in the resulting network */
      ret = okalg(nv, na, tail, head, low, cap, cost, x, pi);
      switch (ret)
      {  case 0:
            /* optimal circulation found */
            ret = 0;
            break;
         case 1:
            /* no feasible circulation exists; solution components are
               still stored below */
            ret = GLP_ENOPFS;
            break;
         case 2:
            /* integer overflow occured */
            ret = GLP_ERANGE;
            goto done;
         case 3:
            /* optimality test failed (logic error) */
            ret = GLP_EFAIL;
            goto done;
         default:
            xassert(ret != ret);
      }
      /* store solution components */
      /* (objective function = the total cost) */
      if (sol != NULL)
      {  temp = 0.0;
         for (k = 1; k <= na; k++)
            temp += (double)cost[k] * (double)x[k];
         if (form != GLP_ASN_MIN)
            temp = - temp;  /* undo the cost negation */
         *sol = temp;
      }
      /* (arc flows) */
      if (a_x >= 0)
      {  k = 0;
         for (i = 1; i <= G->nv; i++)
         {  v = G->v[i];
            for (a = v->out; a != NULL; a = a->t_next)
            {  k++;
               if (ret == 0)
                  xassert(x[k] == 0 || x[k] == 1);
               memcpy((char *)a->data + a_x, &x[k], sizeof(int));
            }
         }
      }
done: /* free working arrays */
      xfree(tail);
      xfree(head);
      xfree(low);
      xfree(cap);
      xfree(cost);
      xfree(x);
      xfree(pi);
      return ret;
}
extern int slurm_ckpt_op (uint32_t job_id, uint32_t step_id, struct step_record *step_ptr, uint16_t op, uint16_t data, char *image_dir, time_t * event_time, uint32_t *error_code, char **error_msg ) { int rc = SLURM_SUCCESS; struct check_job_info *check_ptr; if (!step_ptr) /* batch job restore */ return ESLURM_NOT_SUPPORTED; check_ptr = (struct check_job_info *)step_ptr->check_job; xassert(check_ptr); switch (op) { case CHECK_ABLE: if (check_ptr->disabled) rc = ESLURM_DISABLED; else { if ((check_ptr->reply_cnt < 1) && event_time) { /* Return time of last event */ *event_time = check_ptr->time_stamp; } rc = SLURM_SUCCESS; } break; case CHECK_DISABLE: check_ptr->disabled++; break; case CHECK_ENABLE: check_ptr->disabled--; break; case CHECK_CREATE: check_ptr->time_stamp = time(NULL); check_ptr->reply_cnt = 0; check_ptr->error_code = 0; xfree(check_ptr->error_msg); rc = _ckpt_step(step_ptr, data, 0); break; case CHECK_VACATE: check_ptr->time_stamp = time(NULL); check_ptr->reply_cnt = 0; check_ptr->error_code = 0; xfree(check_ptr->error_msg); rc = _ckpt_step(step_ptr, data, 1); break; case CHECK_RESTART: case CHECK_REQUEUE: /* Lots of work is required in Slurm to restart a * checkpointed job. For now the user can submit a * new job and execute "ompi_restart <snapshot>" */ rc = ESLURM_NOT_SUPPORTED; break; case CHECK_ERROR: xassert(error_code); xassert(error_msg); *error_code = check_ptr->error_code; xfree(*error_msg); *error_msg = xstrdup(check_ptr->error_msg); break; default: error("Invalid checkpoint operation: %d", op); rc = EINVAL; } return rc; }
/* pack_process_mapping() - encode the node/task layout as a PMI
 * "(vector,(start,count,depth),...)" process-mapping string.
 *
 * node_cnt - number of nodes
 * task_cnt - total number of tasks
 * tasks    - tasks[i] = number of tasks on node i
 * tids     - tids[i][k] = global task id of the k-th task on node i
 *
 * Returns an xstrdup'd/xstrfmtcat'd string; caller must xfree() it.
 * BUGFIX: the scratch array next_task was allocated but never freed,
 * leaking node_cnt*sizeof(uint16_t) on every call. */
char *
pack_process_mapping(uint32_t node_cnt,
		     uint32_t task_cnt,
		     uint16_t *tasks,
		     uint32_t **tids)
{
	int offset, i;
	int start_node, end_node;
	char *packing = NULL;
	/* next_task[i] - next process for processing */
	uint16_t *next_task = xmalloc(node_cnt * sizeof(uint16_t));

	packing = xstrdup("(vector");

	offset = 0;
	while (offset < task_cnt) {
		int mapped = 0;
		int depth = -1;
		int j;
		start_node = end_node = 0;
		/* find the task with id == offset */
		for (i = 0; i < node_cnt; i++) {
			if (next_task[i] < tasks[i]) {
				/* if we didn't consume entire
				 * quota on this node */
				xassert(offset >= tids[i][next_task[i]]);
				if (offset == tids[i][next_task[i]]) {
					start_node = i;
					break;
				}
			}
		}
		end_node = node_cnt;
		for (i = start_node; i < end_node; i++) {
			if (next_task[i] >= tasks[i] ) {
				/* Save first non-matching node index
				 * and interrupt loop */
				end_node = i;
				continue;
			}
			/* j is advanced to the end of the run of
			 * consecutive task ids on this node */
			for (j = next_task[i];
			     ((j + 1) < tasks[i]) &&
				     ((tids[i][j]+1) == tids[i][j+1]);
			     j++);
			j++;
			/* First run determines the depth */
			if (depth < 0) {
				depth = j - next_task[i];
			} else {
				/* If this is not the first node in the bar
				 * check that: 1. First tid on this node is
				 * sequentially next after last tid
				 * on the previous node */
				if (tids[i-1][next_task[i-1]-1] + 1 !=
				    tids[i][next_task[i]]) {
					end_node = i;
					continue;
				}
			}
			if (depth == (j - next_task[i])) {
				mapped += depth;
				next_task[i] = j;
			} else {
				/* Save first non-matching node index
				 * and interrupt loop */
				end_node = i;
			}
		}
		xstrfmtcat(packing, ",(%u,%u,%u)",
			   start_node, end_node - start_node, depth);
		offset += mapped;
	}
	xstrcat(packing, ")");
	xfree(next_task);	/* BUGFIX: was leaked */
	return packing;
}
/* * The remainder of this file implements the standard SLURM checkpoint API. */ extern int slurm_ckpt_op (uint32_t job_id, uint32_t step_id, struct step_record *step_ptr, uint16_t op, uint16_t data, char *image_dir, time_t * event_time, uint32_t *error_code, char **error_msg ) { int rc = SLURM_SUCCESS; struct check_job_info *check_ptr; uint16_t done_sig = 0; struct job_record *job_ptr; struct node_record *node_ptr; pthread_attr_t attr; pthread_t ckpt_agent_tid = 0; char *nodelist; struct ckpt_req *req_ptr; /* job/step checked already */ job_ptr = find_job_record(job_id); if (!job_ptr) return ESLURM_INVALID_JOB_ID; if (step_id == SLURM_BATCH_SCRIPT) { check_ptr = (struct check_job_info *)job_ptr->check_job; node_ptr = find_first_node_record(job_ptr->node_bitmap); nodelist = node_ptr->name; } else { step_ptr = find_step_record(job_ptr, step_id); if (!step_ptr) return ESLURM_INVALID_JOB_ID; check_ptr = (struct check_job_info *)step_ptr->check_job; nodelist = step_ptr->step_layout->node_list; } xassert(check_ptr); switch (op) { case CHECK_ABLE: if (check_ptr->disabled) rc = ESLURM_DISABLED; else { *event_time = check_ptr->time_stamp; rc = SLURM_SUCCESS; } break; case CHECK_DISABLE: check_ptr->disabled++; break; case CHECK_ENABLE: check_ptr->disabled--; break; case CHECK_REQUEUE: if (step_id != SLURM_BATCH_SCRIPT) { rc = ESLURM_NOT_SUPPORTED; break; } /* no break */ case CHECK_VACATE: done_sig = SIGTERM; /* no break */ case CHECK_CREATE: if (check_ptr->disabled) { rc = ESLURM_DISABLED; break; } if (check_ptr->time_stamp != 0) { rc = EALREADY; break; } check_ptr->time_stamp = time(NULL); check_ptr->error_code = 0; xfree(check_ptr->error_msg); req_ptr = xmalloc(sizeof(struct ckpt_req)); if (!req_ptr) { rc = ENOMEM; break; } req_ptr->gid = job_ptr->group_id; req_ptr->uid = job_ptr->user_id; req_ptr->job_id = job_id; req_ptr->step_id = step_id; req_ptr->begin_time = check_ptr->time_stamp; req_ptr->wait = data; req_ptr->image_dir = xstrdup(image_dir); req_ptr->nodelist = 
xstrdup(nodelist); req_ptr->sig_done = done_sig; req_ptr->op = op; slurm_attr_init(&attr); if (pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) { error("pthread_attr_setdetachstate: %m"); rc = errno; break; } if (pthread_create(&ckpt_agent_tid, &attr, _ckpt_agent_thr, req_ptr)) { error("pthread_create: %m"); rc = errno; break; } slurm_attr_destroy(&attr); break; case CHECK_RESTART: if (step_id != SLURM_BATCH_SCRIPT) { rc = ESLURM_NOT_SUPPORTED; break; } /* create a batch job from saved desc */ rc = ESLURM_NOT_SUPPORTED; /* TODO: save job script */ break; case CHECK_ERROR: xassert(error_code); xassert(error_msg); *error_code = check_ptr->error_code; xfree(*error_msg); *error_msg = xstrdup(check_ptr->error_msg); break; default: error("Invalid checkpoint operation: %d", op); rc = EINVAL; } return rc; }
/* switch_p_libstate_restore - restore switch/cray plugin state from
 * the saved state file <dir_name>/switch_cray_state.
 *
 * dir_name - state save directory (must be non-NULL)
 * recover  - false means clean start: skip recovery entirely
 *
 * The whole body is compiled only with HAVE_NATIVE_CRAY; otherwise
 * this is a no-op returning SLURM_SUCCESS.  A missing state file is
 * not an error (clean state).  The file is unlinked after reading,
 * so there is only one chance to recover. */
extern int switch_p_libstate_restore(char *dir_name, bool recover)
{
#ifdef HAVE_NATIVE_CRAY
	char *data = NULL, *file_name;
	Buf buffer = NULL;
	int error_code = SLURM_SUCCESS;
	int state_fd, data_allocated = 0, data_read = 0, data_size = 0;

	xassert(dir_name != NULL);

	if (debug_flags & DEBUG_FLAG_SWITCH) {
		CRAY_INFO("restore from %s, recover %d",
			  dir_name, (int) recover);
	}

	if (!recover)		/* clean start, no recovery */
		return SLURM_SUCCESS;

	file_name = xstrdup(dir_name);
	xstrcat(file_name, "/switch_cray_state");
	state_fd = open (file_name, O_RDONLY);
	if (state_fd >= 0) {
		data_allocated = SWITCH_BUF_SIZE;
		data = xmalloc(data_allocated);
		/* read the whole file, growing the buffer so there is
		 * always at least SWITCH_BUF_SIZE of headroom; EINTR
		 * reads are retried */
		while (1) {
			data_read = read (state_fd, &data[data_size],
					  SWITCH_BUF_SIZE);
			if ((data_read < 0) && (errno == EINTR))
				continue;
			if (data_read < 0) {
				CRAY_ERR("Read error on %s, %m",
					 file_name);
				error_code = SLURM_ERROR;
				break;
			} else if (data_read == 0)
				break;
			data_size      += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close (state_fd);
		(void) unlink(file_name);	/* One chance to recover */
		xfree(file_name);
	} else {
		CRAY_ERR("No %s file for switch/cray state recovery",
			 file_name);
		CRAY_ERR("Starting switch/cray with clean state");
		xfree(file_name);
		return SLURM_SUCCESS;
	}

	if (error_code == SLURM_SUCCESS) {
		buffer = create_buf (data, data_size);
		data = NULL;	/* now in buffer, don't xfree() */
		_state_read_buf(buffer);
	}

	if (buffer)
		free_buf(buffer);
	xfree(data);
#endif
	return SLURM_SUCCESS;
}
/*
 * start_msg_tree - logic to begin the forward tree and
 *                  accumulate the return codes from processes getting
 *                  the forwarded message
 *
 * IN: hl      - hostlist_t   - list of every node to send message to
 * IN: msg     - slurm_msg_t  - message to send.
 * IN: timeout - int          - how long to wait in milliseconds.
 * RET List    - List containing the responses of the children
 *               (if any) we forwarded the message to. List
 *               containing type (ret_data_info_t).
 *
 * One detached _fwd_tree_thread is spawned per span group; each
 * thread appends its results to ret_list and signals `notify`.  The
 * caller blocks below until ret_list holds one entry per host.
 */
extern List start_msg_tree(hostlist_t hl, slurm_msg_t *msg, int timeout)
{
	int *span = NULL;
	fwd_tree_t *fwd_tree = NULL;
	pthread_mutex_t tree_mutex;
	pthread_cond_t notify;
	int j = 0, count = 0;
	List ret_list = NULL;
	char *name = NULL;
	int thr_count = 0;
	int host_count = 0;

	xassert(hl);
	xassert(msg);

	hostlist_uniq(hl);
	host_count = hostlist_count(hl);

	/* span[i] = number of additional hosts handed to thread i */
	span = set_span(host_count, 0);

	slurm_mutex_init(&tree_mutex);
	pthread_cond_init(&notify, NULL);

	ret_list = list_create(destroy_data_info);

	while ((name = hostlist_shift(hl))) {
		pthread_attr_t attr_agent;
		pthread_t thread_agent;
		int retries = 0;

		slurm_attr_init(&attr_agent);
		if (pthread_attr_setdetachstate
		    (&attr_agent, PTHREAD_CREATE_DETACHED))
			error("pthread_attr_setdetachstate error %m");

		/* ownership of fwd_tree passes to _fwd_tree_thread */
		fwd_tree = xmalloc(sizeof(fwd_tree_t));
		fwd_tree->orig_msg = msg;
		fwd_tree->ret_list = ret_list;
		fwd_tree->timeout = timeout;
		fwd_tree->notify = &notify;
		fwd_tree->tree_mutex = &tree_mutex;

		if (fwd_tree->timeout <= 0) {
			/* convert secs to msec */
			fwd_tree->timeout = slurm_get_msg_timeout() * 1000;
		}

		/* first host plus up to span[thr_count] more go to
		   this thread's sub-tree */
		fwd_tree->tree_hl = hostlist_create(name);
		free(name);
		for (j = 0; j < span[thr_count]; j++) {
			name = hostlist_shift(hl);
			if (!name)
				break;
			hostlist_push(fwd_tree->tree_hl, name);
			free(name);
		}

		while (pthread_create(&thread_agent, &attr_agent,
				      _fwd_tree_thread,
				      (void *)fwd_tree)) {
			error("pthread_create error %m");
			if (++retries > MAX_RETRIES)
				fatal("Can't create pthread");
			sleep(1);	/* sleep and try again */
		}
		slurm_attr_destroy(&attr_agent);
		thr_count++;
	}
	xfree(span);

	slurm_mutex_lock(&tree_mutex);

	/* wait until every host has reported back (threads signal
	   `notify` as they add entries to ret_list) */
	count = list_count(ret_list);
	debug2("Tree head got back %d looking for %d",
	       count, host_count);
	while ((count < host_count)) {
		pthread_cond_wait(&notify, &tree_mutex);
		count = list_count(ret_list);
		debug2("Tree head got back %d", count);
	}
	debug2("Tree head got them all");

	slurm_mutex_unlock(&tree_mutex);

	slurm_mutex_destroy(&tree_mutex);
	pthread_cond_destroy(&notify);

	return ret_list;
}
static void parse_bounds(struct csa *csa) { int j, lb_flag; double lb, s; /* parse the keyword 'bounds' */ xassert(csa->token == T_BOUNDS); scan_token(csa); loop: /* bound definition can start with a sign, numeric constant, or a symbolic name */ if (!(csa->token == T_PLUS || csa->token == T_MINUS || csa->token == T_NUMBER || csa->token == T_NAME)) goto done; /* parse bound definition */ if (csa->token == T_PLUS || csa->token == T_MINUS) { /* parse signed lower bound */ lb_flag = 1; s = (csa->token == T_PLUS ? +1.0 : -1.0); scan_token(csa); if (csa->token == T_NUMBER) lb = s * csa->value, scan_token(csa); else if (the_same(csa->image, "infinity") || the_same(csa->image, "inf")) { if (s > 0.0) error(csa, "invalid use of '+inf' as lower bound\n"); lb = -DBL_MAX, scan_token(csa); } else error(csa, "missing lower bound\n"); } else if (csa->token == T_NUMBER) { /* parse unsigned lower bound */ lb_flag = 1; lb = csa->value, scan_token(csa); } else { /* lower bound is not specified */ lb_flag = 0; } /* parse the token that should follow the lower bound */ if (lb_flag) { if (csa->token != T_LE) error(csa, "missing '<', '<=', or '=<' after lower bound\n") ; scan_token(csa); } /* parse variable name */ if (csa->token != T_NAME) error(csa, "missing variable name\n"); j = find_col(csa, csa->image); /* set lower bound */ if (lb_flag) set_lower_bound(csa, j, lb); scan_token(csa); /* parse the context that follows the variable name */ if (csa->token == T_LE) { /* parse upper bound */ scan_token(csa); if (csa->token == T_PLUS || csa->token == T_MINUS) { /* parse signed upper bound */ s = (csa->token == T_PLUS ? 
+1.0 : -1.0); scan_token(csa); if (csa->token == T_NUMBER) { set_upper_bound(csa, j, s * csa->value); scan_token(csa); } else if (the_same(csa->image, "infinity") || the_same(csa->image, "inf")) { if (s < 0.0) error(csa, "invalid use of '-inf' as upper bound\n"); set_upper_bound(csa, j, +DBL_MAX); scan_token(csa); } else error(csa, "missing upper bound\n"); } else if (csa->token == T_NUMBER) { /* parse unsigned upper bound */ set_upper_bound(csa, j, csa->value); scan_token(csa); } else error(csa, "missing upper bound\n"); } else if (csa->token == T_GE) { /* parse lower bound */ if (lb_flag) { /* the context '... <= x >= ...' is invalid */ error(csa, "invalid bound definition\n"); } scan_token(csa); if (csa->token == T_PLUS || csa->token == T_MINUS) { /* parse signed lower bound */ s = (csa->token == T_PLUS ? +1.0 : -1.0); scan_token(csa); if (csa->token == T_NUMBER) { set_lower_bound(csa, j, s * csa->value); scan_token(csa); } else if (the_same(csa->image, "infinity") || the_same(csa->image, "inf") == 0) { if (s > 0.0) error(csa, "invalid use of '+inf' as lower bound\n"); set_lower_bound(csa, j, -DBL_MAX); scan_token(csa); } else error(csa, "missing lower bound\n"); } else if (csa->token == T_NUMBER) { /* parse unsigned lower bound */ set_lower_bound(csa, j, csa->value); scan_token(csa); } else error(csa, "missing lower bound\n"); } else if (csa->token == T_EQ) { /* parse fixed value */ if (lb_flag) { /* the context '... <= x = ...' is invalid */ error(csa, "invalid bound definition\n"); } scan_token(csa); if (csa->token == T_PLUS || csa->token == T_MINUS) { /* parse signed fixed value */ s = (csa->token == T_PLUS ? 
+1.0 : -1.0); scan_token(csa); if (csa->token == T_NUMBER) { set_lower_bound(csa, j, s * csa->value); set_upper_bound(csa, j, s * csa->value); scan_token(csa); } else error(csa, "missing fixed value\n"); } else if (csa->token == T_NUMBER) { /* parse unsigned fixed value */ set_lower_bound(csa, j, csa->value); set_upper_bound(csa, j, csa->value); scan_token(csa); } else error(csa, "missing fixed value\n"); } else if (the_same(csa->image, "free")) { /* parse the keyword 'free' */ if (lb_flag) { /* the context '... <= x free ...' is invalid */ error(csa, "invalid bound definition\n"); } set_lower_bound(csa, j, -DBL_MAX); set_upper_bound(csa, j, +DBL_MAX); scan_token(csa); } else if (!lb_flag) { /* neither lower nor upper bounds are specified */ error(csa, "invalid bound definition\n"); } goto loop; done: return; }
static void check_parm(const char *func, const glp_cpxcp *parm) { /* check control parameters */ xassert(func != NULL); xassert(parm != NULL); return; }
void glp_init_cpxcp(glp_cpxcp *parm) { xassert(parm != NULL); return; }
int glp_maxflow_ffalg(glp_graph *G, int s, int t, int a_cap,
      double *sol, int a_x, int v_cut)
{     /* find maximal flow with Ford-Fulkerson algorithm */
      /* G is the flow network; s and t are source/sink vertex numbers;
       * a_cap/a_x/v_cut are byte offsets into arc/vertex data blocks
       * (negative offset means "field not used"); sol, if non-NULL,
       * receives the total flow.  Returns 0 on success or GLP_EDATA if
       * the network data are invalid (self-loop, or a capacity that is
       * not an integer in [0, INT_MAX]). */
      glp_vertex *v;
      glp_arc *a;
      int nv, na, i, k, flag, *tail, *head, *cap, *x, ret;
      char *cut;
      double temp;
      if (!(1 <= s && s <= G->nv))
         xerror("glp_maxflow_ffalg: s = %d; source node number out of r"
            "ange\n", s);
      if (!(1 <= t && t <= G->nv))
         xerror("glp_maxflow_ffalg: t = %d: sink node number out of ran"
            "ge\n", t);
      if (s == t)
         xerror("glp_maxflow_ffalg: s = t = %d; source and sink nodes m"
            "ust be distinct\n", s);
      if (a_cap >= 0 && a_cap > G->a_size - (int)sizeof(double))
         xerror("glp_maxflow_ffalg: a_cap = %d; invalid offset\n",
            a_cap);
      if (v_cut >= 0 && v_cut > G->v_size - (int)sizeof(int))
         xerror("glp_maxflow_ffalg: v_cut = %d; invalid offset\n",
            v_cut);
      /* allocate working arrays */
      nv = G->nv;
      na = G->na;
      tail = xcalloc(1+na, sizeof(int));
      head = xcalloc(1+na, sizeof(int));
      cap = xcalloc(1+na, sizeof(int));
      x = xcalloc(1+na, sizeof(int));
      if (v_cut < 0)
         cut = NULL;
      else
         cut = xcalloc(1+nv, sizeof(char));
      /* copy the flow network into the flat integer arrays required by
       * the solver kernel; arcs are numbered 1..na in the order they
       * are encountered */
      k = 0;
      for (i = 1; i <= G->nv; i++)
      {  v = G->v[i];
         for (a = v->out; a != NULL; a = a->t_next)
         {  k++;
            tail[k] = a->tail->i;
            head[k] = a->head->i;
            if (tail[k] == head[k])
            {  /* self-loops are not allowed */
               ret = GLP_EDATA;
               goto done;
            }
            if (a_cap >= 0)
               memcpy(&temp, (char *)a->data + a_cap, sizeof(double));
            else
               temp = 1.0;  /* default capacity */
            /* capacity must be an exact non-negative integer value */
            if (!(0.0 <= temp && temp <= (double)INT_MAX &&
                  temp == floor(temp)))
            {  ret = GLP_EDATA;
               goto done;
            }
            cap[k] = (int)temp;
         }
      }
      xassert(k == na);
      /* find maximal flow in the flow network */
      ffalg(nv, na, tail, head, s, t, cap, x, cut);
      ret = 0;
      /* store solution components */
      /* (objective function = total flow through the network) */
      if (sol != NULL)
      {  temp = 0.0;
         for (k = 1; k <= na; k++)
         {  if (tail[k] == s)
               temp += (double)x[k];
            else if (head[k] == s)
               temp -= (double)x[k];
         }
         *sol = temp;
      }
      /* (arc flows, written back in the same arc order as above) */
      if (a_x >= 0)
      {  k = 0;
         for (i = 1; i <= G->nv; i++)
         {  v = G->v[i];
            for (a = v->out; a != NULL; a = a->t_next)
            {  temp = (double)x[++k];
               memcpy((char *)a->data + a_x, &temp, sizeof(double));
            }
         }
      }
      /* (node flags: 1 if the vertex is on the source side of the
       * minimal cut found by the algorithm) */
      if (v_cut >= 0)
      {  for (i = 1; i <= G->nv; i++)
         {  v = G->v[i];
            flag = cut[i];
            memcpy((char *)v->data + v_cut, &flag, sizeof(int));
         }
      }
done: /* free working arrays */
      xfree(tail);
      xfree(head);
      xfree(cap);
      xfree(x);
      if (cut != NULL) xfree(cut);
      return ret;
}
static void yfree(void *ptr) { xassert(ptr != NULL); free(ptr); return; }
void glp_mincost_lp(glp_prob *lp, glp_graph *G, int names, int v_rhs,
      int a_low, int a_cap, int a_cost)
{     /* convert minimum-cost flow problem on network G to LP form:
       * one fixed (equality) row per vertex enforcing flow balance and
       * one bounded column per arc carrying its flow.  v_rhs, a_low,
       * a_cap, a_cost are byte offsets into vertex/arc data blocks
       * (negative offset = use the default value noted below).  When
       * names is GLP_ON, symbolic row/column names are assigned. */
      glp_vertex *v;
      glp_arc *a;
      int i, j, type, ind[1+2];
      double rhs, low, cap, cost, val[1+2];
      if (!(names == GLP_ON || names == GLP_OFF))
         xerror("glp_mincost_lp: names = %d; invalid parameter\n",
            names);
      if (v_rhs >= 0 && v_rhs > G->v_size - (int)sizeof(double))
         xerror("glp_mincost_lp: v_rhs = %d; invalid offset\n", v_rhs);
      if (a_low >= 0 && a_low > G->a_size - (int)sizeof(double))
         xerror("glp_mincost_lp: a_low = %d; invalid offset\n", a_low);
      if (a_cap >= 0 && a_cap > G->a_size - (int)sizeof(double))
         xerror("glp_mincost_lp: a_cap = %d; invalid offset\n", a_cap);
      if (a_cost >= 0 && a_cost > G->a_size - (int)sizeof(double))
         xerror("glp_mincost_lp: a_cost = %d; invalid offset\n", a_cost)
            ;
      /* start from an empty problem object */
      glp_erase_prob(lp);
      if (names) glp_set_prob_name(lp, G->name);
      /* one row per vertex: flow balance = rhs (default 0) */
      if (G->nv > 0) glp_add_rows(lp, G->nv);
      for (i = 1; i <= G->nv; i++)
      {  v = G->v[i];
         if (names) glp_set_row_name(lp, i, v->name);
         if (v_rhs >= 0)
            memcpy(&rhs, (char *)v->data + v_rhs, sizeof(double));
         else
            rhs = 0.0;
         glp_set_row_bnds(lp, i, GLP_FX, rhs, rhs);
      }
      /* one column per arc, in arc-traversal order */
      if (G->na > 0) glp_add_cols(lp, G->na);
      for (i = 1, j = 0; i <= G->nv; i++)
      {  v = G->v[i];
         for (a = v->out; a != NULL; a = a->t_next)
         {  j++;
            if (names)
            {  char name[50+1];
               sprintf(name, "x[%d,%d]", a->tail->i, a->head->i);
               xassert(strlen(name) < sizeof(name));
               glp_set_col_name(lp, j, name);
            }
            /* each arc contributes +1 at its tail row and -1 at its
             * head row; a self-loop contributes nothing */
            if (a->tail->i != a->head->i)
            {  ind[1] = a->tail->i, val[1] = +1.0;
               ind[2] = a->head->i, val[2] = -1.0;
               glp_set_mat_col(lp, j, 2, ind, val);
            }
            if (a_low >= 0)
               memcpy(&low, (char *)a->data + a_low, sizeof(double));
            else
               low = 0.0;  /* default lower flow bound */
            if (a_cap >= 0)
               memcpy(&cap, (char *)a->data + a_cap, sizeof(double));
            else
               cap = 1.0;  /* default capacity */
            /* DBL_MAX capacity means "unbounded above" */
            if (cap == DBL_MAX)
               type = GLP_LO;
            else if (low != cap)
               type = GLP_DB;
            else
               type = GLP_FX;
            glp_set_col_bnds(lp, j, type, low, cap);
            if (a_cost >= 0)
               memcpy(&cost, (char *)a->data + a_cost, sizeof(double));
            else
               cost = 0.0;  /* default cost */
            glp_set_obj_coef(lp, j, cost);
         }
      }
      xassert(j == G->na);
      return;
}
/* dump_all_front_end_state - save the state of all front_end nodes to file */ extern int dump_all_front_end_state(void) { #ifdef HAVE_FRONT_END /* Save high-water mark to avoid buffer growth with copies */ static int high_buffer_size = (1024 * 1024); int error_code = 0, i, log_fd; char *old_file, *new_file, *reg_file; front_end_record_t *front_end_ptr; /* Locks: Read config and node */ slurmctld_lock_t node_read_lock = { READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; Buf buffer = init_buf(high_buffer_size); DEF_TIMERS; START_TIMER; /* write header: version, time */ packstr(FRONT_END_STATE_VERSION, buffer); pack_time(time(NULL), buffer); /* write node records to buffer */ lock_slurmctld (node_read_lock); for (i = 0, front_end_ptr = front_end_nodes; i < front_end_node_cnt; i++, front_end_ptr++) { xassert(front_end_ptr->magic == FRONT_END_MAGIC); _dump_front_end_state(front_end_ptr, buffer); } old_file = xstrdup (slurmctld_conf.state_save_location); xstrcat (old_file, "/front_end_state.old"); reg_file = xstrdup (slurmctld_conf.state_save_location); xstrcat (reg_file, "/front_end_state"); new_file = xstrdup (slurmctld_conf.state_save_location); xstrcat (new_file, "/front_end_state.new"); unlock_slurmctld (node_read_lock); /* write the buffer to file */ lock_state_files(); log_fd = creat (new_file, 0600); if (log_fd < 0) { error ("Can't save state, error creating file %s %m", new_file); error_code = errno; } else { int pos = 0, nwrite = get_buf_offset(buffer), amount, rc; char *data = (char *)get_buf_data(buffer); high_buffer_size = MAX(nwrite, high_buffer_size); while (nwrite > 0) { amount = write(log_fd, &data[pos], nwrite); if ((amount < 0) && (errno != EINTR)) { error("Error writing file %s, %m", new_file); error_code = errno; break; } nwrite -= amount; pos += amount; } rc = fsync_and_close(log_fd, "front_end"); if (rc && !error_code) error_code = rc; } if (error_code) (void) unlink (new_file); else { /* file shuffle */ (void) unlink (old_file); if (link(reg_file, 
old_file)) debug4("unable to create link for %s -> %s: %m", reg_file, old_file); (void) unlink (reg_file); if (link(new_file, reg_file)) debug4("unable to create link for %s -> %s: %m", new_file, reg_file); (void) unlink (new_file); } xfree (old_file); xfree (reg_file); xfree (new_file); unlock_state_files (); free_buf (buffer); END_TIMER2("dump_all_front_end_state"); return error_code; #else return SLURM_SUCCESS; #endif }
int glp_asnprob_hall(glp_graph *G, int v_set, int a_x)
{     /* find bipartite matching of maximal cardinality (Hall's
       * condition check) on assignment graph G.  v_set/a_x are byte
       * offsets into vertex/arc data blocks (negative = unused).
       * Returns the cardinality of the matching found, or -1 if the
       * graph fails glp_check_asnprob. */
      glp_vertex *v;
      glp_arc *a;
      int card, i, k, loc, n, n1, n2, xij;
      int *num, *icn, *ip, *lenr, *iperm, *pr, *arp, *cv, *out;
      if (v_set >= 0 && v_set > G->v_size - (int)sizeof(int))
         xerror("glp_asnprob_hall: v_set = %d; invalid offset\n",
            v_set);
      if (a_x >= 0 && a_x > G->a_size - (int)sizeof(int))
         xerror("glp_asnprob_hall: a_x = %d; invalid offset\n", a_x);
      if (glp_check_asnprob(G, v_set))
         return -1;
      /* determine the number of vertices in sets R and S and renumber
         vertices in S which correspond to columns of the matrix; skip
         all isolated vertices */
      /* num[i]: 0 = vertex i in R, >0 = column number of vertex i in S,
       * -1 = isolated */
      num = xcalloc(1+G->nv, sizeof(int));
      n1 = n2 = 0;
      for (i = 1; i <= G->nv; i++)
      {  v = G->v[i];
         if (v->in == NULL && v->out != NULL)
            n1++, num[i] = 0; /* vertex in R */
         else if (v->in != NULL && v->out == NULL)
            n2++, num[i] = n2; /* vertex in S */
         else
         {  xassert(v->in == NULL && v->out == NULL);
            num[i] = -1; /* isolated vertex */
         }
      }
      /* the matrix must be square, thus, if it has more columns than
         rows, extra rows will be just empty, and vice versa */
      n = (n1 >= n2 ? n1 : n2);
      /* allocate working arrays */
      icn = xcalloc(1+G->na, sizeof(int));
      ip = xcalloc(1+n, sizeof(int));
      lenr = xcalloc(1+n, sizeof(int));
      iperm = xcalloc(1+n, sizeof(int));
      pr = xcalloc(1+n, sizeof(int));
      arp = xcalloc(1+n, sizeof(int));
      cv = xcalloc(1+n, sizeof(int));
      out = xcalloc(1+n, sizeof(int));
      /* build the adjacency matrix of the bipartite graph in row-wise
         format (rows are vertices in R, columns are vertices in S) */
      k = 0, loc = 1;
      for (i = 1; i <= G->nv; i++)
      {  if (num[i] != 0) continue;
         /* vertex i in R */
         ip[++k] = loc;
         v = G->v[i];
         for (a = v->out; a != NULL; a = a->t_next)
         {  xassert(num[a->head->i] != 0);
            icn[loc++] = num[a->head->i];
         }
         lenr[k] = loc - ip[k];
      }
      xassert(loc-1 == G->na);
      /* make all extra rows empty (all extra columns are empty due to
         the row-wise format used) */
      for (k++; k <= n; k++)
         ip[k] = loc, lenr[k] = 0;
      /* find a row permutation that maximizes the number of non-zeros
         on the main diagonal */
      card = mc21a(n, icn, ip, lenr, iperm, pr, arp, cv, out);
#if 1 /* 18/II-2010 */
      /* FIXED: if card = n, arp remains clobbered on exit */
      /* rebuild arp so that arp[k] = row matched to column k (0 if
       * unmatched), inverting the iperm permutation */
      for (i = 1; i <= n; i++)
         arp[i] = 0;
      for (i = 1; i <= card; i++)
      {  k = iperm[i];
         xassert(1 <= k && k <= n);
         xassert(arp[k] == 0);
         arp[k] = i;
      }
#endif
      /* store solution, if necessary */
      if (a_x < 0) goto skip;
      k = 0;
      for (i = 1; i <= G->nv; i++)
      {  if (num[i] != 0) continue;
         /* vertex i in R */
         k++;
         v = G->v[i];
         for (a = v->out; a != NULL; a = a->t_next)
         {  /* arp[k] is the number of matched column or zero */
            if (arp[k] == num[a->head->i])
            {  xassert(arp[k] != 0);
               xij = 1;
            }
            else
               xij = 0;
            memcpy((char *)a->data + a_x, &xij, sizeof(int));
         }
      }
skip: /* free working arrays */
      xfree(num);
      xfree(icn);
      xfree(ip);
      xfree(lenr);
      xfree(iperm);
      xfree(pr);
      xfree(arp);
      xfree(cv);
      xfree(out);
      return card;
}
/* * Return a list of plugin names that match the given type. * * IN plugin_type - Type of plugin to search for in the plugin_dir. * RET list of plugin names, NULL if none found. */ extern List plugin_get_plugins_of_type(char *plugin_type) { List plugin_names = NULL; char *plugin_dir = NULL, *dir = NULL, *save_ptr = NULL; char *type_under = NULL, *type_slash = NULL; DIR *dirp; struct dirent *e; int len; if (!(plugin_dir = slurm_get_plugin_dir())) { error("%s: No plugin dir given", __func__); goto done; } type_under = xstrdup_printf("%s_", plugin_type); type_slash = xstrdup_printf("%s/", plugin_type); dir = strtok_r(plugin_dir, ":", &save_ptr); while (dir) { /* Open the directory. */ if (!(dirp = opendir(dir))) { error("cannot open plugin directory %s", dir); goto done; } while (1) { char full_name[128]; if (!(e = readdir( dirp ))) break; /* Check only files with "plugintype_" in them. */ if (xstrncmp(e->d_name, type_under, strlen(type_under))) continue; len = strlen(e->d_name); len -= 3; /* Check only shared object files */ if (xstrcmp(e->d_name+len, ".so")) continue; /* add one for the / */ len++; xassert(len < sizeof(full_name)); snprintf(full_name, len, "%s%s", type_slash, e->d_name + strlen(type_slash)); if (!plugin_names) plugin_names = list_create(slurm_destroy_char); if (!list_find_first(plugin_names, slurm_find_char_in_list, full_name)) list_append(plugin_names, xstrdup(full_name)); } closedir(dirp); dir = strtok_r(NULL, ":", &save_ptr); } done: xfree(plugin_dir); xfree(type_under); xfree(type_slash); return plugin_names; }
/*
 * slurm_job_step_get_pids - get the complete list of pids for a given
 *      job step
 *
 * IN job_id
 * IN step_id
 * IN node_list, optional, if NULL then all nodes in step are returned.
 * OUT resp - if *resp is NULL a response structure is allocated here
 *      (and freed again on RPC failure); otherwise the caller's
 *      structure is filled in.
 * RET SLURM_SUCCESS on success SLURM_ERROR else
 */
extern int slurm_job_step_get_pids(uint32_t job_id, uint32_t step_id,
				   char *node_list,
				   job_step_pids_response_msg_t **resp)
{
	int rc = SLURM_SUCCESS;
	slurm_msg_t req_msg;
	job_step_id_msg_t req;
	ListIterator itr;
	List ret_list = NULL;
	ret_data_info_t *ret_data_info = NULL;
	slurm_step_layout_t *step_layout = NULL;
	job_step_pids_response_msg_t *resp_out;
	bool created = 0;

	xassert(resp);

	/* no node list given: query the step layout for its nodes */
	if (!node_list) {
		if (!(step_layout =
		      slurm_job_step_layout_get(job_id, step_id))) {
			rc = errno;
			error("slurm_job_step_get_pids: "
			      "problem getting step_layout for %u.%u: %s",
			      job_id, step_id, slurm_strerror(rc));
			return rc;
		}
		node_list = step_layout->node_list;
	}

	/* allocate the response only if the caller did not; remember so
	 * it can be freed on failure */
	if (!*resp) {
		resp_out = xmalloc(sizeof(job_step_pids_response_msg_t));
		*resp = resp_out;
		created = 1;
	} else
		resp_out = *resp;

	debug("slurm_job_step_get_pids: "
	      "getting pid information of job %u.%u on nodes %s",
	      job_id, step_id, node_list);

	slurm_msg_t_init(&req_msg);

	memset(&req, 0, sizeof(job_step_id_msg_t));
	resp_out->job_id = req.job_id = job_id;
	resp_out->step_id = req.step_id = step_id;
	req_msg.msg_type = REQUEST_JOB_STEP_PIDS;
	req_msg.data = &req;

	if (!(ret_list = slurm_send_recv_msgs(node_list,
					      &req_msg, 0, false))) {
		error("slurm_job_step_get_pids: got an error no list returned");
		rc = SLURM_ERROR;
		if (created) {
			slurm_job_step_pids_response_msg_free(resp_out);
			*resp = NULL;
		}
		goto cleanup;
	}

	/* collect per-node replies; pid payloads are moved (not copied)
	 * into resp_out->pid_list */
	itr = list_iterator_create(ret_list);
	while((ret_data_info = list_next(itr))) {
		switch (ret_data_info->type) {
		case RESPONSE_JOB_STEP_PIDS:
			if (!resp_out->pid_list)
				resp_out->pid_list = list_create(
					slurm_free_job_step_pids);
			list_push(resp_out->pid_list,
				  ret_data_info->data);
			/* ownership transferred to pid_list */
			ret_data_info->data = NULL;
			break;
		case RESPONSE_SLURM_RC:
			rc = slurm_get_return_code(ret_data_info->type,
						   ret_data_info->data);
			error("slurm_job_step_get_pids: "
			      "there was an error with the "
			      "list pid request rc = %s",
			      slurm_strerror(rc));
			break;
		default:
			rc = slurm_get_return_code(ret_data_info->type,
						   ret_data_info->data);
			error("slurm_job_step_get_pids: "
			      "unknown return given %d rc = %s",
			      ret_data_info->type, slurm_strerror(rc));
			break;
		}
	}
	list_iterator_destroy(itr);
	FREE_NULL_LIST(ret_list);

	if (resp_out->pid_list)
		list_sort(resp_out->pid_list, (ListCmpF)_sort_pids_by_name);
cleanup:
	slurm_step_layout_destroy(step_layout);

	return rc;
}
/*
 * srun_user_message - Send arbitrary message to an srun job (no job
 * steps).  If the job has a registered srun response port the message
 * goes straight to srun; otherwise, for a running batch job, it is
 * delivered to the batch host via an agent RPC.
 * RET SLURM_SUCCESS, ESLURM_ALREADY_DONE, or ESLURM_DISABLED.
 */
extern int srun_user_message(struct job_record *job_ptr, char *msg)
{
	slurm_addr_t * addr;
	srun_user_msg_t *msg_arg;

	xassert(job_ptr);
	if (!IS_JOB_PENDING(job_ptr) && !IS_JOB_RUNNING(job_ptr))
		return ESLURM_ALREADY_DONE;

	if (job_ptr->other_port &&
	    job_ptr->resp_host && job_ptr->resp_host[0]) {
		/* srun is listening; launch the message directly.
		 * NOTE(review): allocation uses sizeof(struct
		 * sockaddr_in) for a slurm_addr_t — presumably these are
		 * the same type here; confirm against slurm_addr_t. */
		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, job_ptr->other_port,
			job_ptr->resp_host);
		msg_arg = xmalloc(sizeof(srun_user_msg_t));
		msg_arg->job_id = job_ptr->job_id;
		msg_arg->msg    = xstrdup(msg);
		/* _srun_agent_launch takes ownership of addr/msg_arg */
		_srun_agent_launch(addr, job_ptr->resp_host, SRUN_USER_MSG,
				   msg_arg, job_ptr->start_protocol_ver);
		return SLURM_SUCCESS;
	} else if (job_ptr->batch_flag && IS_JOB_RUNNING(job_ptr)) {
#ifndef HAVE_FRONT_END
		struct node_record *node_ptr;
#endif
		job_notify_msg_t *notify_msg_ptr;
		agent_arg_t *agent_arg_ptr;
#ifdef HAVE_FRONT_END
		if (job_ptr->batch_host == NULL)
			return ESLURM_DISABLED;	/* no allocated nodes */
		agent_arg_ptr = (agent_arg_t *) xmalloc(sizeof(agent_arg_t));
		agent_arg_ptr->hostlist =
			hostlist_create(job_ptr->batch_host);
		if (!agent_arg_ptr->hostlist)
			fatal("Invalid srun host: %s", job_ptr->batch_host);
		if (job_ptr->front_end_ptr)
			agent_arg_ptr->protocol_version =
				job_ptr->front_end_ptr->protocol_version;
#else
		node_ptr = find_first_node_record(job_ptr->node_bitmap);
		if (node_ptr == NULL)
			return ESLURM_DISABLED;	/* no allocated nodes */
		agent_arg_ptr = (agent_arg_t *) xmalloc(sizeof(agent_arg_t));
		agent_arg_ptr->hostlist = hostlist_create(node_ptr->name);
		agent_arg_ptr->protocol_version = node_ptr->protocol_version;
		if (!agent_arg_ptr->hostlist)
			fatal("Invalid srun host: %s", node_ptr->name);
#endif
		notify_msg_ptr = (job_notify_msg_t *)
				 xmalloc(sizeof(job_notify_msg_t));
		notify_msg_ptr->job_id = job_ptr->job_id;
		notify_msg_ptr->message = xstrdup(msg);
		agent_arg_ptr->node_count = 1;
		agent_arg_ptr->retry = 0;
		agent_arg_ptr->msg_type = REQUEST_JOB_NOTIFY;
		agent_arg_ptr->msg_args = (void *) notify_msg_ptr;
		/* Launch the RPC via agent */
		agent_queue_request(agent_arg_ptr);
		return SLURM_SUCCESS;
	}
	return ESLURM_DISABLED;
}
/*
 * jag_common_poll_data - poll accounting data for all tasks in
 * task_list, updating each task's jobacctinfo with CPU, memory, disk,
 * and energy usage.  Reentrant calls are rejected via the static
 * 'processing' flag (NOTE(review): plain int, not atomic — presumably
 * callers are single-threaded here; confirm).
 */
extern void jag_common_poll_data(
	List task_list, bool pgid_plugin, uint64_t cont_id,
	jag_callbacks_t *callbacks, bool profile)
{
	/* Update the data */
	List prec_list = NULL;
	uint64_t total_job_mem = 0, total_job_vsize = 0;
	ListIterator itr;
	jag_prec_t *prec = NULL;
	struct jobacctinfo *jobacct = NULL;
	static int processing = 0;
	char sbuf[72];
	int energy_counted = 0;
	time_t ct;
	static int over_memory_kill = -1;	/* -1 = not yet read from config */
	int i = 0;

	xassert(callbacks);

	if (!pgid_plugin && (cont_id == NO_VAL64)) {
		debug("cont_id hasn't been set yet not running poll");
		return;
	}

	if (processing) {
		debug("already running, returning");
		return;
	}
	processing = 1;

	if (!callbacks->get_precs)
		callbacks->get_precs = _get_precs;

	ct = time(NULL);
	/* gather raw per-process records (precs) for this container */
	prec_list = (*(callbacks->get_precs))(task_list, pgid_plugin,
					      cont_id, callbacks);

	if (!list_count(prec_list) || !task_list ||
	    !list_count(task_list))
		goto finished;	/* We have no business being here! */

	itr = list_iterator_create(task_list);
	while ((jobacct = list_next(itr))) {
		double cpu_calc;
		double last_total_cputime;
		if (!(prec = list_find_first(prec_list, _find_prec,
					     jobacct)))
			continue;
		/*
		 * Only jobacct_gather/cgroup uses prec_extra, and we want to
		 * make sure we call it once per task, so call it here as we
		 * iterate through the tasks instead of in get_precs.
		 */
		if (callbacks->prec_extra)
			(*(callbacks->prec_extra))(prec, jobacct->id.taskid);

#if _DEBUG
		info("pid:%u ppid:%u rss:%"PRIu64" B",
		     prec->pid, prec->ppid,
		     prec->tres_data[TRES_ARRAY_MEM].size_read);
#endif
		/* find all my descendents */
		if (callbacks->get_offspring_data)
			(*(callbacks->get_offspring_data))
				(prec_list, prec, prec->pid);

		last_total_cputime =
			(double)jobacct->tres_usage_in_tot[TRES_ARRAY_CPU];

		cpu_calc = (prec->ssec + prec->usec) / (double)hertz;

		/*
		 * Since we are not storing things as a double anymore make it
		 * bigger so we don't lose precision.
		 */
		cpu_calc *= CPU_TIME_ADJ;

		prec->tres_data[TRES_ARRAY_CPU].size_read =
			(uint64_t)cpu_calc;

		/* get energy consumption
		 * only once is enough since we
		 * report per node energy consumption.
		 * Energy is stored in read fields, while power is stored
		 * in write fields.*/
		debug2("energycounted = %d", energy_counted);
		if (energy_counted == 0) {
			acct_gather_energy_g_get_data(
				energy_profile,
				&jobacct->energy);
			prec->tres_data[TRES_ARRAY_ENERGY].size_read =
				jobacct->energy.consumed_energy;
			prec->tres_data[TRES_ARRAY_ENERGY].size_write =
				jobacct->energy.current_watts;
			debug2("%s: energy = %"PRIu64" watts = %"PRIu64" ave_watts = %u",
			       __func__,
			       prec->tres_data[TRES_ARRAY_ENERGY].size_read,
			       prec->tres_data[TRES_ARRAY_ENERGY].size_write,
			       jobacct->energy.ave_watts);
			energy_counted = 1;
		}

		/* tally their usage */
		for (i = 0; i < jobacct->tres_count; i++) {
			/* INFINITE64 marks "no data" for this TRES */
			if (prec->tres_data[i].size_read == INFINITE64)
				continue;
			if (jobacct->tres_usage_in_max[i] == INFINITE64)
				jobacct->tres_usage_in_max[i] =
					prec->tres_data[i].size_read;
			else
				jobacct->tres_usage_in_max[i] =
					MAX(jobacct->tres_usage_in_max[i],
					    prec->tres_data[i].size_read);
			/*
			 * Even with min we want to get the max as we are
			 * looking at a specific task so we are always looking
			 * at the max that task had, not the min (or lots of
			 * things will be zero). The min is from comparing
			 * ranks later when combining. So here it will be the
			 * same as the max value set above.
			 * (same thing goes for the out)
			 */
			jobacct->tres_usage_in_min[i] =
				jobacct->tres_usage_in_max[i];
			jobacct->tres_usage_in_tot[i] =
				prec->tres_data[i].size_read;

			if (jobacct->tres_usage_out_max[i] == INFINITE64)
				jobacct->tres_usage_out_max[i] =
					prec->tres_data[i].size_write;
			else
				jobacct->tres_usage_out_max[i] =
					MAX(jobacct->tres_usage_out_max[i],
					    prec->tres_data[i].size_write);
			jobacct->tres_usage_out_min[i] =
				jobacct->tres_usage_out_max[i];
			jobacct->tres_usage_out_tot[i] =
				prec->tres_data[i].size_write;
		}

		total_job_mem +=
			jobacct->tres_usage_in_tot[TRES_ARRAY_MEM];
		total_job_vsize +=
			jobacct->tres_usage_in_tot[TRES_ARRAY_VMEM];

		/* Update the cpu times */
		jobacct->user_cpu_sec = (uint32_t)(prec->usec /
						   (double)hertz);
		jobacct->sys_cpu_sec = (uint32_t)(prec->ssec /
						  (double)hertz);

		/* compute frequency */
		jobacct->this_sampled_cputime =
			cpu_calc - last_total_cputime;
		_get_sys_interface_freq_line(
			prec->last_cpu,
			"cpuinfo_cur_freq", sbuf);
		jobacct->act_cpufreq =
			_update_weighted_freq(jobacct, sbuf);

		debug("%s: Task %u pid %d ave_freq = %u mem size/max %"PRIu64"/%"PRIu64" vmem size/max %"PRIu64"/%"PRIu64", disk read size/max (%"PRIu64"/%"PRIu64"), disk write size/max (%"PRIu64"/%"PRIu64"), time %f(%u+%u) Energy tot/max %"PRIu64"/%"PRIu64" TotPower %"PRIu64" MaxPower %"PRIu64" MinPower %"PRIu64,
		      __func__,
		      jobacct->id.taskid,
		      jobacct->pid,
		      jobacct->act_cpufreq,
		      jobacct->tres_usage_in_tot[TRES_ARRAY_MEM],
		      jobacct->tres_usage_in_max[TRES_ARRAY_MEM],
		      jobacct->tres_usage_in_tot[TRES_ARRAY_VMEM],
		      jobacct->tres_usage_in_max[TRES_ARRAY_VMEM],
		      jobacct->tres_usage_in_tot[TRES_ARRAY_FS_DISK],
		      jobacct->tres_usage_in_max[TRES_ARRAY_FS_DISK],
		      jobacct->tres_usage_out_tot[TRES_ARRAY_FS_DISK],
		      jobacct->tres_usage_out_max[TRES_ARRAY_FS_DISK],
		      (double)(jobacct->tres_usage_in_tot[TRES_ARRAY_CPU] /
			       CPU_TIME_ADJ),
		      jobacct->user_cpu_sec,
		      jobacct->sys_cpu_sec,
		      jobacct->tres_usage_in_tot[TRES_ARRAY_ENERGY],
		      jobacct->tres_usage_in_max[TRES_ARRAY_ENERGY],
		      jobacct->tres_usage_out_tot[TRES_ARRAY_ENERGY],
		      jobacct->tres_usage_out_max[TRES_ARRAY_ENERGY],
		      jobacct->tres_usage_out_min[TRES_ARRAY_ENERGY]);

		if (profile &&
		    acct_gather_profile_g_is_active(ACCT_GATHER_PROFILE_TASK)) {
			jobacct->cur_time = ct;

			_record_profile(jobacct);

			jobacct->last_tres_usage_in_tot =
				jobacct->tres_usage_in_tot[TRES_ARRAY_FS_DISK];
			jobacct->last_tres_usage_out_tot =
				jobacct->tres_usage_out_tot[TRES_ARRAY_FS_DISK];
			jobacct->last_total_cputime =
				jobacct->tres_usage_in_tot[TRES_ARRAY_CPU];

			jobacct->last_time = jobacct->cur_time;
		}
	}
	list_iterator_destroy(itr);

	if (over_memory_kill == -1)
		over_memory_kill = slurm_get_job_acct_oom_kill();
	if (over_memory_kill)
		jobacct_gather_handle_mem_limit(total_job_mem,
						total_job_vsize);

finished:
	FREE_NULL_LIST(prec_list);
	processing = 0;
}
/* kellerman - cover the edges of an undirected graph on n vertices
 * with cliques (Kellerman's heuristic).  func(info, i, ind[]) must
 * store in ind[1..len] the vertices adjacent to i and return len.
 * Cliques are represented as extra vertices n+1..n+k of graph H, with
 * an arc (j, n+m) meaning "vertex j belongs to clique m".  Returns the
 * number of cliques in the covering found. */
int kellerman(int n, int (*func)(void *info, int i, int ind[]),
      void *info, void /* glp_graph */ *H_)
{     glp_graph *H = H_;
      /* set with O(1) membership test: list[1..size] holds members,
       * pos[j] is j's position in list (0 = not a member) */
      struct set W_, *W = &W_, V_, *V = &V_;
      glp_arc *a;
      int i, j, k, m, t, len, card, best;
      xassert(n >= 0);
      /* H := (V, 0; 0), where V is the set of vertices of graph G */
      glp_erase_graph(H, H->v_size, H->a_size);
      glp_add_vertices(H, n);
      /* W := 0 */
      W->size = 0;
      W->list = xcalloc(1+n, sizeof(int));
      W->pos = xcalloc(1+n, sizeof(int));
      memset(&W->pos[1], 0, sizeof(int) * n);
      /* V := 0 */
      V->size = 0;
      V->list = xcalloc(1+n, sizeof(int));
      V->pos = xcalloc(1+n, sizeof(int));
      memset(&V->pos[1], 0, sizeof(int) * n);
      /* main loop: process vertices in increasing order so each edge
       * (i,j), j < i, is covered exactly when i is processed */
      for (i = 1; i <= n; i++)
      {  /* W must be empty */
         xassert(W->size == 0);
         /* W := { j : i > j and (i,j) in E } */
         len = func(info, i, W->list);
         xassert(0 <= len && len <= n);
         for (t = 1; t <= len; t++)
         {  j = W->list[t];
            xassert(1 <= j && j <= n);
            if (j >= i) continue;
            xassert(W->pos[j] == 0);
            W->list[++W->size] = j, W->pos[j] = W->size;
         }
         /* on i-th iteration we need to cover edges (i,j) for all
            j in W */
         /* if W is empty, it is a special case */
         if (W->size == 0)
         {  /* set k := k + 1 and create new clique C[k] = { i } */
            k = glp_add_vertices(H, 1) - n;
            glp_add_arc(H, i, n + k);
            continue;
         }
         /* try to include vertex i into existing cliques */
         /* V must be empty */
         xassert(V->size == 0);
         /* k is the number of cliques found so far */
         k = H->nv - n;
         for (m = 1; m <= k; m++)
         {  /* do while V != W; since here V is within W, we can use
               equivalent condition: do while |V| < |W| */
            if (V->size == W->size) break;
            /* check if C[m] is within W */
            for (a = H->v[n + m]->in; a != NULL; a = a->h_next)
            {  j = a->tail->i;
               if (W->pos[j] == 0) break;
            }
            if (a != NULL) continue;
            /* C[m] is within W, expand clique C[m] with vertex i */
            /* C[m] := C[m] union {i} */
            glp_add_arc(H, i, n + m);
            /* V is a set of vertices whose incident edges are already
               covered by existing cliques */
            /* V := V union C[m] */
            for (a = H->v[n + m]->in; a != NULL; a = a->h_next)
            {  j = a->tail->i;
               if (V->pos[j] == 0)
                  V->list[++V->size] = j, V->pos[j] = V->size;
            }
         }
         /* remove from set W the vertices whose incident edges are
            already covered by existing cliques */
         /* W := W \ V, V := 0 */
         for (t = 1; t <= V->size; t++)
         {  j = V->list[t], V->pos[j] = 0;
            if (W->pos[j] != 0)
            {  /* remove vertex j from W (swap with last element) */
               if (W->pos[j] != W->size)
               {  int jj = W->list[W->size];
                  W->list[W->pos[j]] = jj;
                  W->pos[jj] = W->pos[j];
               }
               W->size--, W->pos[j] = 0;
            }
         }
         V->size = 0;
         /* now set W contains only vertices whose incident edges are
            still not covered by existing cliques; create new cliques
            to cover remaining edges until set W becomes empty */
         while (W->size > 0)
         {  /* find clique C[m], 1 <= m <= k, which shares maximal
               number of vertices with W; to break ties choose clique
               having smallest number m */
            m = 0, best = -1;
            k = H->nv - n;
            for (t = 1; t <= k; t++)
            {  /* compute cardinality of intersection of W and C[t] */
               card = 0;
               for (a = H->v[n + t]->in; a != NULL; a = a->h_next)
               {  j = a->tail->i;
                  if (W->pos[j] != 0) card++;
               }
               if (best < card)
                  m = t, best = card;
            }
            xassert(m > 0);
            /* set k := k + 1 and create new clique:
               C[k] := (W intersect C[m]) union { i },
               which covers all edges incident to vertices from
               (W intersect C[m]) */
            k = glp_add_vertices(H, 1) - n;
            for (a = H->v[n + m]->in; a != NULL; a = a->h_next)
            {  j = a->tail->i;
               if (W->pos[j] != 0)
               {  /* vertex j is in both W and C[m]; include it in new
                     clique C[k] */
                  glp_add_arc(H, j, n + k);
                  /* remove vertex j from W, since edge (i,j) will be
                     covered by new clique C[k] */
                  if (W->pos[j] != W->size)
                  {  int jj = W->list[W->size];
                     W->list[W->pos[j]] = jj;
                     W->pos[jj] = W->pos[j];
                  }
                  W->size--, W->pos[j] = 0;
               }
            }
            /* include vertex i to new clique C[k] to cover edges (i,j)
               incident to all vertices j just removed from W */
            glp_add_arc(H, i, n + k);
         }
      }
      /* free working arrays */
      xfree(W->list);
      xfree(W->pos);
      xfree(V->list);
      xfree(V->pos);
      /* return the number of cliques in the edge covering found */
      return H->nv - n;
}
extern int archive_write_file(Buf buffer, char *cluster_name, time_t period_start, time_t period_end, char *arch_dir, char *arch_type, uint32_t archive_period) { int fd = 0; int rc = SLURM_SUCCESS; char *old_file = NULL, *new_file = NULL, *reg_file = NULL; static int high_buffer_size = (1024 * 1024); static pthread_mutex_t local_file_lock = PTHREAD_MUTEX_INITIALIZER; xassert(buffer); slurm_mutex_lock(&local_file_lock); /* write the buffer to file */ reg_file = _make_archive_name(period_start, period_end, cluster_name, arch_dir, arch_type, archive_period); debug("Storing %s archive for %s at %s", arch_type, cluster_name, reg_file); old_file = xstrdup_printf("%s.old", reg_file); new_file = xstrdup_printf("%s.new", reg_file); fd = creat(new_file, 0600); if (fd < 0) { error("Can't save archive, create file %s error %m", new_file); rc = SLURM_ERROR; } else { int pos = 0, nwrite = get_buf_offset(buffer), amount; char *data = (char *)get_buf_data(buffer); high_buffer_size = MAX(nwrite, high_buffer_size); while (nwrite > 0) { amount = write(fd, &data[pos], nwrite); if ((amount < 0) && (errno != EINTR)) { error("Error writing file %s, %m", new_file); rc = SLURM_ERROR; break; } nwrite -= amount; pos += amount; } fsync(fd); close(fd); } if (rc) (void) unlink(new_file); else { /* file shuffle */ int ign; /* avoid warning */ (void) unlink(old_file); ign = link(reg_file, old_file); (void) unlink(reg_file); ign = link(new_file, reg_file); (void) unlink(new_file); } xfree(old_file); xfree(reg_file); xfree(new_file); slurm_mutex_unlock(&local_file_lock); return rc; }
/* glp_mincost_okalg - find minimum-cost flow with out-of-kilter algorithm
 *
 * G      - flow network; node supplies/demands and arc attributes are read
 *          from the vertex/arc data blocks at the byte offsets below
 * v_rhs  - offset of node rhs (supply > 0, demand < 0); < 0 means all zero
 * a_low  - offset of arc lower flow bound; < 0 means all zero
 * a_cap  - offset of arc capacity; < 0 means all capacities are 1
 * a_cost - offset of arc cost per unit flow; < 0 means all zero
 * sol    - where to store the total cost of the flow found (may be NULL)
 * a_x    - offset to store resulting arc flows; < 0 to skip
 * v_pi   - offset to store node potentials (negated); < 0 to skip
 *
 * Returns 0 (optimal), GLP_EDATA (bad data: self-loop, non-integer or
 * out-of-range value), GLP_ENOPFS (no feasible flow), GLP_ERANGE
 * (integer overflow in okalg), or GLP_EFAIL (internal logic error).
 *
 * The routine builds an auxiliary circulation network: the original arcs
 * plus artificial source s / sink t arcs carrying fixed supply/demand,
 * and one feedback arc t->s, then calls the okalg solver on it. */
int glp_mincost_okalg(glp_graph *G, int v_rhs, int a_low, int a_cap,
      int a_cost, double *sol, int a_x, int v_pi)
{     /* find minimum-cost flow with out-of-kilter algorithm */
      glp_vertex *v;
      glp_arc *a;
      int nv, na, i, k, s, t, *tail, *head, *low, *cap, *cost, *x, *pi,
         ret;
      double sum, temp;
      /* validate all data-block offsets against the block sizes */
      if (v_rhs >= 0 && v_rhs > G->v_size - (int)sizeof(double))
         xerror("glp_mincost_okalg: v_rhs = %d; invalid offset\n",
            v_rhs);
      if (a_low >= 0 && a_low > G->a_size - (int)sizeof(double))
         xerror("glp_mincost_okalg: a_low = %d; invalid offset\n",
            a_low);
      if (a_cap >= 0 && a_cap > G->a_size - (int)sizeof(double))
         xerror("glp_mincost_okalg: a_cap = %d; invalid offset\n",
            a_cap);
      if (a_cost >= 0 && a_cost > G->a_size - (int)sizeof(double))
         xerror("glp_mincost_okalg: a_cost = %d; invalid offset\n",
            a_cost);
      if (a_x >= 0 && a_x > G->a_size - (int)sizeof(double))
         xerror("glp_mincost_okalg: a_x = %d; invalid offset\n", a_x);
      if (v_pi >= 0 && v_pi > G->v_size - (int)sizeof(double))
         xerror("glp_mincost_okalg: v_pi = %d; invalid offset\n",
            v_pi);
      /* s is artificial source node */
      s = G->nv + 1;
      /* t is artificial sink node */
      t = s + 1;
      /* nv is the total number of nodes in the resulting network */
      nv = t;
      /* na is the total number of arcs in the resulting network;
         G->na original arcs + one feedback arc + one artificial arc
         per node with non-zero rhs (counted in the loop below) */
      na = G->na + 1;
      for (i = 1; i <= G->nv; i++)
      {  v = G->v[i];
         if (v_rhs >= 0)
            memcpy(&temp, (char *)v->data + v_rhs, sizeof(double));
         else
            temp = 0.0;
         if (temp != 0.0) na++;
      }
      /* allocate working arrays (1-based, as used by okalg) */
      tail = xcalloc(1+na, sizeof(int));
      head = xcalloc(1+na, sizeof(int));
      low = xcalloc(1+na, sizeof(int));
      cap = xcalloc(1+na, sizeof(int));
      cost = xcalloc(1+na, sizeof(int));
      x = xcalloc(1+na, sizeof(int));
      pi = xcalloc(1+nv, sizeof(int));
      /* construct the resulting network */
      k = 0;
      /* (original arcs) */
      for (i = 1; i <= G->nv; i++)
      {  v = G->v[i];
         for (a = v->out; a != NULL; a = a->t_next)
         {  k++;
            tail[k] = a->tail->i;
            head[k] = a->head->i;
            /* self-loops are not allowed */
            if (tail[k] == head[k])
            {  ret = GLP_EDATA;
               goto done;
            }
            if (a_low >= 0)
               memcpy(&temp, (char *)a->data + a_low, sizeof(double));
            else
               temp = 0.0;
            /* lower bound must be a non-negative integer within int
               range */
            if (!(0.0 <= temp && temp <= (double)INT_MAX &&
                  temp == floor(temp)))
            {  ret = GLP_EDATA;
               goto done;
            }
            low[k] = (int)temp;
            if (a_cap >= 0)
               memcpy(&temp, (char *)a->data + a_cap, sizeof(double));
            else
               temp = 1.0;
            /* capacity must be an integer in [low, INT_MAX] */
            if (!((double)low[k] <= temp && temp <= (double)INT_MAX &&
                  temp == floor(temp)))
            {  ret = GLP_EDATA;
               goto done;
            }
            cap[k] = (int)temp;
            if (a_cost >= 0)
               memcpy(&temp, (char *)a->data + a_cost, sizeof(double));
            else
               temp = 0.0;
            /* cost must be an integer within int range (any sign) */
            if (!(fabs(temp) <= (double)INT_MAX && temp == floor(temp)))
            {  ret = GLP_EDATA;
               goto done;
            }
            cost[k] = (int)temp;
         }
      }
      /* (artificial arcs) */
      sum = 0.0;
      for (i = 1; i <= G->nv; i++)
      {  v = G->v[i];
         if (v_rhs >= 0)
            memcpy(&temp, (char *)v->data + v_rhs, sizeof(double));
         else
            temp = 0.0;
         /* rhs must be an integer within int range */
         if (!(fabs(temp) <= (double)INT_MAX && temp == floor(temp)))
         {  ret = GLP_EDATA;
            goto done;
         }
         if (temp > 0.0)
         {  /* artificial arc from s to original source i */
            k++;
            tail[k] = s;
            head[k] = i;
            low[k] = cap[k] = (int)(+temp); /* supply */
            cost[k] = 0;
            sum += (double)temp;
         }
         else if (temp < 0.0)
         {  /* artificial arc from original sink i to t */
            k++;
            tail[k] = i;
            head[k] = t;
            low[k] = cap[k] = (int)(-temp); /* demand */
            cost[k] = 0;
         }
      }
      /* (feedback arc from t to s) */
      k++;
      xassert(k == na);
      tail[k] = t;
      head[k] = s;
      if (sum > (double)INT_MAX)
      {  ret = GLP_EDATA;
         goto done;
      }
      low[k] = cap[k] = (int)sum; /* total supply/demand */
      cost[k] = 0;
      /* find minimal-cost circulation in the resulting network */
      ret = okalg(nv, na, tail, head, low, cap, cost, x, pi);
      switch (ret)
      {  case 0:
            /* optimal circulation found */
            ret = 0;
            break;
         case 1:
            /* no feasible circulation exists */
            ret = GLP_ENOPFS;
            break;
         case 2:
            /* integer overflow occured */
            ret = GLP_ERANGE;
            goto done;
         case 3:
            /* optimality test failed (logic error) */
            ret = GLP_EFAIL;
            goto done;
         default:
            xassert(ret != ret);
      }
      /* store solution components (also for the GLP_ENOPFS case,
         which falls through the switch above) */
      /* (objective function = the total cost) */
      if (sol != NULL)
      {  temp = 0.0;
         for (k = 1; k <= na; k++)
            temp += (double)cost[k] * (double)x[k];
         *sol = temp;
      }
      /* (arc flows; k follows the same arc order used when building
         the network, so x[k] matches arc a) */
      if (a_x >= 0)
      {  k = 0;
         for (i = 1; i <= G->nv; i++)
         {  v = G->v[i];
            for (a = v->out; a != NULL; a = a->t_next)
            {  temp = (double)x[++k];
               memcpy((char *)a->data + a_x, &temp, sizeof(double));
            }
         }
      }
      /* (node potentials = Lagrange multipliers; sign is flipped) */
      if (v_pi >= 0)
      {  for (i = 1; i <= G->nv; i++)
         {  v = G->v[i];
            temp = - (double)pi[i];
            memcpy((char *)v->data + v_pi, &temp, sizeof(double));
         }
      }
done: /* free working arrays */
      xfree(tail);
      xfree(head);
      xfree(low);
      xfree(cap);
      xfree(cost);
      xfree(x);
      xfree(pi);
      return ret;
}
/*
 * bg_free_block - deallocate (free) a BlueGene block, polling until it
 * reaches the FREE state or the retry limit is hit.
 *
 * bg_record IN/OUT - block to free; its state, boot_state and boot_count
 *                    fields are updated in place
 * wait      IN     - if true, sleep/poll until the block is actually free;
 *                    if false, make one attempt and return
 * locked    IN     - if true, caller already holds block_state_mutex
 *                    (the mutex is still released/reacquired around the
 *                    poll sleep and the select_g_update_block() call)
 * RET SLURM_SUCCESS, or SLURM_ERROR if the block would not deallocate
 */
extern int bg_free_block(bg_record_t *bg_record, bool wait, bool locked)
{
	int rc = SLURM_SUCCESS;
	int count = 0;

	if (!bg_record) {
		error("bg_free_block: there was no bg_record");
		return SLURM_ERROR;
	}

	if (!locked)
		slurm_mutex_lock(&block_state_mutex);

	while (count < MAX_FREE_RETRIES) {
		/* block was removed */
		if (bg_record->magic != BLOCK_MAGIC) {
			error("block was removed while freeing it here");
			xassert(0);
			if (!locked)
				slurm_mutex_unlock(&block_state_mutex);
			return SLURM_SUCCESS;
		}
		/* Reset these here so we don't try to reboot it
		   when the state goes to free. */
		bg_record->boot_state = 0;
		bg_record->boot_count = 0;
		/* Here we don't need to check if the block is still
		 * in existence since this function can't be called on
		 * the same block twice.  It may have already been
		 * removed at this point also.
		 */
#ifdef HAVE_BG_FILES
		/* Real hardware: ask the bridge API to free the block
		 * unless it is already free/terminating. */
		if (bg_record->state != BG_BLOCK_FREE
		    && bg_record->state != BG_BLOCK_TERM) {
			if (bg_conf->slurm_debug_flags
			    & DEBUG_FLAG_SELECT_TYPE)
				info("bridge_destroy %s",
				     bg_record->bg_block_id);
			rc = bridge_block_free(bg_record);
			if (rc != SLURM_SUCCESS) {
				if (rc == BG_ERROR_BLOCK_NOT_FOUND) {
					/* already gone on the hardware
					 * side; treat as freed */
					debug("block %s is not found",
					      bg_record->bg_block_id);
					bg_record->state = BG_BLOCK_FREE;
					break;
				} else if (rc == BG_ERROR_FREE) {
					if (bg_conf->slurm_debug_flags
					    & DEBUG_FLAG_SELECT_TYPE)
						info("bridge_block_free"
						     "(%s): %s State = %s",
						     bg_record->bg_block_id,
						     bg_err_str(rc),
						     bg_block_state_string(
							     bg_record->state));
				} else if (rc == BG_ERROR_INVALID_STATE) {
#ifndef HAVE_BGL
					/* If the state is error and we
					   get an incompatible state back
					   here, it means we set it
					   ourselves so break out.
					*/
					if (bg_record->state
					    & BG_BLOCK_ERROR_FLAG)
						break;
#endif
					if (bg_conf->slurm_debug_flags
					    & DEBUG_FLAG_SELECT_TYPE)
						info("bridge_block_free"
						     "(%s): %s State = %s",
						     bg_record->bg_block_id,
						     bg_err_str(rc),
						     bg_block_state_string(
							     bg_record->state));
#ifdef HAVE_BGQ
					if (bg_record->state
					    != BG_BLOCK_FREE
					    && bg_record->state
					    != BG_BLOCK_TERM)
						bg_record->state =
							BG_BLOCK_TERM;
#endif
				} else {
					error("bridge_block_free"
					      "(%s): %s State = %s",
					      bg_record->bg_block_id,
					      bg_err_str(rc),
					      bg_block_state_string(
						      bg_record->state));
				}
			}
		}
#else
		/* Fake a free since we are not deallocating state before
		 * this (emulation build without the bridge API). */
		if (bg_record->state & BG_BLOCK_ERROR_FLAG) {
			/* This will set the state to ERROR(Free)
			 * just incase the state was ERROR(SOMETHING ELSE) */
			bg_record->state = BG_BLOCK_ERROR_FLAG;
			break;
		} else if (!wait || (count >= 3))
			bg_record->state = BG_BLOCK_FREE;
		else if (bg_record->state != BG_BLOCK_FREE)
			bg_record->state = BG_BLOCK_TERM;
#endif
		/* done if we are not waiting, or the block reached a
		 * terminal state */
		if (!wait || (bg_record->state == BG_BLOCK_FREE)
#ifndef HAVE_BGL
		    ||  (bg_record->state & BG_BLOCK_ERROR_FLAG)
#endif
			) {
			break;
		}
		/* If we were locked outside of this we need to unlock
		   to not cause deadlock on this mutex until we are
		   done.
		*/
		slurm_mutex_unlock(&block_state_mutex);
		sleep(FREE_SLEEP_INTERVAL);
		count++;
		slurm_mutex_lock(&block_state_mutex);
	}

	rc = SLURM_SUCCESS;
	if ((bg_record->state == BG_BLOCK_FREE)
	    || (bg_record->state & BG_BLOCK_ERROR_FLAG)) {
		if (bg_record->err_ratio
		    && (bg_record->state == BG_BLOCK_FREE)) {
			/* Sometime the realtime server can report
			   software error on cnodes even though the
			   block is free.  If this is the case we need
			   to manually clear them.
			*/
			ba_mp_t *found_ba_mp;
			ListIterator itr =
				list_iterator_create(bg_record->ba_mp_list);
			debug("block %s is free, but has %u cnodes in error",
			      bg_record->bg_block_id,
			      bg_record->cnode_err_cnt);
			while ((found_ba_mp = list_next(itr))) {
				if (!found_ba_mp->used)
					continue;
				if (!found_ba_mp->cnode_err_bitmap)
					found_ba_mp->cnode_err_bitmap =
						bit_alloc(
							bg_conf->mp_cnode_cnt);
				bit_nclear(found_ba_mp->cnode_err_bitmap, 0,
					   bit_size(found_ba_mp->
						    cnode_err_bitmap)-1);
			}
			list_iterator_destroy(itr);
			bg_record->cnode_err_cnt = 0;
			bg_record->err_ratio = 0;
		}
		remove_from_bg_list(bg_lists->booted, bg_record);
	} else if (count >= MAX_FREE_RETRIES) {
		/* Something isn't right, go mark this one in an error
		   state. */
		update_block_msg_t block_msg;
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("bg_free_block: block %s is not in state "
			     "free (%s), putting it in error state.",
			     bg_record->bg_block_id,
			     bg_block_state_string(bg_record->state));
		slurm_init_update_block_msg(&block_msg);
		block_msg.bg_block_id = bg_record->bg_block_id;
		block_msg.state = BG_BLOCK_ERROR_FLAG;
		block_msg.reason = "Block would not deallocate";
		/* drop the lock around the plugin call to avoid
		 * deadlock on block_state_mutex */
		slurm_mutex_unlock(&block_state_mutex);
		select_g_update_block(&block_msg);
		slurm_mutex_lock(&block_state_mutex);
		rc = SLURM_ERROR;
	}
	if (!locked)
		slurm_mutex_unlock(&block_state_mutex);

	return rc;
}
/* glp_maxflow_lp - convert maximum flow problem to LP
 *
 * Builds in lp an LP formulation of the max-flow problem on network G
 * from source node s to sink node t: one conservation row per node,
 * one flow column per arc (bounded by the capacity read at byte offset
 * a_cap in the arc data block, or 1 if a_cap < 0), objective = net flow
 * out of the source, maximized.  If names is GLP_ON, the problem, row
 * and column names are assigned from the graph. */
void glp_maxflow_lp(glp_prob *lp, glp_graph *G, int names, int s, int t,
      int a_cap)
{     glp_vertex *node;
      glp_arc *arc;
      int u, col, bnd, rn[1+2];
      double ucap, rv[1+2];
      /* validate the arguments */
      if (!(names == GLP_ON || names == GLP_OFF))
         xerror("glp_maxflow_lp: names = %d; invalid parameter\n",
            names);
      if (!(1 <= s && s <= G->nv))
         xerror("glp_maxflow_lp: s = %d; source node number out of ran"
            "ge\n", s);
      if (!(1 <= t && t <= G->nv))
         xerror("glp_maxflow_lp: t = %d: sink node number out of range"
            " \n", t);
      if (s == t)
         xerror("glp_maxflow_lp: s = t = %d; source and sink nodes mus"
            "t be distinct\n", s);
      if (a_cap >= 0 && a_cap > G->a_size - (int)sizeof(double))
         xerror("glp_maxflow_lp: a_cap = %d; invalid offset\n", a_cap);
      /* start from a clean problem object */
      glp_erase_prob(lp);
      if (names)
         glp_set_prob_name(lp, G->name);
      /* the net outflow of the source is maximized */
      glp_set_obj_dir(lp, GLP_MAX);
      /* one conservation row per node; the source row is >= 0, the
         sink row is <= 0, all other rows are fixed at zero */
      glp_add_rows(lp, G->nv);
      for (u = 1; u <= G->nv; u++)
      {  node = G->v[u];
         if (names)
            glp_set_row_name(lp, u, node->name);
         if (u == s)
            bnd = GLP_LO;
         else if (u == t)
            bnd = GLP_UP;
         else
            bnd = GLP_FX;
         glp_set_row_bnds(lp, u, bnd, 0.0, 0.0);
      }
      /* one flow column per arc, walked in the same tail-node order
         used everywhere else in the graph API */
      if (G->na > 0)
         glp_add_cols(lp, G->na);
      col = 0;
      for (u = 1; u <= G->nv; u++)
      {  node = G->v[u];
         for (arc = node->out; arc != NULL; arc = arc->t_next)
         {  col++;
            if (names)
            {  char name[50+1];
               sprintf(name, "x[%d,%d]", arc->tail->i, arc->head->i);
               xassert(strlen(name) < sizeof(name));
               glp_set_col_name(lp, col, name);
            }
            /* a self-loop contributes nothing to conservation, so its
               column stays empty */
            if (arc->tail->i != arc->head->i)
            {  rn[1] = arc->tail->i, rv[1] = +1.0;
               rn[2] = arc->head->i, rv[2] = -1.0;
               glp_set_mat_col(lp, col, 2, rn, rv);
            }
            /* read the capacity (default 1 when no offset given) */
            if (a_cap >= 0)
               memcpy(&ucap, (char *)arc->data + a_cap,
                  sizeof(double));
            else
               ucap = 1.0;
            /* DBL_MAX means uncapacitated; zero capacity fixes the
               flow at zero */
            if (ucap == DBL_MAX)
               bnd = GLP_LO;
            else if (ucap != 0.0)
               bnd = GLP_DB;
            else
               bnd = GLP_FX;
            glp_set_col_bnds(lp, col, bnd, 0.0, ucap);
            /* arcs leaving s add to the objective, arcs entering s
               subtract from it */
            if (arc->tail->i == s)
               glp_set_obj_coef(lp, col, +1.0);
            else if (arc->head->i == s)
               glp_set_obj_coef(lp, col, -1.0);
         }
      }
      xassert(col == G->na);
      return;
}
/*
 * Update front end node state
 * update_front_end_msg_ptr IN change specification
 * RET SLURM_SUCCESS or error code
 *
 * For each host named in msg_ptr->name the matching entry in the global
 * front_end_nodes table is updated: NODE_RESUME sets IDLE and clears the
 * reason fields, NODE_STATE_DRAIN sets the drain flag (recording the
 * reason if supplied), NODE_STATE_DOWN delegates to set_front_end_down(),
 * and NO_VAL leaves the state untouched.  Unknown host names yield
 * ESLURM_INVALID_NODE_NAME.  On non-front-end builds this is a stub.
 */
extern int update_front_end(update_front_end_msg_t *msg_ptr)
{
#ifdef HAVE_FRONT_END
	char *this_node_name = NULL;
	hostlist_t host_list;
	front_end_record_t *front_end_ptr;
	int i, rc = SLURM_SUCCESS;
	time_t now = time(NULL);

	if ((host_list = hostlist_create(msg_ptr->name)) == NULL) {
		error("hostlist_create error on %s: %m", msg_ptr->name);
		return ESLURM_INVALID_NODE_NAME;
	}

	last_front_end_update = now;
	while ((this_node_name = hostlist_shift(host_list))) {
		/* linear scan of the global front end node table */
		for (i = 0, front_end_ptr = front_end_nodes;
		     i < front_end_node_cnt; i++, front_end_ptr++) {
			xassert(front_end_ptr->magic == FRONT_END_MAGIC);
			if (xstrcmp(this_node_name, front_end_ptr->name))
				continue;
			if (msg_ptr->node_state == (uint32_t)NO_VAL) {
				;	/* No change in node state */
			} else if (msg_ptr->node_state == NODE_RESUME) {
				/* return to service and clear any
				 * recorded drain/down reason */
				front_end_ptr->node_state = NODE_STATE_IDLE;
				xfree(front_end_ptr->reason);
				front_end_ptr->reason_time = 0;
				front_end_ptr->reason_uid = 0;
			} else if (msg_ptr->node_state == NODE_STATE_DRAIN) {
				front_end_ptr->node_state |= NODE_STATE_DRAIN;
				if (msg_ptr->reason) {
					xfree(front_end_ptr->reason);
					front_end_ptr->reason =
						xstrdup(msg_ptr->reason);
					front_end_ptr->reason_time = now;
					front_end_ptr->reason_uid =
						msg_ptr->reason_uid;
				}
			} else if (msg_ptr->node_state == NODE_STATE_DOWN) {
				set_front_end_down(front_end_ptr,
						   msg_ptr->reason);
			}
			if (msg_ptr->node_state != (uint32_t) NO_VAL) {
				info("update_front_end: set state of %s to %s",
				     this_node_name,
				     node_state_string(front_end_ptr->
						       node_state));
			}
			break;
		}
		/* i == front_end_node_cnt means the name matched no entry */
		if (i >= front_end_node_cnt) {
			info("update_front_end: could not find front end: %s",
			     this_node_name);
			rc = ESLURM_INVALID_NODE_NAME;
		}
		/* hostlist_shift() output is malloc'ed; plain free() */
		free(this_node_name);
	}
	hostlist_destroy(host_list);
	return rc;
#else
	return ESLURM_INVALID_NODE_NAME;
#endif
}
/*
 * _lllp_generate_cpu_bind
 *
 * Generate the cpu_bind type and string given an array of bitstr_t masks
 *
 * IN/OUT- job launch request (cpu_bind_type and cpu_bind updated)
 * IN- maximum number of tasks
 * IN- array of masks
 *
 * Builds a comma-separated list of "0x..." hex masks (one per non-NULL
 * entry in masks[]) and hands ownership of the string to req->cpu_bind,
 * setting CPU_BIND_MASK.  If every mask is NULL, req->cpu_bind is set to
 * NULL and CPU_BIND_VERBOSE is cleared.
 */
static void _lllp_generate_cpu_bind(launch_tasks_request_msg_t *req,
				    const uint32_t maxtasks,
				    bitstr_t **masks)
{
	int i, num_bits=0, masks_len;
	bitstr_t *bitmask;
	bitoff_t charsize;
	char *masks_str = NULL;
	char buf_type[100];

	/* size of the first non-NULL mask determines the hex width */
	for (i = 0; i < maxtasks; i++) {
		bitmask = masks[i];
		if (bitmask) {
			num_bits = bit_size(bitmask);
			break;
		}
	}
	charsize = (num_bits + 3) / 4;		/* ASCII hex digits */
	charsize += 3;				/* "0x" and trailing "," */
	masks_len = maxtasks * charsize + 1;	/* number of masks + null */

	/* BUG FIX: cast to int — uint32_t/bitoff_t don't match "%d" */
	debug3("_lllp_generate_cpu_bind %d %d %d", (int) maxtasks,
	       (int) charsize, masks_len);

	masks_str = xmalloc(masks_len);
	masks_len = 0;
	for (i = 0; i < maxtasks; i++) {
		char *str;
		int curlen;
		bitmask = masks[i];
		if (bitmask == NULL) {
			continue;
		}
		str = (char *)bit_fmt_hexmask(bitmask);
		curlen = strlen(str) + 1;

		/* overwrite the previous NUL with a separator */
		if (masks_len > 0)
			masks_str[masks_len-1]=',';
		strncpy(&masks_str[masks_len], str, curlen);
		masks_len += curlen;
		/* byte after the copied NUL must still be zero-filled */
		xassert(masks_str[masks_len] == '\0');
		xfree(str);
	}

	xfree(req->cpu_bind);	/* xfree() is NULL-safe; no guard needed */
	if (masks_str[0] != '\0') {
		req->cpu_bind = masks_str;	/* ownership transferred */
		req->cpu_bind_type |= CPU_BIND_MASK;
	} else {
		req->cpu_bind = NULL;
		req->cpu_bind_type &= ~CPU_BIND_VERBOSE;
	}

	/* clear mask generation bits */
	req->cpu_bind_type &= ~CPU_BIND_TO_THREADS;
	req->cpu_bind_type &= ~CPU_BIND_TO_CORES;
	req->cpu_bind_type &= ~CPU_BIND_TO_SOCKETS;
	req->cpu_bind_type &= ~CPU_BIND_TO_LDOMS;

	slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
	info("_lllp_generate_cpu_bind jobid [%u]: %s, %s",
	     req->job_id, buf_type, masks_str);

	/* BUG FIX: when the string was empty it was never handed off to
	 * req->cpu_bind, so it leaked; release it after logging */
	if (req->cpu_bind == NULL)
		xfree(masks_str);
}