Ejemplo n.º 1
0
/* glp_write_lp - write problem data in CPLEX LP format
 *
 * P      problem object whose name, objective, rows and columns are
 *        written;
 * parm   control parameters; if NULL, a local structure initialized by
 *        glp_init_cpxcp is used instead;
 * fname  name of the output file, opened with glp_open in "w" mode.
 *
 * Returns 0 if all sections were written and glp_ioerr reported no
 * error, or 1 if the file could not be created or a write error was
 * detected. Progress and error messages are printed with xprintf.
 *
 * The local variable count tracks the number of lines written; it is
 * incremented in the same comma-expression as each xfprintf so that the
 * two cannot get out of step. */
int glp_write_lp(glp_prob *P, const glp_cpxcp *parm, const char *fname)
{     /* write problem data in CPLEX LP format */
      glp_cpxcp _parm;
      struct csa _csa, *csa = &_csa;
      glp_file *fp;
      GLPROW *row;
      GLPCOL *col;
      GLPAIJ *aij;
      int i, j, len, flag, count, ret;
      char line[1000+1], term[500+1], name[255+1];
      xprintf("Writing problem data to '%s'...\n", fname);
      /* fall back to default control parameters if none were given */
      if (parm == NULL)
         glp_init_cpxcp(&_parm), parm = &_parm;
      /* check control parameters */
      check_parm("glp_write_lp", parm);
      /* initialize common storage area */
      csa->P = P;
      csa->parm = parm;
      /* create output CPLEX LP file */
      fp = glp_open(fname, "w"), count = 0;
      if (fp == NULL)
      {  xprintf("Unable to create '%s' - %s\n", fname, get_err_msg());
         ret = 1;
         goto done;
      }
      /* write problem name */
      xfprintf(fp, "\\* Problem: %s *\\\n",
         P->name == NULL ? "Unknown" : P->name), count++;
      xfprintf(fp, "\n"), count++;
      /* the problem should contain at least one row and one column */
      if (!(P->m > 0 && P->n > 0))
      {  xprintf("Warning: problem has no rows/columns\n");
         xfprintf(fp, "\\* WARNING: PROBLEM HAS NO ROWS/COLUMNS *\\\n"),
            count++;
         xfprintf(fp, "\n"), count++;
         /* only the end keyword is written for such a problem */
         goto skip;
      }
      /* write the objective function definition */
      if (P->dir == GLP_MIN)
         xfprintf(fp, "Minimize\n"), count++;
      else if (P->dir == GLP_MAX)
         xfprintf(fp, "Maximize\n"), count++;
      else
         /* the condition is always false, so the assertion fires for
            any other direction value */
         xassert(P != P);
      /* index 0 is passed to row_name to obtain the objective label
         (presumably row 0 denotes the objective; see row_name) */
      row_name(csa, 0, name);
      sprintf(line, " %s:", name);
      /* len counts the terms emitted for the objective */
      len = 0;
      for (j = 1; j <= P->n; j++)
      {  col = P->col[j];
         /* a column is mentioned in the objective if its coefficient is
            non-zero or if it has no constraint coefficients at all */
         if (col->coef != 0.0 || col->ptr == NULL)
         {  len++;
            col_name(csa, j, name);
            if (col->coef == 0.0)
               sprintf(term, " + 0 %s", name); /* empty column */
            else if (col->coef == +1.0)
               sprintf(term, " + %s", name);
            else if (col->coef == -1.0)
               sprintf(term, " - %s", name);
            else if (col->coef > 0.0)
               sprintf(term, " + %.*g %s", DBL_DIG, +col->coef, name);
            else
               sprintf(term, " - %.*g %s", DBL_DIG, -col->coef, name);
            /* flush the current line before it would exceed 72
               characters and continue on a fresh one */
            if (strlen(line) + strlen(term) > 72)
               xfprintf(fp, "%s\n", line), line[0] = '\0', count++;
            strcat(line, term);
         }
      }
      if (len == 0)
      {  /* empty objective */
         sprintf(term, " 0 %s", col_name(csa, 1, name));
         strcat(line, term);
      }
      xfprintf(fp, "%s\n", line), count++;
      /* the constant term is only recorded as a comment line */
      if (P->c0 != 0.0)
         xfprintf(fp, "\\* constant term = %.*g *\\\n", DBL_DIG, P->c0),
            count++;
      xfprintf(fp, "\n"), count++;
      /* write the constraints section */
      xfprintf(fp, "Subject To\n"), count++;
      for (i = 1; i <= P->m; i++)
      {  row = P->row[i];
         if (row->type == GLP_FR) continue; /* skip free row */
         row_name(csa, i, name);
         sprintf(line, " %s:", name);
         /* linear form */
         for (aij = row->ptr; aij != NULL; aij = aij->r_next)
         {  col_name(csa, aij->col->j, name);
            if (aij->val == +1.0)
               sprintf(term, " + %s", name);
            else if (aij->val == -1.0)
               sprintf(term, " - %s", name);
            else if (aij->val > 0.0)
               sprintf(term, " + %.*g %s", DBL_DIG, +aij->val, name);
            else
               sprintf(term, " - %.*g %s", DBL_DIG, -aij->val, name);
            if (strlen(line) + strlen(term) > 72)
               xfprintf(fp, "%s\n", line), line[0] = '\0', count++;
            strcat(line, term);
         }
         if (row->type == GLP_DB)
         {  /* double-bounded (ranged) constraint */
            /* subtract an auxiliary variable ~r_i; the row is then
               written as an equality on lb, and ~r_i is bounded by
               0 and ub - lb in the bounds section below */
            sprintf(term, " - ~r_%d", i);
            if (strlen(line) + strlen(term) > 72)
               xfprintf(fp, "%s\n", line), line[0] = '\0', count++;
            strcat(line, term);
         }
         else if (row->ptr == NULL)
         {  /* empty constraint */
            sprintf(term, " 0 %s", col_name(csa, 1, name));
            strcat(line, term);
         }
         /* right hand-side */
         if (row->type == GLP_LO)
            sprintf(term, " >= %.*g", DBL_DIG, row->lb);
         else if (row->type == GLP_UP)
            sprintf(term, " <= %.*g", DBL_DIG, row->ub);
         else if (row->type == GLP_DB || row->type == GLP_FX)
            sprintf(term, " = %.*g", DBL_DIG, row->lb);
         else
            /* always-false assertion: unexpected row type */
            xassert(row != row);
         if (strlen(line) + strlen(term) > 72)
            xfprintf(fp, "%s\n", line), line[0] = '\0', count++;
         strcat(line, term);
         xfprintf(fp, "%s\n", line), count++;
      }
      xfprintf(fp, "\n"), count++;
      /* write the bounds section */
      /* flag records whether the section keyword has been written yet,
         so the keyword appears only if there is at least one entry */
      flag = 0;
      /* bounds of the auxiliary variables of double-bounded rows */
      for (i = 1; i <= P->m; i++)
      {  row = P->row[i];
         if (row->type != GLP_DB) continue;
         if (!flag)
            xfprintf(fp, "Bounds\n"), flag = 1, count++;
         xfprintf(fp, " 0 <= ~r_%d <= %.*g\n",
            i, DBL_DIG, row->ub - row->lb), count++;
      }
      /* bounds of structural variables */
      for (j = 1; j <= P->n; j++)
      {  col = P->col[j];
         /* columns with a zero lower bound and no upper bound are not
            listed (presumably the default bound of the format) */
         if (col->type == GLP_LO && col->lb == 0.0) continue;
         if (!flag)
            xfprintf(fp, "Bounds\n"), flag = 1, count++;
         col_name(csa, j, name);
         if (col->type == GLP_FR)
            xfprintf(fp, " %s free\n", name), count++;
         else if (col->type == GLP_LO)
            xfprintf(fp, " %s >= %.*g\n",
               name, DBL_DIG, col->lb), count++;
         else if (col->type == GLP_UP)
            xfprintf(fp, " -Inf <= %s <= %.*g\n",
               name, DBL_DIG, col->ub), count++;
         else if (col->type == GLP_DB)
            xfprintf(fp, " %.*g <= %s <= %.*g\n",
               DBL_DIG, col->lb, name, DBL_DIG, col->ub), count++;
         else if (col->type == GLP_FX)
            xfprintf(fp, " %s = %.*g\n",
               name, DBL_DIG, col->lb), count++;
         else
            /* always-false assertion: unexpected column type */
            xassert(col != col);
      }
      if (flag) xfprintf(fp, "\n"), count++;
      /* write the integer section */
      flag = 0;
      for (j = 1; j <= P->n; j++)
      {  col = P->col[j];
         if (col->kind == GLP_CV) continue;
         xassert(col->kind == GLP_IV);
         if (!flag)
            xfprintf(fp, "Generals\n"), flag = 1, count++;
         xfprintf(fp, " %s\n", col_name(csa, j, name)), count++;
      }
      if (flag) xfprintf(fp, "\n"), count++;
skip: /* write the end keyword */
      xfprintf(fp, "End\n"), count++;
#if 0 /* FIXME */
      xfflush(fp);
#endif
      /* report any i/o error noticed on the output stream */
      if (glp_ioerr(fp))
      {  xprintf("Write error on '%s' - %s\n", fname, get_err_msg());
         ret = 1;
         goto done;
      }
      /* problem data has been successfully written */
      xprintf("%d lines were written\n", count);
      ret = 0;
      /* common exit: fp is NULL only when the file could not be
         created */
done: if (fp != NULL) glp_close(fp);
      return ret;
}
Ejemplo n.º 2
0
/*
 * _track_freeing_blocks - poll a list of blocks until all of them are free
 *	(or in an error state), then run _post_block_free() on each one.
 * IN args: pointer to a bg_free_block_list_t carrying the list of blocks
 *	to track, the destroy flag and the job id used in log messages.
 *	This function takes ownership: the list is destroyed and the
 *	structure is xfree()d before returning.
 * RET: always NULL
 *
 * The state of the list is refreshed and inspected up to MAX_FREE_RETRIES
 * times, sleeping FREE_SLEEP_INTERVAL between attempts.
 * NOTE(review): the void *(void *) signature suggests this runs as a
 * thread start routine - confirm against the caller.
 */
static void *_track_freeing_blocks(void *args)
{
	bg_free_block_list_t *bg_free_list = (bg_free_block_list_t *)args;
	List track_list = bg_free_list->track_list;
	bool destroy = bg_free_list->destroy;
	uint32_t job_id = bg_free_list->job_id;
	int retry_cnt = 0;
	int free_cnt = 0, track_cnt = list_count(track_list);
	ListIterator itr = list_iterator_create(track_list);
	bg_record_t *bg_record;
	bool restore = true;

	debug("_track_freeing_blocks: Going to free %d for job %u",
	      track_cnt, job_id);
	while (retry_cnt < MAX_FREE_RETRIES) {
		free_cnt = 0;
		slurm_mutex_lock(&block_state_mutex);

		/* just to make sure state is updated */
		bridge_status_update_block_list_state(track_list);

		list_iterator_reset(itr);
		/* just in case this changes from the update function */
		track_cnt = list_count(track_list);
		while ((bg_record = list_next(itr))) {
			if (bg_record->magic != BLOCK_MAGIC) {
				/* update_block_list_state should
				   remove this already from the list
				   so we shouldn't ever have this.
				*/
				error("_track_freeing_blocks: block was "
				      "already destroyed %p", bg_record);
				xassert(0);
				free_cnt++;
				continue;
			}
#ifndef HAVE_BG_FILES
			/* Builds without HAVE_BG_FILES fake the free:
			   once a few polls have passed, a block that is
			   not in an error state is simply marked free.
			*/
			if (!(bg_record->state & BG_BLOCK_ERROR_FLAG)
			    && (retry_cnt >= 3))
				bg_record->state = BG_BLOCK_FREE;
#endif
			/* Free and error-state blocks both count as done;
			   anything else not already terminating is asked
			   to free again. */
			if ((bg_record->state == BG_BLOCK_FREE)
			    || (bg_record->state & BG_BLOCK_ERROR_FLAG))
				free_cnt++;
			else if (bg_record->state != BG_BLOCK_TERM)
				bg_free_block(bg_record, 0, 1);
		}
		slurm_mutex_unlock(&block_state_mutex);
		if (free_cnt == track_cnt)
			break;
		debug("_track_freeing_blocks: freed %d of %d for job %u",
		      free_cnt, track_cnt, job_id);
		sleep(FREE_SLEEP_INTERVAL);
		retry_cnt++;
	}

	/* Only claim success when the loop ended because every block was
	 * accounted for; running out of retries is reported as such. */
	if (retry_cnt < MAX_FREE_RETRIES)
		debug("_track_freeing_blocks: Freed them all for job %u",
		      job_id);
	else
		debug("_track_freeing_blocks: gave up after %d retries, "
		      "freed %d of %d for job %u",
		      retry_cnt, free_cnt, track_cnt, job_id);

	if (destroy)
		restore = false;

	/* If there is a block in error state we need to keep all
	 * these blocks around. */
	slurm_mutex_lock(&block_state_mutex);
	list_iterator_reset(itr);
	while ((bg_record = list_next(itr))) {
		/* block no longer exists */
		if (bg_record->magic != BLOCK_MAGIC)
			continue;
		if (bg_record->state != BG_BLOCK_FREE) {
			restore = true;
			break;
		}
	}

	list_iterator_reset(itr);
	while ((bg_record = list_next(itr)))
		_post_block_free(bg_record, restore);
	slurm_mutex_unlock(&block_state_mutex);
	last_bg_update = time(NULL);
	list_iterator_destroy(itr);
	list_destroy(track_list);
	xfree(bg_free_list);
	return NULL;
}
Ejemplo n.º 3
0
/*
 * _post_block_free - finish processing of a block after a free attempt.
 * IN bg_record: block to process; its free_cnt is decremented here
 * IN restore: if true and the block has a non-zero mp_count, the record is
 *	kept; otherwise it is removed from the main list, removed through
 *	bridge_block_remove() and destroyed
 * RET: SLURM_ERROR if bg_record fails the BLOCK_MAGIC check,
 *	SLURM_SUCCESS in every other case
 *
 * block_state_mutex should be locked before calling this. Note that the
 * mutex is temporarily released and re-acquired around
 * select_g_update_block() when the block has to be put in an error state.
 */
static int _post_block_free(bg_record_t *bg_record, bool restore)
{
	int rc = SLURM_SUCCESS;

	if (bg_record->magic != BLOCK_MAGIC) {
		error("block already destroyed %p", bg_record);
		xassert(0);
		return SLURM_ERROR;
	}

	/* One fewer caller is waiting on this block; only the caller that
	 * brings the count to zero (with nobody modifying the block) goes
	 * on to do the cleanup below. */
	bg_record->free_cnt--;
	if (bg_record->free_cnt == -1) {
		info("we got a negative 1 here for %s",
		     bg_record->bg_block_id);
		xassert(0);
		return SLURM_SUCCESS;
	} else if (bg_record->modifying) {
		info("others are modifing this block %s, don't clear it up",
		     bg_record->bg_block_id);
		return SLURM_SUCCESS;
	} else if (bg_record->free_cnt) {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("%d others are trying to destroy this block %s",
			     bg_record->free_cnt, bg_record->bg_block_id);
		return SLURM_SUCCESS;
	}

	/* Even if the block is already in error state we need to do this to
	   avoid any overlapping blocks that may have been created due
	   to bad hardware.
	*/
	/* The error flag is masked off so only the base state is compared. */
	if ((bg_record->state & (~BG_BLOCK_ERROR_FLAG)) != BG_BLOCK_FREE) {
		/* Something isn't right, go mark this one in an error
		   state. */
		update_block_msg_t block_msg;
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("_post_block_free: block %s is not in state "
			     "free (%s), putting it in error state.",
			     bg_record->bg_block_id,
			     bg_block_state_string(bg_record->state));
		slurm_init_update_block_msg(&block_msg);
		block_msg.bg_block_id = bg_record->bg_block_id;
		block_msg.state = BG_BLOCK_ERROR_FLAG;
		block_msg.reason = "Block would not deallocate";
		/* The mutex is dropped for the duration of the update call
		 * (presumably because select_g_update_block takes it
		 * itself - confirm). */
		slurm_mutex_unlock(&block_state_mutex);
		select_g_update_block(&block_msg);
		slurm_mutex_lock(&block_state_mutex);
		/* The record is only touched again if it is still present
		 * in the main list after the mutex was re-acquired. */
		if (block_ptr_exist_in_list(bg_lists->main, bg_record))
			bg_record->destroy = 0;
		return SLURM_SUCCESS;
	}

	/* If we are here we are done with the destroy so just reset it. */
	bg_record->destroy = 0;

	/* A bit of a sanity check to make sure blocks are being
	   removed out of all the lists.
	*/
	remove_from_bg_list(bg_lists->booted, bg_record);
	if (remove_from_bg_list(bg_lists->job_running, bg_record)
	    == SLURM_SUCCESS) {
		debug2("_post_block_free: we are freeing block %s and "
		       "it was in the job_running list.  This can happen if a "
		       "block is removed while waiting for mmcs to finish "
		       "removing the job from the block.",
		       bg_record->bg_block_id);
		/* Give the block's cpus back to the unused pool. */
		num_unused_cpus += bg_record->cpu_cnt;
	}

	/* If we don't have any mp_counts force block removal */
	if (restore && bg_record->mp_count)
		return SLURM_SUCCESS;

	if (remove_from_bg_list(bg_lists->main, bg_record) != SLURM_SUCCESS) {
		/* This should only happen if called from
		 * bg_job_place.c where the block was never added to
		 * the list. */
		debug("_post_block_free: It appears this block %s isn't "
		      "in the main list anymore.",
		      bg_record->bg_block_id);
	}

	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
		info("_post_block_free: removing %s from database",
		     bg_record->bg_block_id);

	/* A failed removal is only logged; the record is destroyed and
	 * SLURM_SUCCESS is returned regardless of rc. */
	rc = bridge_block_remove(bg_record);
	if (rc != SLURM_SUCCESS) {
		if (rc == BG_ERROR_BLOCK_NOT_FOUND) {
			debug("_post_block_free: block %s is not found",
			      bg_record->bg_block_id);
		} else {
			error("_post_block_free: "
			      "bridge_block_remove(%s): %s",
			      bg_record->bg_block_id,
			      bg_err_str(rc));
		}
	} else if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
		info("_post_block_free: done %s(%p)",
		     bg_record->bg_block_id, bg_record);

	destroy_bg_record(bg_record);
	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
		info("_post_block_free: destroyed");

	return SLURM_SUCCESS;
}
Ejemplo n.º 4
0
/*
 * _proc_entry_is_pid - test whether a directory entry name consists only
 *	of the decimal digits '0'..'9' and is not empty.
 * IN name: NUL-terminated entry name
 * RET: true if every character is a digit and there is at least one
 */
static bool _proc_entry_is_pid(const char *name)
{
	const char *p = name;

	if (*p == '\0')
		return false;
	for (; *p; p++) {
		if ((*p < '0') || (*p > '9'))
			return false;
	}
	return true;
}

/*
 * _get_precs - build a list of process records by calling _handle_stats()
 *	on the /proc/<pid>/stat, /proc/<pid>/io and /proc/<pid>/smaps files
 *	of a set of processes.
 * IN task_list: list of jobacctinfo records; only the first one (if any)
 *	is consulted, for its tres_count and for its energy data
 * IN pgid_plugin: if false the pids come from
 *	proctrack_g_get_pids(cont_id); if true every all-digit entry of
 *	/proc is visited
 * IN cont_id: container id handed to proctrack_g_get_pids()
 * IN callbacks: passed through to _handle_stats()
 * RET: newly created list (destructor destroy_jag_prec); it may be empty
 *	but is never NULL here. The caller is presumably expected to
 *	destroy it.
 *
 * The directory stream for /proc (slash_proc) is opened on first use and
 * rewound on later calls; it is never closed in this function.
 */
static List _get_precs(List task_list, bool pgid_plugin, uint64_t cont_id,
		       jag_callbacks_t *callbacks)
{
	List prec_list = list_create(destroy_jag_prec);
	char	proc_stat_file[256];	/* Allow ~20x extra length */
	char	proc_io_file[256];	/* Allow ~20x extra length */
	char	proc_smaps_file[256];	/* Allow ~20x extra length */
	static	int	slash_proc_open = 0;
	int i;
	struct jobacctinfo *jobacct = NULL;

	xassert(task_list);

	jobacct = list_peek(task_list);

	if (!pgid_plugin) {
		pid_t *pids = NULL;
		int npids = 0;
		/* get only the processes in the proctrack container */
		proctrack_g_get_pids(cont_id, &pids, &npids);
		if (!npids) {
			/* update consumed energy even if pids do not exist */
			if (jobacct) {
				acct_gather_energy_g_get_data(
					energy_profile,
					&jobacct->energy);
				jobacct->tres_usage_in_tot[TRES_ARRAY_ENERGY] =
					jobacct->energy.consumed_energy;
				jobacct->tres_usage_out_tot[TRES_ARRAY_ENERGY] =
					jobacct->energy.current_watts;
				debug2("%s: energy = %"PRIu64" watts = %"PRIu64,
				       __func__,
				       jobacct->tres_usage_in_tot[
					       TRES_ARRAY_ENERGY],
				       jobacct->tres_usage_out_tot[
					       TRES_ARRAY_ENERGY]);
			}

			debug4("no pids in this container %"PRIu64"", cont_id);
			/* release the array in case one was handed back
			 * together with a zero count */
			xfree(pids);
			goto finished;
		}
		for (i = 0; i < npids; i++) {
			snprintf(proc_stat_file, sizeof(proc_stat_file),
				 "/proc/%d/stat", pids[i]);
			snprintf(proc_io_file, sizeof(proc_io_file),
				 "/proc/%d/io", pids[i]);
			snprintf(proc_smaps_file, sizeof(proc_smaps_file),
				 "/proc/%d/smaps", pids[i]);
			_handle_stats(prec_list, proc_stat_file, proc_io_file,
				      proc_smaps_file, callbacks,
				      jobacct ? jobacct->tres_count : 0);
		}
		xfree(pids);
	} else {
		struct dirent *slash_proc_entry;

		if (slash_proc_open) {
			rewinddir(slash_proc);
		} else {
			slash_proc=opendir("/proc");
			if (slash_proc == NULL) {
				perror("opening /proc");
				goto finished;
			}
			slash_proc_open=1;
		}

		while ((slash_proc_entry = readdir(slash_proc))) {
			const char *pid_name = slash_proc_entry->d_name;
			int n_stat, n_io, n_smaps;

			/* Only all-digit names (which really should be
			 * pids) are of interest. The name is validated
			 * once and the three paths are then formatted
			 * with bounded writes. */
			if (!_proc_entry_is_pid(pid_name))
				continue;

			n_stat = snprintf(proc_stat_file,
					  sizeof(proc_stat_file),
					  "/proc/%s/stat", pid_name);
			n_io = snprintf(proc_io_file, sizeof(proc_io_file),
					"/proc/%s/io", pid_name);
			n_smaps = snprintf(proc_smaps_file,
					   sizeof(proc_smaps_file),
					   "/proc/%s/smaps", pid_name);
			/* skip an entry whose path would not fit rather
			 * than use a truncated file name */
			if ((n_stat < 0) || (n_io < 0) || (n_smaps < 0) ||
			    (n_stat >= (int)sizeof(proc_stat_file)) ||
			    (n_io >= (int)sizeof(proc_io_file)) ||
			    (n_smaps >= (int)sizeof(proc_smaps_file)))
				continue;

			_handle_stats(prec_list, proc_stat_file, proc_io_file,
				      proc_smaps_file,callbacks,
				      jobacct ? jobacct->tres_count : 0);
		}
	}

finished:

	return prec_list;
}
Ejemplo n.º 5
0
/*
 * addto_update_list - add object updated to list
 * IN/OUT update_list: list of updated objects (one slurmdb_update_object_t
 *	per update type, each holding a list of objects of that type)
 * IN type: update type
 * IN object: object updated
 * RET: SLURM_SUCCESS, or SLURM_ERROR if update_list is NULL or type is not
 *	a known update type (in which case update_list is left untouched)
 *
 * NOTE: This function will take the object given and free it later so it
 *       needed to be removed from a list if in one before.
 * NOTE(review): the NO_VAL -> INFINITE normalization for ADD_ASSOC/ADD_QOS
 *       is only applied to the first object of a given type; objects added
 *       to an already existing entry are prepended as-is. Confirm whether
 *       that asymmetry is intended.
 */
extern int addto_update_list(List update_list, slurmdb_update_type_t type,
			     void *object)
{
	slurmdb_update_object_t *update_object = NULL;
	slurmdb_association_rec_t *assoc = object;
	slurmdb_qos_rec_t *qos = object;
	ListIterator itr = NULL;
	List objects = NULL;

	if(!update_list) {
		error("no update list given");
		return SLURM_ERROR;
	}

	/* look for an existing entry of this update type */
	itr = list_iterator_create(update_list);
	while((update_object = list_next(itr))) {
		if(update_object->type == type)
			break;
	}
	list_iterator_destroy(itr);

	if(update_object) {
		/* here we prepend primarly for remove association
		   since parents need to be removed last, and they are
		   removed first in the calling code */
		list_prepend(update_object->objects, object);
		return SLURM_SUCCESS;
	}

	/* Build the per-type object list first. Nothing is added to
	 * update_list until the type is known to be valid, so an unknown
	 * type can no longer leave behind an entry without an object
	 * list. */
	switch(type) {
	case SLURMDB_MODIFY_USER:
	case SLURMDB_ADD_USER:
	case SLURMDB_REMOVE_USER:
	case SLURMDB_ADD_COORD:
	case SLURMDB_REMOVE_COORD:
		objects = list_create(slurmdb_destroy_user_rec);
		break;
	case SLURMDB_ADD_ASSOC:
		/* We are going to send these to the slurmctld's so
		   lets set up the correct limits to INFINITE instead
		   of NO_VAL */
		if(assoc->grp_cpu_mins == (uint64_t)NO_VAL)
			assoc->grp_cpu_mins = (uint64_t)INFINITE;
		if(assoc->grp_cpu_run_mins == (uint64_t)NO_VAL)
			assoc->grp_cpu_run_mins = (uint64_t)INFINITE;
		if(assoc->grp_cpus == NO_VAL)
			assoc->grp_cpus = INFINITE;
		if(assoc->grp_jobs == NO_VAL)
			assoc->grp_jobs = INFINITE;
		if(assoc->grp_nodes == NO_VAL)
			assoc->grp_nodes = INFINITE;
		if(assoc->grp_submit_jobs == NO_VAL)
			assoc->grp_submit_jobs = INFINITE;
		if(assoc->grp_wall == NO_VAL)
			assoc->grp_wall = INFINITE;

		if(assoc->max_cpu_mins_pj == (uint64_t)NO_VAL)
			assoc->max_cpu_mins_pj = (uint64_t)INFINITE;
		if(assoc->max_cpu_run_mins == (uint64_t)NO_VAL)
			assoc->max_cpu_run_mins = (uint64_t)INFINITE;
		if(assoc->max_cpus_pj == NO_VAL)
			assoc->max_cpus_pj = INFINITE;
		if(assoc->max_jobs == NO_VAL)
			assoc->max_jobs = INFINITE;
		if(assoc->max_nodes_pj == NO_VAL)
			assoc->max_nodes_pj = INFINITE;
		if(assoc->max_submit_jobs == NO_VAL)
			assoc->max_submit_jobs = INFINITE;
		if(assoc->max_wall_pj == NO_VAL)
			assoc->max_wall_pj = INFINITE;
		/* fall through */
	case SLURMDB_MODIFY_ASSOC:
	case SLURMDB_REMOVE_ASSOC:
		xassert(((slurmdb_association_rec_t *)object)->cluster);
		objects = list_create(slurmdb_destroy_association_rec);
		break;
	case SLURMDB_ADD_QOS:
		/* We are going to send these to the slurmctld's so
		   lets set up the correct limits to INFINITE instead
		   of NO_VAL */
		if(qos->grp_cpu_mins == (uint64_t)NO_VAL)
			qos->grp_cpu_mins = (uint64_t)INFINITE;
		if(qos->grp_cpu_run_mins == (uint64_t)NO_VAL)
			qos->grp_cpu_run_mins = (uint64_t)INFINITE;
		if(qos->grp_cpus == NO_VAL)
			qos->grp_cpus = INFINITE;
		if(qos->grp_jobs == NO_VAL)
			qos->grp_jobs = INFINITE;
		if(qos->grp_nodes == NO_VAL)
			qos->grp_nodes = INFINITE;
		if(qos->grp_submit_jobs == NO_VAL)
			qos->grp_submit_jobs = INFINITE;
		if(qos->grp_wall == NO_VAL)
			qos->grp_wall = INFINITE;

		if(qos->max_cpu_mins_pj == (uint64_t)NO_VAL)
			qos->max_cpu_mins_pj = (uint64_t)INFINITE;
		if(qos->max_cpu_run_mins_pu == (uint64_t)NO_VAL)
			qos->max_cpu_run_mins_pu = (uint64_t)INFINITE;
		if(qos->max_cpus_pj == NO_VAL)
			qos->max_cpus_pj = INFINITE;
		if(qos->max_cpus_pu == NO_VAL)
			qos->max_cpus_pu = INFINITE;
		if(qos->max_jobs_pu == NO_VAL)
			qos->max_jobs_pu = INFINITE;
		if(qos->max_nodes_pj == NO_VAL)
			qos->max_nodes_pj = INFINITE;
		if(qos->max_nodes_pu == NO_VAL)
			qos->max_nodes_pu = INFINITE;
		if(qos->max_submit_jobs_pu == NO_VAL)
			qos->max_submit_jobs_pu = INFINITE;
		if(qos->max_wall_pj == NO_VAL)
			qos->max_wall_pj = INFINITE;
		/* fall through */
	case SLURMDB_MODIFY_QOS:
	case SLURMDB_REMOVE_QOS:
		objects = list_create(slurmdb_destroy_qos_rec);
		break;
	case SLURMDB_ADD_WCKEY:
	case SLURMDB_MODIFY_WCKEY:
	case SLURMDB_REMOVE_WCKEY:
		xassert(((slurmdb_wckey_rec_t *)object)->cluster);
		objects = list_create(slurmdb_destroy_wckey_rec);
		break;
	case SLURMDB_ADD_CLUSTER:
	case SLURMDB_REMOVE_CLUSTER:
		/* This should only be the name of the cluster, and is
		   only used in the plugin for rollback purposes.
		*/
		objects = list_create(slurm_destroy_char);
		break;
	case SLURMDB_UPDATE_NOTSET:
	default:
		error("unknown type set in update_object: %d", type);
		return SLURM_ERROR;
	}

	/* the type is valid: publish a fully initialized entry */
	update_object = xmalloc(sizeof(slurmdb_update_object_t));
	update_object->type = type;
	update_object->objects = objects;
	list_append(update_list, update_object);
	list_sort(update_list, (ListCmpF)_sort_update_object_dec);

	debug4("XXX: update object with type %d added", type);
	list_append(update_object->objects, object);
	return SLURM_SUCCESS;
}
Ejemplo n.º 6
0
int glp_asnprob_okalg(int form, glp_graph *G, int v_set, int a_cost,
      double *sol, int a_x)
{     /* solve assignment problem with out-of-kilter algorithm:
         the graph is turned into a circulation network with one extra
         node and one extra arc per vertex, which is handed to okalg;
         returns 0, GLP_EDATA, GLP_ENOPFS, GLP_ERANGE or GLP_EFAIL */
      glp_vertex *v;
      glp_arc *arc;
      int *tail, *head, *low, *cap, *cost, *x, *pi;
      int nv, na, node, k, ret;
      double val;
      /* reject an unknown problem form */
      if (form != GLP_ASN_MIN && form != GLP_ASN_MAX &&
          form != GLP_ASN_MMP)
         xerror("glp_asnprob_okalg: form = %d; invalid parameter\n",
            form);
      /* a non-negative offset must leave room for the stored value */
      if (v_set >= 0)
      {  if (v_set > G->v_size - (int)sizeof(int))
            xerror("glp_asnprob_okalg: v_set = %d; invalid offset\n",
               v_set);
      }
      if (a_cost >= 0)
      {  if (a_cost > G->a_size - (int)sizeof(double))
            xerror("glp_asnprob_okalg: a_cost = %d; invalid offset\n",
               a_cost);
      }
      if (a_x >= 0)
      {  if (a_x > G->a_size - (int)sizeof(int))
            xerror("glp_asnprob_okalg: a_x = %d; invalid offset\n",
               a_x);
      }
      if (glp_check_asnprob(G, v_set) != 0)
         return GLP_EDATA;
      /* size of the resulting network: one additional node, and one
         additional arc for every original vertex */
      nv = G->nv + 1;
      na = G->na + G->nv;
      /* working arrays, indexed from 1 */
      tail = xcalloc(1+na, sizeof(int));
      head = xcalloc(1+na, sizeof(int));
      low = xcalloc(1+na, sizeof(int));
      cap = xcalloc(1+na, sizeof(int));
      cost = xcalloc(1+na, sizeof(int));
      x = xcalloc(1+na, sizeof(int));
      pi = xcalloc(1+nv, sizeof(int));
      /* copy the original arcs; each gets flow bounds [0,1] */
      k = 0;
      node = 1;
      while (node <= G->nv)
      {  arc = G->v[node]->out;
         while (arc != NULL)
         {  k++;
            tail[k] = arc->tail->i;
            head[k] = arc->head->i;
            low[k] = 0;
            cap[k] = 1;
            /* unit cost unless a cost field offset was given */
            val = 1.0;
            if (a_cost >= 0)
               memcpy(&val, (char *)arc->data + a_cost,
                  sizeof(double));
            /* the cost must be an integer representable as int */
            if (!(fabs(val) <= (double)INT_MAX) || val != floor(val))
            {  ret = GLP_EDATA;
               goto done;
            }
            /* costs are negated for every form except minimization */
            cost[k] = (form == GLP_ASN_MIN ? (int)val : - (int)val);
            arc = arc->t_next;
         }
         node++;
      }
      /* add one artificial arc per vertex, linking it with the extra
         node nv */
      node = 1;
      while (node <= G->nv)
      {  v = G->v[node];
         k++;
         if (v->out == NULL)
         {  tail[k] = node;
            head[k] = nv;
         }
         else if (v->in == NULL)
         {  tail[k] = nv;
            head[k] = node;
         }
         else
            xassert(v != v);
         low[k] = (form == GLP_ASN_MMP ? 0 : 1);
         cap[k] = 1;
         cost[k] = 0;
         node++;
      }
      xassert(k == na);
      /* find minimal-cost circulation in the resulting network and
         translate the result code of okalg */
      ret = okalg(nv, na, tail, head, low, cap, cost, x, pi);
      if (ret == 0)
      {  /* optimal circulation found */
         ret = 0;
      }
      else if (ret == 1)
      {  /* no feasible circulation exists; the solution components
            are still stored below */
         ret = GLP_ENOPFS;
      }
      else if (ret == 2)
      {  /* integer overflow occured */
         ret = GLP_ERANGE;
         goto done;
      }
      else if (ret == 3)
      {  /* optimality test failed (logic error) */
         ret = GLP_EFAIL;
         goto done;
      }
      else
         xassert(ret != ret);
      /* objective value = total cost, with the sign restored */
      if (sol != NULL)
      {  val = 0.0;
         for (k = 1; k <= na; k++)
            val += (double)cost[k] * (double)x[k];
         *sol = (form == GLP_ASN_MIN ? val : - val);
      }
      /* arc flows, stored in the same order the arcs were numbered */
      if (a_x >= 0)
      {  k = 0;
         for (node = 1; node <= G->nv; node++)
         {  for (arc = G->v[node]->out; arc != NULL;
               arc = arc->t_next)
            {  k++;
               if (ret == 0)
                  xassert(x[k] == 0 || x[k] == 1);
               memcpy((char *)arc->data + a_x, &x[k], sizeof(int));
            }
         }
      }
done: /* release the working arrays */
      xfree(tail);
      xfree(head);
      xfree(low);
      xfree(cap);
      xfree(cost);
      xfree(x);
      xfree(pi);
      return ret;
}
Ejemplo n.º 7
0
/*
 * slurm_ckpt_op - perform a checkpoint operation on a job step.
 * IN step_ptr: step to operate on; NULL (batch job) is not supported
 * IN op: one of the CHECK_* operations
 * IN data: passed to _ckpt_step() for CHECK_CREATE and CHECK_VACATE
 * OUT event_time: for CHECK_ABLE, set to the time stamp of the last event
 *	when no reply has been counted yet (may be NULL)
 * OUT error_code, error_msg: for CHECK_ERROR, receive the recorded error
 *	code and a copy of the recorded message
 * RET: SLURM_SUCCESS or an error code
 * job_id, step_id and image_dir are not used by this implementation.
 */
extern int slurm_ckpt_op (uint32_t job_id, uint32_t step_id,
			  struct step_record *step_ptr, uint16_t op,
			  uint16_t data, char *image_dir, time_t * event_time,
			  uint32_t *error_code, char **error_msg )
{
	struct check_job_info *check_ptr;
	int rc = SLURM_SUCCESS;

	/* batch job restore */
	if (step_ptr == NULL)
		return ESLURM_NOT_SUPPORTED;

	check_ptr = (struct check_job_info *)step_ptr->check_job;
	xassert(check_ptr);

	switch (op) {
	case CHECK_ABLE:
		if (check_ptr->disabled) {
			rc = ESLURM_DISABLED;
			break;
		}
		/* Return time of last event */
		if (event_time && (check_ptr->reply_cnt < 1))
			*event_time = check_ptr->time_stamp;
		rc = SLURM_SUCCESS;
		break;

	case CHECK_DISABLE:
		check_ptr->disabled++;
		break;

	case CHECK_ENABLE:
		check_ptr->disabled--;
		break;

	case CHECK_CREATE:
	case CHECK_VACATE:
		/* Start a fresh checkpoint: clear what is left of the
		 * previous one, then signal the step. The last argument
		 * of _ckpt_step() is 1 only for a vacate. */
		check_ptr->time_stamp = time(NULL);
		check_ptr->reply_cnt = 0;
		check_ptr->error_code = 0;
		xfree(check_ptr->error_msg);
		rc = _ckpt_step(step_ptr, data,
				(op == CHECK_VACATE) ? 1 : 0);
		break;

	case CHECK_RESTART:
	case CHECK_REQUEUE:
		/* Lots of work is required in Slurm to restart a
		 * checkpointed job. For now the user can submit a
		 * new job and execute "ompi_restart <snapshot>" */
		rc = ESLURM_NOT_SUPPORTED;
		break;

	case CHECK_ERROR:
		xassert(error_code);
		xassert(error_msg);
		/* hand back the recorded error, replacing any message
		 * the caller already holds */
		*error_code = check_ptr->error_code;
		xfree(*error_msg);
		*error_msg = xstrdup(check_ptr->error_msg);
		break;

	default:
		error("Invalid checkpoint operation: %d", op);
		rc = EINVAL;
		break;
	}

	return rc;
}
Ejemplo n.º 8
0
/* pack_process_mapping() - build a textual "(vector,...)" description of
 * how task ids are laid out over nodes.
 *
 * IN node_cnt: number of nodes
 * IN task_cnt: total number of tasks
 * IN tasks: tasks[i] is the number of tasks on node i
 * IN tids: tids[i][j] is the id of the j-th task on node i
 * RET: string of the form "(vector,(start,nodes,depth),...)" where each
 *	triple gives the first node of a run, the number of nodes in the
 *	run and the number of consecutive task ids placed on each of those
 *	nodes. The string is built with xstrdup()/xstrfmtcat(), so the
 *	caller presumably releases it with xfree().
 */
char *
pack_process_mapping(uint32_t node_cnt,
		     uint32_t task_cnt,
		     uint16_t *tasks,
		     uint32_t **tids)
{
	int offset, i;
	int start_node, end_node;
	char *packing = NULL;

	/* next_task[i] - next process for processing
	 * (the counters are used without explicit initialization, so
	 * this relies on xmalloc() returning zero-filled memory - confirm)
	 */
	uint16_t *next_task = xmalloc(node_cnt * sizeof(uint16_t));

	packing = xstrdup("(vector");
	offset = 0;
	while (offset < task_cnt) {
		int mapped = 0;
		int depth = -1;
		int j;
		start_node = end_node = 0;

		/* find the task with id == offset
		 */
		for (i = 0; i < node_cnt; i++) {

			if (next_task[i] < tasks[i]) {
				/* if we didn't consume entire
				 * quota on this node
				 */
				xassert(offset >= tids[i][next_task[i]]);
				if (offset == tids[i][next_task[i]]) {
					start_node = i;
					break;
				}
			}
		}

		end_node = node_cnt;
		for (i = start_node; i < end_node; i++) {
			if (next_task[i] >= tasks[i] ) {
				/* Save first non-matching node index
				 * and interrupt loop
				 */
				end_node = i;
				continue;
			}

			/* advance j over the run of consecutive task ids
			 * starting at next_task[i] on this node */
			for (j = next_task[i]; ((j + 1) < tasks[i])
				     && ((tids[i][j]+1) == tids[i][j+1]); j++);
			j++;
			/* First run determines the depth
			 */
			if (depth < 0) {
				depth = j - next_task[i];
			} else {
				/* If this is not the first node in the bar
				 * check that: 1. First tid on this node is
				 * sequentially next after last tid
				 *    on the previous node
				 */
				if (tids[i-1][next_task[i-1]-1] + 1
				    != tids[i][next_task[i]]) {
					end_node = i;
					continue;
				}
			}

			if (depth == (j - next_task[i])) {
				mapped += depth;
				next_task[i] = j;
			} else {
				/* Save first non-matching node index
				 *
				 * and interrupt loop
				 */
				end_node = i;
			}
		}
		xstrfmtcat(packing,",(%u,%u,%u)",
			   start_node, end_node - start_node, depth);
		offset += mapped;
	}
	xstrcat(packing,")");

	/* the per-node counters are scratch space only; release them so
	 * that each call does not leak node_cnt entries */
	xfree(next_task);
	return packing;
}
Ejemplo n.º 9
0
/*
 * The remainder of this file implements the standard SLURM checkpoint API.
 */
/*
 * slurm_ckpt_op - perform a checkpoint operation on a batch job or step.
 * IN job_id: job to operate on
 * IN step_id: step to operate on, or SLURM_BATCH_SCRIPT for the batch job
 * IN step_ptr: ignored on input; looked up again from job_id/step_id
 * IN op: one of the CHECK_* operations
 * IN data: stored as the "wait" value of the request handed to the agent
 * IN image_dir: copied into the request handed to the agent
 * OUT event_time: for CHECK_ABLE, receives the current time stamp
 *	(may be NULL)
 * OUT error_code, error_msg: for CHECK_ERROR, receive the recorded error
 *	code and a copy of the recorded message
 * RET: SLURM_SUCCESS or an error code
 *
 * CHECK_CREATE, CHECK_VACATE and (batch only) CHECK_REQUEUE start a
 * detached thread running _ckpt_agent_thr() with a freshly allocated
 * request. On success the thread owns the request; if the thread cannot
 * be started the request is released here, the error number returned by
 * the pthread call is reported, and the time stamp is rolled back to zero
 * so that a later request is not refused with EALREADY.
 */
extern int slurm_ckpt_op (uint32_t job_id, uint32_t step_id,
			  struct step_record *step_ptr, uint16_t op,
			  uint16_t data, char *image_dir, time_t * event_time,
			  uint32_t *error_code, char **error_msg )
{
	int rc = SLURM_SUCCESS;
	int thr_rc;
	struct check_job_info *check_ptr;
	uint16_t done_sig = 0;
	struct job_record *job_ptr;
	struct node_record *node_ptr;
	pthread_attr_t attr;
	pthread_t ckpt_agent_tid = 0;
	char *nodelist;
	struct ckpt_req *req_ptr;

	/* job/step checked already */
	job_ptr = find_job_record(job_id);
	if (!job_ptr)
		return ESLURM_INVALID_JOB_ID;
	if (step_id == SLURM_BATCH_SCRIPT) {
		check_ptr = (struct check_job_info *)job_ptr->check_job;
		/* NOTE(review): node_ptr is used without a NULL check;
		 * confirm find_first_node_record() cannot fail here. */
		node_ptr = find_first_node_record(job_ptr->node_bitmap);
		nodelist = node_ptr->name;
	} else {
		step_ptr = find_step_record(job_ptr, step_id);
		if (!step_ptr)
			return ESLURM_INVALID_JOB_ID;
		check_ptr = (struct check_job_info *)step_ptr->check_job;
		nodelist = step_ptr->step_layout->node_list;
	}
	xassert(check_ptr);

	switch (op) {
	case CHECK_ABLE:
		if (check_ptr->disabled)
			rc = ESLURM_DISABLED;
		else {
			/* the caller may not be interested in the time */
			if (event_time)
				*event_time = check_ptr->time_stamp;
			rc = SLURM_SUCCESS;
		}
		break;
	case CHECK_DISABLE:
		check_ptr->disabled++;
		break;
	case CHECK_ENABLE:
		check_ptr->disabled--;
		break;
	case CHECK_REQUEUE:
		if (step_id != SLURM_BATCH_SCRIPT) {
			rc = ESLURM_NOT_SUPPORTED;
			break;
		}
		/* no break */
	case CHECK_VACATE:
		done_sig = SIGTERM;
		/* no break */
	case CHECK_CREATE:
		if (check_ptr->disabled) {
			rc = ESLURM_DISABLED;
			break;
		}
		/* a non-zero time stamp marks an operation in progress */
		if (check_ptr->time_stamp != 0) {
			rc = EALREADY;
			break;
		}

		check_ptr->time_stamp = time(NULL);
		check_ptr->error_code = 0;
		xfree(check_ptr->error_msg);

		req_ptr = xmalloc(sizeof(struct ckpt_req));
		if (!req_ptr) {
			check_ptr->time_stamp = 0;
			rc = ENOMEM;
			break;
		}
		req_ptr->gid = job_ptr->group_id;
		req_ptr->uid = job_ptr->user_id;
		req_ptr->job_id = job_id;
		req_ptr->step_id = step_id;
		req_ptr->begin_time = check_ptr->time_stamp;
		req_ptr->wait = data;
		req_ptr->image_dir = xstrdup(image_dir);
		req_ptr->nodelist = xstrdup(nodelist);
		req_ptr->sig_done = done_sig;
		req_ptr->op = op;

		/* pthread functions report failure through their return
		 * value and need not set errno, so the returned code is
		 * kept (and copied to errno for the %m in the log). */
		slurm_attr_init(&attr);
		thr_rc = pthread_attr_setdetachstate(&attr,
						     PTHREAD_CREATE_DETACHED);
		if (thr_rc) {
			errno = thr_rc;
			error("pthread_attr_setdetachstate: %m");
		} else {
			thr_rc = pthread_create(&ckpt_agent_tid, &attr,
						_ckpt_agent_thr, req_ptr);
			if (thr_rc) {
				errno = thr_rc;
				error("pthread_create: %m");
			}
		}
		/* the attribute object is released on every path */
		slurm_attr_destroy(&attr);

		if (thr_rc) {
			/* no agent was started: undo what was set up */
			xfree(req_ptr->image_dir);
			xfree(req_ptr->nodelist);
			xfree(req_ptr);
			check_ptr->time_stamp = 0;
			rc = thr_rc;
		}

		break;

	case CHECK_RESTART:
		if (step_id != SLURM_BATCH_SCRIPT) {
			rc = ESLURM_NOT_SUPPORTED;
			break;
		}
		/* create a batch job from saved desc */
		rc = ESLURM_NOT_SUPPORTED;
		/* TODO: save job script */
		break;

	case CHECK_ERROR:
		xassert(error_code);
		xassert(error_msg);
		*error_code = check_ptr->error_code;
		xfree(*error_msg);
		*error_msg = xstrdup(check_ptr->error_msg);
		break;
	default:
		error("Invalid checkpoint operation: %d", op);
		rc = EINVAL;
	}

	return rc;
}
Ejemplo n.º 10
0
/*
 * switch_p_libstate_restore - reload saved switch state from
 *	<dir_name>/switch_cray_state.
 * IN dir_name: directory holding the state file
 * IN recover: if false nothing is read (clean start)
 * RET: always SLURM_SUCCESS; a missing file or a read error is logged
 *	and the plugin simply continues without the saved state
 *
 * The state file is unlinked once it has been read, whether or not the
 * read succeeded. Without HAVE_NATIVE_CRAY this function does nothing.
 */
extern int switch_p_libstate_restore(char *dir_name, bool recover)
{
#ifdef HAVE_NATIVE_CRAY
	char *state_path, *contents = NULL;
	Buf state_buf = NULL;
	int status = SLURM_SUCCESS;
	int fd, capacity = 0, nread = 0, used = 0;

	xassert(dir_name != NULL);

	if (debug_flags & DEBUG_FLAG_SWITCH) {
		CRAY_INFO("restore from %s, recover %d",
			  dir_name,  (int) recover);
	}

	/* clean start, no recovery */
	if (!recover)
		return SLURM_SUCCESS;

	state_path = xstrdup(dir_name);
	xstrcat(state_path, "/switch_cray_state");

	fd = open (state_path, O_RDONLY);
	if (fd < 0) {
		CRAY_ERR("No %s file for switch/cray state recovery",
			 state_path);
		CRAY_ERR("Starting switch/cray with clean state");
		xfree(state_path);
		return SLURM_SUCCESS;
	}

	/* Read the whole file. The buffer always has SWITCH_BUF_SIZE
	 * bytes of room beyond what has been read so far: it starts at
	 * that size and grows by the amount of each successful read. */
	capacity = SWITCH_BUF_SIZE;
	contents = xmalloc(capacity);
	for (;;) {
		nread = read (fd, &contents[used], SWITCH_BUF_SIZE);
		if (nread < 0) {
			/* interrupted: just try again */
			if (errno == EINTR)
				continue;
			CRAY_ERR("Read error on %s, %m", state_path);
			status = SLURM_ERROR;
			break;
		}
		if (nread == 0)
			break;		/* end of file */
		used     += nread;
		capacity += nread;
		xrealloc(contents, capacity);
	}
	close (fd);
	(void) unlink(state_path);	/* One chance to recover */
	xfree(state_path);

	if (status == SLURM_SUCCESS) {
		state_buf = create_buf (contents, used);
		contents = NULL;	/* now in buffer, don't xfree() */
		_state_read_buf(state_buf);
	}

	if (state_buf)
		free_buf(state_buf);
	xfree(contents);
#endif
	return SLURM_SUCCESS;
}
Ejemplo n.º 11
0
/*
 * start_msg_tree  - logic to begin the forward tree and
 *                   accumulate the return codes from the processes getting
 *                   the forwarded message
 *
 * IN: hl          - hostlist_t   - list of every node to send message to
 *                                  (made unique, then emptied by this call)
 * IN: msg         - slurm_msg_t  - message to send.
 * IN: timeout     - int          - how long to wait in milliseconds; a value
 *                                  <= 0 selects slurm_get_msg_timeout()
 *                                  converted to milliseconds.
 * RET List        - List containing the responses of the children
 *                   (if any) we forwarded the message to. List
 *                   containing type (ret_data_info_t).
 */
extern List start_msg_tree(hostlist_t hl, slurm_msg_t *msg, int timeout)
{
	int *span = NULL;
	fwd_tree_t *fwd_tree = NULL;
	pthread_mutex_t tree_mutex;
	pthread_cond_t notify;
	int j = 0, count = 0;
	List ret_list = NULL;
	char *name = NULL;
	int thr_count = 0;
	int host_count = 0;

	xassert(hl);
	xassert(msg);

	/* host_count (after de-duplication) is the number of responses the
	 * wait loop at the bottom expects to find in ret_list */
	hostlist_uniq(hl);
	host_count = hostlist_count(hl);

	/* span[i] is used below as the number of extra hosts given to the
	 * i-th thread in addition to its first host */
	span = set_span(host_count, 0);

	slurm_mutex_init(&tree_mutex);
	pthread_cond_init(&notify, NULL);

	ret_list = list_create(destroy_data_info);

	/* One detached thread per iteration; loop until hostlist_shift()
	 * returns NULL */
	while ((name = hostlist_shift(hl))) {
		pthread_attr_t attr_agent;
		pthread_t thread_agent;
		int retries = 0;

		slurm_attr_init(&attr_agent);
		if (pthread_attr_setdetachstate
		    (&attr_agent, PTHREAD_CREATE_DETACHED))
			error("pthread_attr_setdetachstate error %m");

		/* NOTE(review): fwd_tree is never freed here; presumably
		 * _fwd_tree_thread takes ownership - confirm */
		fwd_tree = xmalloc(sizeof(fwd_tree_t));
		fwd_tree->orig_msg = msg;
		fwd_tree->ret_list = ret_list;
		fwd_tree->timeout = timeout;
		/* NOTE(review): notify and tree_mutex are locals of this
		 * function; the threads must be done with them before the
		 * wait loop below completes - confirm in _fwd_tree_thread */
		fwd_tree->notify = &notify;
		fwd_tree->tree_mutex = &tree_mutex;

		if(fwd_tree->timeout <= 0) {
			/* convert secs to msec */
			fwd_tree->timeout  = slurm_get_msg_timeout() * 1000;
		}

		/* This thread's host list: the host just shifted plus up to
		 * span[thr_count] more taken from hl */
		fwd_tree->tree_hl = hostlist_create(name);
		free(name);
		for (j = 0; j < span[thr_count]; j++) {
			name = hostlist_shift(hl);
			if (!name)
				break;
			hostlist_push(fwd_tree->tree_hl, name);
			free(name);
		}

		/* Retry thread creation once per second; fatal() after more
		 * than MAX_RETRIES failures */
		while (pthread_create(&thread_agent, &attr_agent,
				      _fwd_tree_thread, (void *)fwd_tree)) {
			error("pthread_create error %m");
			if (++retries > MAX_RETRIES)
				fatal("Can't create pthread");
			sleep(1);	/* sleep and try again */
		}
		slurm_attr_destroy(&attr_agent);
		thr_count++;
	}
	xfree(span);

	/* Block on the condition variable until ret_list holds at least
	 * host_count entries */
	slurm_mutex_lock(&tree_mutex);

	count = list_count(ret_list);
	debug2("Tree head got back %d looking for %d", count, host_count);
	while ((count < host_count)) {
		pthread_cond_wait(&notify, &tree_mutex);
		count = list_count(ret_list);
		debug2("Tree head got back %d", count);
	}
	debug2("Tree head got them all");
	slurm_mutex_unlock(&tree_mutex);

	slurm_mutex_destroy(&tree_mutex);
	pthread_cond_destroy(&notify);

	return ret_list;
}
Ejemplo n.º 12
0
/* parse_bounds - parse the 'bounds' section of a CPLEX LP file
 *
 * On entry the current token must be T_BOUNDS. The routine then reads
 * bound definitions one after another until it meets a token that
 * cannot start a definition (anything other than a sign, a numeric
 * constant, or a symbolic name). Recognized forms, as implemented
 * below:
 *
 *    [lb <=] x [<= ub]      lower and/or upper bound
 *    x >= lb                lower bound
 *    x = value              fixed variable
 *    x free                 free (unbounded) variable
 *
 * where lb may be '-inf'/'-infinity' and ub may be '+inf'/'+infinity'.
 * Bounds are stored with set_lower_bound/set_upper_bound, using
 * -DBL_MAX and +DBL_MAX to denote infinite bounds.
 *
 * NOTE(review): the code relies on error() not returning (otherwise,
 * for instance, lb could be used uninitialized) -- confirm against the
 * definition of error() in this file. */
static void parse_bounds(struct csa *csa)
{     int j, lb_flag;
      double lb, s;
      /* parse the keyword 'bounds' */
      xassert(csa->token == T_BOUNDS);
      scan_token(csa);
loop: /* bound definition can start with a sign, numeric constant, or
         a symbolic name */
      if (!(csa->token == T_PLUS || csa->token == T_MINUS ||
            csa->token == T_NUMBER || csa->token == T_NAME)) goto done;
      /* parse bound definition */
      if (csa->token == T_PLUS || csa->token == T_MINUS)
      {  /* parse signed lower bound */
         lb_flag = 1;
         s = (csa->token == T_PLUS ? +1.0 : -1.0);
         scan_token(csa);
         if (csa->token == T_NUMBER)
            lb = s * csa->value, scan_token(csa);
         else if (the_same(csa->image, "infinity") ||
                  the_same(csa->image, "inf"))
         {  if (s > 0.0)
               error(csa, "invalid use of '+inf' as lower bound\n");
            lb = -DBL_MAX, scan_token(csa);
         }
         else
            error(csa, "missing lower bound\n");
      }
      else if (csa->token == T_NUMBER)
      {  /* parse unsigned lower bound */
         lb_flag = 1;
         lb = csa->value, scan_token(csa);
      }
      else
      {  /* lower bound is not specified */
         lb_flag = 0;
      }
      /* parse the token that should follow the lower bound */
      if (lb_flag)
      {  if (csa->token != T_LE)
            error(csa, "missing '<', '<=', or '=<' after lower bound\n")
               ;
         scan_token(csa);
      }
      /* parse variable name */
      if (csa->token != T_NAME)
         error(csa, "missing variable name\n");
      j = find_col(csa, csa->image);
      /* set lower bound */
      if (lb_flag) set_lower_bound(csa, j, lb);
      scan_token(csa);
      /* parse the context that follows the variable name */
      if (csa->token == T_LE)
      {  /* parse upper bound */
         scan_token(csa);
         if (csa->token == T_PLUS || csa->token == T_MINUS)
         {  /* parse signed upper bound */
            s = (csa->token == T_PLUS ? +1.0 : -1.0);
            scan_token(csa);
            if (csa->token == T_NUMBER)
            {  set_upper_bound(csa, j, s * csa->value);
               scan_token(csa);
            }
            else if (the_same(csa->image, "infinity") ||
                     the_same(csa->image, "inf"))
            {  if (s < 0.0)
                  error(csa, "invalid use of '-inf' as upper bound\n");
               set_upper_bound(csa, j, +DBL_MAX);
               scan_token(csa);
            }
            else
               error(csa, "missing upper bound\n");
         }
         else if (csa->token == T_NUMBER)
         {  /* parse unsigned upper bound */
            set_upper_bound(csa, j, csa->value);
            scan_token(csa);
         }
         else
            error(csa, "missing upper bound\n");
      }
      else if (csa->token == T_GE)
      {  /* parse lower bound */
         if (lb_flag)
         {  /* the context '... <= x >= ...' is invalid */
            error(csa, "invalid bound definition\n");
         }
         scan_token(csa);
         if (csa->token == T_PLUS || csa->token == T_MINUS)
         {  /* parse signed lower bound */
            s = (csa->token == T_PLUS ? +1.0 : -1.0);
            scan_token(csa);
            if (csa->token == T_NUMBER)
            {  set_lower_bound(csa, j, s * csa->value);
               scan_token(csa);
            }
            /* the keyword test must match the two sibling branches
               above: accept only 'infinity' or 'inf'; any other token
               falls through to the "missing lower bound" error */
            else if (the_same(csa->image, "infinity") ||
                     the_same(csa->image, "inf"))
            {  if (s > 0.0)
                  error(csa, "invalid use of '+inf' as lower bound\n");
               set_lower_bound(csa, j, -DBL_MAX);
               scan_token(csa);
            }
            else
               error(csa, "missing lower bound\n");
         }
         else if (csa->token == T_NUMBER)
         {  /* parse unsigned lower bound */
            set_lower_bound(csa, j, csa->value);
            scan_token(csa);
         }
         else
            error(csa, "missing lower bound\n");
      }
      else if (csa->token == T_EQ)
      {  /* parse fixed value */
         if (lb_flag)
         {  /* the context '... <= x = ...' is invalid */
            error(csa, "invalid bound definition\n");
         }
         scan_token(csa);
         if (csa->token == T_PLUS || csa->token == T_MINUS)
         {  /* parse signed fixed value */
            s = (csa->token == T_PLUS ? +1.0 : -1.0);
            scan_token(csa);
            if (csa->token == T_NUMBER)
            {  set_lower_bound(csa, j, s * csa->value);
               set_upper_bound(csa, j, s * csa->value);
               scan_token(csa);
            }
            else
               error(csa, "missing fixed value\n");
         }
         else if (csa->token == T_NUMBER)
         {  /* parse unsigned fixed value */
            set_lower_bound(csa, j, csa->value);
            set_upper_bound(csa, j, csa->value);
            scan_token(csa);
         }
         else
            error(csa, "missing fixed value\n");
      }
      else if (the_same(csa->image, "free"))
      {  /* parse the keyword 'free' */
         if (lb_flag)
         {  /* the context '... <= x free ...' is invalid */
            error(csa, "invalid bound definition\n");
         }
         set_lower_bound(csa, j, -DBL_MAX);
         set_upper_bound(csa, j, +DBL_MAX);
         scan_token(csa);
      }
      else if (!lb_flag)
      {  /* neither lower nor upper bounds are specified */
         error(csa, "invalid bound definition\n");
      }
      goto loop;
done: return;
}
Ejemplo n.º 13
0
/* check_parm - sanity-check control parameters
 *
 * func is the name of the calling API routine, parm the control
 * parameter block. Both pointers are asserted to be non-NULL; nothing
 * else is verified. */
static void check_parm(const char *func, const glp_cpxcp *parm)
{     xassert(func != NULL);
      xassert(parm != NULL);
}
Ejemplo n.º 14
0
/* glp_init_cpxcp - initialize CPLEX LP format control parameters
 *
 * The parameter block is only asserted to be non-NULL; no field is
 * written by this routine. */
void glp_init_cpxcp(glp_cpxcp *parm)
{     /* nothing to initialize beyond checking the pointer */
      xassert(parm != NULL);
}
Ejemplo n.º 15
0
/* glp_maxflow_ffalg - find maximal flow with Ford-Fulkerson algorithm
 *
 * G      flow network
 * s, t   ordinal numbers of the source and sink nodes; both must be in
 *        1..G->nv and must differ
 * a_cap  offset (in arc data) of a double holding the arc capacity; if
 *        negative, every arc is given capacity 1
 * sol    if not NULL, receives the total flow through the network,
 *        computed as the net flow leaving node s
 * a_x    offset (in arc data) where the arc flow is stored as a double;
 *        if negative, arc flows are not stored
 * v_cut  offset (in vertex data) where the node flag returned by ffalg
 *        is stored as an int; if negative, flags are not stored
 *
 * Returns 0 on success, or GLP_EDATA if some arc is a self-loop or its
 * capacity is not an integer in the range [0, INT_MAX]. Invalid node
 * numbers or offsets are reported through xerror. */
int glp_maxflow_ffalg(glp_graph *G, int s, int t, int a_cap,
      double *sol, int a_x, int v_cut)
{     /* find maximal flow with Ford-Fulkerson algorithm */
      glp_vertex *v;
      glp_arc *a;
      int nv, na, i, k, flag, *tail, *head, *cap, *x, ret;
      char *cut;
      double temp;
      if (!(1 <= s && s <= G->nv))
         xerror("glp_maxflow_ffalg: s = %d; source node number out of r"
            "ange\n", s);
      if (!(1 <= t && t <= G->nv))
         xerror("glp_maxflow_ffalg: t = %d: sink node number out of ran"
            "ge\n", t);
      if (s == t)
         xerror("glp_maxflow_ffalg: s = t = %d; source and sink nodes m"
            "ust be distinct\n", s);
      if (a_cap >= 0 && a_cap > G->a_size - (int)sizeof(double))
         xerror("glp_maxflow_ffalg: a_cap = %d; invalid offset\n",
            a_cap);
      /* a double is written at a->data + a_x below, so this offset
         must be validated exactly like a_cap */
      if (a_x >= 0 && a_x > G->a_size - (int)sizeof(double))
         xerror("glp_maxflow_ffalg: a_x = %d; invalid offset\n", a_x);
      if (v_cut >= 0 && v_cut > G->v_size - (int)sizeof(int))
         xerror("glp_maxflow_ffalg: v_cut = %d; invalid offset\n",
            v_cut);
      /* allocate working arrays */
      nv = G->nv;
      na = G->na;
      tail = xcalloc(1+na, sizeof(int));
      head = xcalloc(1+na, sizeof(int));
      cap = xcalloc(1+na, sizeof(int));
      x = xcalloc(1+na, sizeof(int));
      if (v_cut < 0)
         cut = NULL;
      else
         cut = xcalloc(1+nv, sizeof(char));
      /* copy the flow network */
      k = 0;
      for (i = 1; i <= G->nv; i++)
      {  v = G->v[i];
         for (a = v->out; a != NULL; a = a->t_next)
         {  k++;
            tail[k] = a->tail->i;
            head[k] = a->head->i;
            if (tail[k] == head[k])
            {  /* self-loops are not allowed */
               ret = GLP_EDATA;
               goto done;
            }
            if (a_cap >= 0)
               memcpy(&temp, (char *)a->data + a_cap, sizeof(double));
            else
               temp = 1.0;
            /* capacity must be a non-negative integer that fits int */
            if (!(0.0 <= temp && temp <= (double)INT_MAX &&
                  temp == floor(temp)))
            {  ret = GLP_EDATA;
               goto done;
            }
            cap[k] = (int)temp;
         }
      }
      xassert(k == na);
      /* find maximal flow in the flow network */
      ffalg(nv, na, tail, head, s, t, cap, x, cut);
      ret = 0;
      /* store solution components */
      /* (objective function = total flow through the network) */
      if (sol != NULL)
      {  temp = 0.0;
         for (k = 1; k <= na; k++)
         {  if (tail[k] == s)
               temp += (double)x[k];
            else if (head[k] == s)
               temp -= (double)x[k];
         }
         *sol = temp;
      }
      /* (arc flows) */
      if (a_x >= 0)
      {  /* arcs are visited in the same order as when the network was
            copied, so x[k] belongs to the k-th arc visited */
         k = 0;
         for (i = 1; i <= G->nv; i++)
         {  v = G->v[i];
            for (a = v->out; a != NULL; a = a->t_next)
            {  temp = (double)x[++k];
               memcpy((char *)a->data + a_x, &temp, sizeof(double));
            }
         }
      }
      /* (node flags) */
      if (v_cut >= 0)
      {  for (i = 1; i <= G->nv; i++)
         {  v = G->v[i];
            flag = cut[i];
            memcpy((char *)v->data + v_cut, &flag, sizeof(int));
         }
      }
done: /* free working arrays */
      xfree(tail);
      xfree(head);
      xfree(cap);
      xfree(x);
      if (cut != NULL) xfree(cut);
      return ret;
}
Ejemplo n.º 16
0
/* yfree - release a memory block with free()
 *
 * Unlike plain free(), a NULL pointer is not accepted: it trips the
 * assertion. */
static void yfree(void *ptr)
{     xassert(ptr != NULL);
      free(ptr);
}
Ejemplo n.º 17
0
/* glp_mincost_lp - build an LP from a minimum cost flow network
 *
 * lp      problem object; erased first, then filled in
 * G       flow network
 * names   GLP_ON to copy the graph and vertex names and to name the
 *         columns "x[tail,head]"; GLP_OFF to assign no names
 * v_rhs   offset (in vertex data) of a double used as the row's fixed
 *         right-hand side; negative means 0
 * a_low   offset (in arc data) of the arc's lower flow bound; negative
 *         means 0
 * a_cap   offset (in arc data) of the arc's capacity; negative means 1
 * a_cost  offset (in arc data) of the arc's cost; negative means 0
 *
 * One row is created per vertex and one column per arc. */
void glp_mincost_lp(glp_prob *lp, glp_graph *G, int names, int v_rhs,
      int a_low, int a_cap, int a_cost)
{     glp_vertex *vtx;
      glp_arc *arc;
      int i, j, kind, ind[1+2];
      double rhs, lo, up, cost, val[1+2];
      /* validate arguments */
      if (!(names == GLP_ON || names == GLP_OFF))
         xerror("glp_mincost_lp: names = %d; invalid parameter\n",
            names);
      if (v_rhs >= 0 && v_rhs > G->v_size - (int)sizeof(double))
         xerror("glp_mincost_lp: v_rhs = %d; invalid offset\n", v_rhs);
      if (a_low >= 0 && a_low > G->a_size - (int)sizeof(double))
         xerror("glp_mincost_lp: a_low = %d; invalid offset\n", a_low);
      if (a_cap >= 0 && a_cap > G->a_size - (int)sizeof(double))
         xerror("glp_mincost_lp: a_cap = %d; invalid offset\n", a_cap);
      if (a_cost >= 0 && a_cost > G->a_size - (int)sizeof(double))
         xerror("glp_mincost_lp: a_cost = %d; invalid offset\n", a_cost)
            ;
      /* start from an empty problem */
      glp_erase_prob(lp);
      if (names) glp_set_prob_name(lp, G->name);
      /* rows: one equality constraint per vertex */
      if (G->nv > 0) glp_add_rows(lp, G->nv);
      for (i = 1; i <= G->nv; i++)
      {  vtx = G->v[i];
         if (names) glp_set_row_name(lp, i, vtx->name);
         rhs = 0.0;
         if (v_rhs >= 0)
            memcpy(&rhs, (char *)vtx->data + v_rhs, sizeof(double));
         glp_set_row_bnds(lp, i, GLP_FX, rhs, rhs);
      }
      /* columns: one variable per arc, numbered in traversal order */
      if (G->na > 0) glp_add_cols(lp, G->na);
      j = 0;
      for (i = 1; i <= G->nv; i++)
      {  for (arc = G->v[i]->out; arc != NULL; arc = arc->t_next)
         {  j++;
            if (names)
            {  char name[50+1];
               sprintf(name, "x[%d,%d]", arc->tail->i, arc->head->i);
               xassert(strlen(name) < sizeof(name));
               glp_set_col_name(lp, j, name);
            }
            /* +1 in the tail row, -1 in the head row; a self-loop gets
               no constraint coefficients */
            if (arc->tail->i != arc->head->i)
            {  ind[1] = arc->tail->i;
               val[1] = +1.0;
               ind[2] = arc->head->i;
               val[2] = -1.0;
               glp_set_mat_col(lp, j, 2, ind, val);
            }
            /* column bounds */
            lo = 0.0;
            if (a_low >= 0)
               memcpy(&lo, (char *)arc->data + a_low, sizeof(double));
            up = 1.0;
            if (a_cap >= 0)
               memcpy(&up, (char *)arc->data + a_cap, sizeof(double));
            if (up == DBL_MAX)
               kind = GLP_LO;       /* capacity DBL_MAX: no upper bound */
            else
               kind = (lo != up ? GLP_DB : GLP_FX);
            glp_set_col_bnds(lp, j, kind, lo, up);
            /* objective coefficient */
            cost = 0.0;
            if (a_cost >= 0)
               memcpy(&cost, (char *)arc->data + a_cost,
                  sizeof(double));
            glp_set_obj_coef(lp, j, cost);
         }
      }
      xassert(j == G->na);
}
Ejemplo n.º 18
0
/*
 * dump_all_front_end_state - save the state of all front_end nodes to file
 *
 * The records are packed into a buffer, written to
 * "<state_save_location>/front_end_state.new", and, if that succeeded, the
 * files are rotated: the current file becomes ".old" and ".new" becomes the
 * current file.  On failure the ".new" file is removed.
 *
 * RET 0 on success, otherwise the errno of the failing create/write or the
 *     non-zero result of fsync_and_close().  Without HAVE_FRONT_END the
 *     function does nothing and returns SLURM_SUCCESS.
 */
extern int dump_all_front_end_state(void)
{
#ifdef HAVE_FRONT_END
    /* Save high-water mark to avoid buffer growth with copies */
    static int high_buffer_size = (1024 * 1024);
    int error_code = 0, i, log_fd;
    char *old_file, *new_file, *reg_file;
    front_end_record_t *front_end_ptr;
    /* Locks: Read config and node */
    slurmctld_lock_t node_read_lock = { READ_LOCK, NO_LOCK, READ_LOCK,
                                        NO_LOCK
                                      };
    Buf buffer = init_buf(high_buffer_size);
    DEF_TIMERS;

    START_TIMER;
    /* write header: version, time */
    packstr(FRONT_END_STATE_VERSION, buffer);
    pack_time(time(NULL), buffer);

    /* write node records to buffer */
    lock_slurmctld (node_read_lock);

    for (i = 0, front_end_ptr = front_end_nodes;
            i < front_end_node_cnt; i++, front_end_ptr++) {
        xassert(front_end_ptr->magic == FRONT_END_MAGIC);
        _dump_front_end_state(front_end_ptr, buffer);
    }

    old_file = xstrdup (slurmctld_conf.state_save_location);
    xstrcat (old_file, "/front_end_state.old");
    reg_file = xstrdup (slurmctld_conf.state_save_location);
    xstrcat (reg_file, "/front_end_state");
    new_file = xstrdup (slurmctld_conf.state_save_location);
    xstrcat (new_file, "/front_end_state.new");
    unlock_slurmctld (node_read_lock);

    /* write the buffer to file */
    lock_state_files();
    log_fd = creat (new_file, 0600);
    if (log_fd < 0) {
        /* capture errno before logging so that nothing the logger does
         * can change the value we return */
        error_code = errno;
        error ("Can't save state, error creating file %s %m", new_file);
    } else {
        int pos = 0, nwrite = get_buf_offset(buffer), amount, rc;
        char *data = (char *)get_buf_data(buffer);
        high_buffer_size = MAX(nwrite, high_buffer_size);
        while (nwrite > 0) {
            amount = write(log_fd, &data[pos], nwrite);
            if (amount < 0) {
                /* An interrupted write transferred nothing: retry it
                 * without touching pos/nwrite (applying amount == -1
                 * to them would corrupt the position). */
                if (errno == EINTR)
                    continue;
                error_code = errno;
                error("Error writing file %s, %m", new_file);
                break;
            }
            nwrite -= amount;
            pos    += amount;
        }

        rc = fsync_and_close(log_fd, "front_end");
        if (rc && !error_code)
            error_code = rc;
    }
    if (error_code)
        (void) unlink (new_file);
    else {	/* file shuffle */
        (void) unlink (old_file);
        if (link(reg_file, old_file))
            debug4("unable to create link for %s -> %s: %m",
                   reg_file, old_file);
        (void) unlink (reg_file);
        if (link(new_file, reg_file))
            debug4("unable to create link for %s -> %s: %m",
                   new_file, reg_file);
        (void) unlink (new_file);
    }
    xfree (old_file);
    xfree (reg_file);
    xfree (new_file);
    unlock_state_files ();

    free_buf (buffer);
    END_TIMER2("dump_all_front_end_state");
    return error_code;
#else
    return SLURM_SUCCESS;
#endif
}
Ejemplo n.º 19
0
/* glp_asnprob_hall - bipartite matching computed with mc21a
 *
 * G      graph checked with glp_check_asnprob
 * v_set  offset (in vertex data) passed to glp_check_asnprob
 * a_x    offset (in arc data) where an int 0/1 flag is stored for each
 *        arc (1 = arc belongs to the matching found); if negative,
 *        nothing is stored
 *
 * Returns the value returned by mc21a (the cardinality of the matching
 * found), or -1 if glp_check_asnprob rejects the graph. */
int glp_asnprob_hall(glp_graph *G, int v_set, int a_x)
{     glp_vertex *v;
      glp_arc *a;
      int card, i, k, loc, n, n1, n2, xij;
      int *num, *icn, *ip, *lenr, *iperm, *pr, *arp, *cv, *out;
      if (v_set >= 0 && v_set > G->v_size - (int)sizeof(int))
         xerror("glp_asnprob_hall: v_set = %d; invalid offset\n",
            v_set);
      if (a_x >= 0 && a_x > G->a_size - (int)sizeof(int))
         xerror("glp_asnprob_hall: a_x = %d; invalid offset\n", a_x);
      if (glp_check_asnprob(G, v_set))
         return -1;
      /* classify vertices: num[i] = 0 for a vertex in R (row), the
         column number for a vertex in S, and -1 for an isolated
         vertex; n1 and n2 count the vertices in R and S */
      num = xcalloc(1+G->nv, sizeof(int));
      n1 = 0;
      n2 = 0;
      for (i = 1; i <= G->nv; i++)
      {  v = G->v[i];
         if (v->in == NULL && v->out != NULL)
         {  /* vertex in R */
            n1++;
            num[i] = 0;
         }
         else if (v->in != NULL && v->out == NULL)
         {  /* vertex in S */
            n2++;
            num[i] = n2;
         }
         else
         {  /* isolated vertex */
            xassert(v->in == NULL && v->out == NULL);
            num[i] = -1;
         }
      }
      /* the matrix must be square, so its order is the larger of the
         two set sizes; the surplus rows or columns stay empty */
      n = n1;
      if (n2 > n) n = n2;
      /* allocate working arrays */
      icn = xcalloc(1+G->na, sizeof(int));
      ip = xcalloc(1+n, sizeof(int));
      lenr = xcalloc(1+n, sizeof(int));
      iperm = xcalloc(1+n, sizeof(int));
      pr = xcalloc(1+n, sizeof(int));
      arp = xcalloc(1+n, sizeof(int));
      cv = xcalloc(1+n, sizeof(int));
      out = xcalloc(1+n, sizeof(int));
      /* row-wise adjacency matrix: row k lists, in icn[ip[k]..], the
         column numbers of the heads of all arcs leaving the k-th vertex
         of R */
      k = 0;
      loc = 1;
      for (i = 1; i <= G->nv; i++)
      {  if (num[i] == 0)
         {  /* vertex i in R */
            k++;
            ip[k] = loc;
            v = G->v[i];
            for (a = v->out; a != NULL; a = a->t_next)
            {  xassert(num[a->head->i] != 0);
               icn[loc] = num[a->head->i];
               loc++;
            }
            lenr[k] = loc - ip[k];
         }
      }
      xassert(loc-1 == G->na);
      /* rows beyond the last vertex of R are empty (extra columns are
         empty automatically in the row-wise format) */
      k++;
      while (k <= n)
      {  ip[k] = loc;
         lenr[k] = 0;
         k++;
      }
      /* find a row permutation that maximizes the number of non-zeros
         on the main diagonal */
      card = mc21a(n, icn, ip, lenr, iperm, pr, arp, cv, out);
      /* rebuild arp from iperm (fix of 18/II-2010: if card = n, arp
         remains clobbered on exit from mc21a): arp[k] = i for every
         matched pair, zero otherwise */
      for (i = 1; i <= n; i++)
         arp[i] = 0;
      for (i = 1; i <= card; i++)
      {  k = iperm[i];
         xassert(1 <= k && k <= n);
         xassert(arp[k] == 0);
         arp[k] = i;
      }
      /* store solution, if necessary */
      if (a_x >= 0)
      {  k = 0;
         for (i = 1; i <= G->nv; i++)
         {  if (num[i] == 0)
            {  /* vertex i in R is row k */
               k++;
               v = G->v[i];
               for (a = v->out; a != NULL; a = a->t_next)
               {  /* arp[k] is the number of matched column or zero */
                  if (arp[k] == num[a->head->i])
                  {  xassert(arp[k] != 0);
                     xij = 1;
                  }
                  else
                     xij = 0;
                  memcpy((char *)a->data + a_x, &xij, sizeof(int));
               }
            }
         }
      }
      /* free working arrays */
      xfree(num);
      xfree(icn);
      xfree(ip);
      xfree(lenr);
      xfree(iperm);
      xfree(pr);
      xfree(arp);
      xfree(cv);
      xfree(out);
      return card;
}
Ejemplo n.º 20
0
/*
 * Return a list of plugin names that match the given type.
 *
 * Every directory of the ':'-separated plugin dir is scanned for files named
 * "<plugin_type>_<name>.so"; each match is reported as "<plugin_type>/<name>"
 * and appears at most once in the result.  File names too short to carry the
 * ".so" suffix, or too long for the internal name buffer, are ignored.
 *
 * IN plugin_type - Type of plugin to search for in the plugin_dir.
 * RET list of plugin names, NULL if none found.  Scanning stops at the first
 *     directory that cannot be opened; names collected so far are returned.
 */
extern List plugin_get_plugins_of_type(char *plugin_type)
{
	List plugin_names = NULL;
	char *plugin_dir = NULL, *dir = NULL, *save_ptr = NULL;
	char *type_under = NULL, *type_slash = NULL;
	size_t prefix_len;
	DIR *dirp;
	struct dirent *e;

	if (!(plugin_dir = slurm_get_plugin_dir())) {
		error("%s: No plugin dir given", __func__);
		goto done;
	}

	type_under = xstrdup_printf("%s_", plugin_type);
	type_slash = xstrdup_printf("%s/", plugin_type);
	/* "<type>_" and "<type>/" have the same length by construction */
	prefix_len = strlen(type_under);

	dir = strtok_r(plugin_dir, ":", &save_ptr);
	while (dir) {
		/* Open the directory. */
		if (!(dirp = opendir(dir))) {
			error("cannot open plugin directory %s", dir);
			goto done;
		}

		while ((e = readdir(dirp))) {
			char full_name[128];
			size_t name_len, out_size;

			/* Check only files with "plugintype_" in them. */
			if (xstrncmp(e->d_name, type_under, prefix_len))
				continue;

			/*
			 * Check only shared object files.  A name shorter
			 * than the suffix cannot match, and must not be
			 * indexed at a negative offset.
			 */
			name_len = strlen(e->d_name);
			if (name_len < 3)
				continue;
			if (xstrcmp(e->d_name + name_len - 3, ".so"))
				continue;

			/*
			 * Output is the file name with '_' replaced by '/'
			 * and the ".so" suffix dropped: name_len - 3
			 * characters plus the terminating NUL.  Passing
			 * exactly that size to snprintf() is what truncates
			 * the suffix.
			 */
			out_size = name_len - 3 + 1;
			if (out_size >= sizeof(full_name)) {
				error("%s: plugin file name %s is too long, ignoring it",
				      __func__, e->d_name);
				continue;
			}
			snprintf(full_name, out_size, "%s%s",
				 type_slash, e->d_name + prefix_len);

			if (!plugin_names)
				plugin_names = list_create(slurm_destroy_char);
			if (!list_find_first(plugin_names,
					     slurm_find_char_in_list,
					     full_name))
				list_append(plugin_names, xstrdup(full_name));
		}
		closedir(dirp);

		dir = strtok_r(NULL, ":", &save_ptr);
	}

done:
	xfree(plugin_dir);
	xfree(type_under);
	xfree(type_slash);

	return plugin_names;
}
Ejemplo n.º 21
0
/*
 * slurm_job_step_get_pids - get the complete list of pids for a given
 *      job step
 *
 * IN job_id
 * IN step_id
 * IN node_list, optional, if NULL then all nodes in step are returned.
 * OUT resp - if *resp is NULL a response message is allocated here (and
 *      released again, with *resp reset to NULL, when no reply list is
 *      received); otherwise the caller's message is filled in
 * RET SLURM_SUCCESS on success SLURM_ERROR else
 */
extern int slurm_job_step_get_pids(uint32_t job_id, uint32_t step_id,
				   char *node_list,
				   job_step_pids_response_msg_t **resp)
{
	int rc = SLURM_SUCCESS;
	slurm_msg_t req_msg;
	job_step_id_msg_t req;
	ListIterator itr;
	List ret_list = NULL;
	ret_data_info_t *ret_data_info = NULL;
	slurm_step_layout_t *step_layout = NULL;
	job_step_pids_response_msg_t *resp_out;
	bool created = 0;

	xassert(resp);

	/* No node list given: query the step layout and use its nodes */
	if (!node_list) {
		step_layout = slurm_job_step_layout_get(job_id, step_id);
		if (!step_layout) {
			rc = errno;
			error("slurm_job_step_get_pids: "
			      "problem getting step_layout for %u.%u: %s",
			      job_id, step_id, slurm_strerror(rc));
			return rc;
		}
		node_list = step_layout->node_list;
	}

	/* Use the caller's response message, or allocate one */
	resp_out = *resp;
	if (!resp_out) {
		resp_out = xmalloc(sizeof(job_step_pids_response_msg_t));
		*resp = resp_out;
		created = 1;
	}

	debug("slurm_job_step_get_pids: "
	      "getting pid information of job %u.%u on nodes %s",
	      job_id, step_id, node_list);

	slurm_msg_t_init(&req_msg);

	memset(&req, 0, sizeof(req));
	req.job_id = job_id;
	resp_out->job_id = job_id;
	req.step_id = step_id;
	resp_out->step_id = step_id;

	req_msg.msg_type = REQUEST_JOB_STEP_PIDS;
	req_msg.data = &req;

	ret_list = slurm_send_recv_msgs(node_list, &req_msg, 0, false);
	if (!ret_list) {
		error("slurm_job_step_get_pids: got an error no list returned");
		rc = SLURM_ERROR;
		if (created) {
			slurm_job_step_pids_response_msg_free(resp_out);
			*resp = NULL;
		}
		goto cleanup;
	}

	itr = list_iterator_create(ret_list);
	while ((ret_data_info = list_next(itr))) {
		if (ret_data_info->type == RESPONSE_JOB_STEP_PIDS) {
			/* Move the payload into the pid list; clearing the
			 * data pointer detaches it from ret_list */
			if (!resp_out->pid_list)
				resp_out->pid_list = list_create(
					slurm_free_job_step_pids);
			list_push(resp_out->pid_list, ret_data_info->data);
			ret_data_info->data = NULL;
			continue;
		}

		/* Anything else is reported as an error */
		rc = slurm_get_return_code(ret_data_info->type,
					   ret_data_info->data);
		if (ret_data_info->type == RESPONSE_SLURM_RC) {
			error("slurm_job_step_get_pids: "
			      "there was an error with the "
			      "list pid request rc = %s",
			      slurm_strerror(rc));
		} else {
			error("slurm_job_step_get_pids: "
			      "unknown return given %d rc = %s",
			      ret_data_info->type, slurm_strerror(rc));
		}
	}
	list_iterator_destroy(itr);
	FREE_NULL_LIST(ret_list);

	if (resp_out->pid_list)
		list_sort(resp_out->pid_list, (ListCmpF)_sort_pids_by_name);

cleanup:
	slurm_step_layout_destroy(step_layout);

	return rc;
}
Ejemplo n.º 22
0
/*
 * srun_user_message - Send arbitrary message to an srun job (no job steps)
 *
 * If the job has a response host and port, the message is launched as
 * SRUN_USER_MSG through _srun_agent_launch().  Otherwise, for a running
 * batch job, a REQUEST_JOB_NOTIFY RPC is queued for the batch host
 * (front-end builds) or the first node of the job's node bitmap.
 *
 * IN job_ptr - job to notify
 * IN msg     - message text; it is copied
 * RET SLURM_SUCCESS once the message is handed over, ESLURM_ALREADY_DONE if
 *     the job is neither pending nor running, ESLURM_DISABLED if no
 *     destination is available
 */
extern int srun_user_message(struct job_record *job_ptr, char *msg)
{
	job_notify_msg_t *notify_msg_ptr;
	agent_arg_t *agent_arg_ptr;
#ifndef HAVE_FRONT_END
	struct node_record *node_ptr;
#endif

	xassert(job_ptr);
	if (!IS_JOB_PENDING(job_ptr) && !IS_JOB_RUNNING(job_ptr))
		return ESLURM_ALREADY_DONE;

	/* A response host/port is known: use it */
	if (job_ptr->other_port &&
	    job_ptr->resp_host && job_ptr->resp_host[0]) {
		slurm_addr_t *addr;
		srun_user_msg_t *msg_arg;

		addr = xmalloc(sizeof(struct sockaddr_in));
		slurm_set_addr(addr, job_ptr->other_port, job_ptr->resp_host);
		msg_arg = xmalloc(sizeof(srun_user_msg_t));
		msg_arg->job_id = job_ptr->job_id;
		msg_arg->msg    = xstrdup(msg);
		_srun_agent_launch(addr, job_ptr->resp_host, SRUN_USER_MSG,
				   msg_arg, job_ptr->start_protocol_ver);
		return SLURM_SUCCESS;
	}

	/* Otherwise only a running batch job can be notified */
	if (!job_ptr->batch_flag || !IS_JOB_RUNNING(job_ptr))
		return ESLURM_DISABLED;

#ifdef HAVE_FRONT_END
	if (job_ptr->batch_host == NULL)
		return ESLURM_DISABLED;	/* no allocated nodes */
	agent_arg_ptr = xmalloc(sizeof(agent_arg_t));
	agent_arg_ptr->hostlist = hostlist_create(job_ptr->batch_host);
	if (!agent_arg_ptr->hostlist)
		fatal("Invalid srun host: %s", job_ptr->batch_host);

	if (job_ptr->front_end_ptr)
		agent_arg_ptr->protocol_version =
			job_ptr->front_end_ptr->protocol_version;
#else
	node_ptr = find_first_node_record(job_ptr->node_bitmap);
	if (node_ptr == NULL)
		return ESLURM_DISABLED;	/* no allocated nodes */
	agent_arg_ptr = xmalloc(sizeof(agent_arg_t));
	agent_arg_ptr->hostlist = hostlist_create(node_ptr->name);
	agent_arg_ptr->protocol_version = node_ptr->protocol_version;
	if (!agent_arg_ptr->hostlist)
		fatal("Invalid srun host: %s", node_ptr->name);
#endif

	notify_msg_ptr = xmalloc(sizeof(job_notify_msg_t));
	notify_msg_ptr->job_id = job_ptr->job_id;
	notify_msg_ptr->message = xstrdup(msg);

	agent_arg_ptr->node_count = 1;
	agent_arg_ptr->retry = 0;
	agent_arg_ptr->msg_type = REQUEST_JOB_NOTIFY;
	agent_arg_ptr->msg_args = (void *) notify_msg_ptr;

	/* Launch the RPC via agent */
	agent_queue_request(agent_arg_ptr);
	return SLURM_SUCCESS;
}
Ejemplo n.º 23
0
/*
 * jag_common_poll_data - refresh the accounting data of every task in
 *	task_list from a freshly gathered list of process records (precs).
 *
 * IN task_list - list of struct jobacctinfo, one per task; updated in place
 * IN pgid_plugin - passed through to the get_precs callback; when false,
 *	cont_id must already be set or the poll is skipped
 * IN cont_id - container id, passed through to the get_precs callback
 * IN/OUT callbacks - must not be NULL; get_precs is filled in with
 *	_get_precs when unset, prec_extra and get_offspring_data are optional
 * IN profile - if true (and task profiling is active) record a profile
 *	sample for each task
 *
 * A static "processing" flag makes a call that arrives while another poll
 * is still in progress return immediately without doing anything.
 */
extern void jag_common_poll_data(
	List task_list, bool pgid_plugin, uint64_t cont_id,
	jag_callbacks_t *callbacks, bool profile)
{
	/* Update the data */
	List prec_list = NULL;
	uint64_t total_job_mem = 0, total_job_vsize = 0;
	ListIterator itr;
	jag_prec_t *prec = NULL;
	struct jobacctinfo *jobacct = NULL;
	static int processing = 0;	/* set while a poll is in progress */
	char sbuf[72];
	int energy_counted = 0;		/* energy is read for one task only */
	time_t ct;
	static int over_memory_kill = -1;	/* -1: not yet looked up */
	int i = 0;

	xassert(callbacks);

	if (!pgid_plugin && (cont_id == NO_VAL64)) {
		debug("cont_id hasn't been set yet not running poll");
		return;
	}

	if (processing) {
		debug("already running, returning");
		return;
	}
	processing = 1;

	/* Fall back to the default record gatherer if none was supplied */
	if (!callbacks->get_precs)
		callbacks->get_precs = _get_precs;

	ct = time(NULL);
	prec_list = (*(callbacks->get_precs))(task_list, pgid_plugin, cont_id,
					      callbacks);

	if (!list_count(prec_list) || !task_list || !list_count(task_list))
		goto finished;	/* We have no business being here! */

	itr = list_iterator_create(task_list);
	while ((jobacct = list_next(itr))) {
		double cpu_calc;
		double last_total_cputime;
		/* Tasks without a matching process record are left as is */
		if (!(prec = list_find_first(prec_list, _find_prec, jobacct)))
			continue;

		/*
		 * Only jobacct_gather/cgroup uses prec_extra, and we want to
		 * make sure we call it once per task, so call it here as we
		 * iterate through the tasks instead of in get_precs.
		 */
		if (callbacks->prec_extra)
			(*(callbacks->prec_extra))(prec, jobacct->id.taskid);

#if _DEBUG
		info("pid:%u ppid:%u rss:%"PRIu64" B",
		     prec->pid, prec->ppid,
		     prec->tres_data[TRES_ARRAY_MEM].size_read);
#endif
		/* find all my descendants */
		if (callbacks->get_offspring_data)
			(*(callbacks->get_offspring_data))
				(prec_list, prec, prec->pid);

		/*
		 * Remember the CPU total stored by the previous poll; it must
		 * be read here because the tally loop below overwrites it.
		 */
		last_total_cputime =
			(double)jobacct->tres_usage_in_tot[TRES_ARRAY_CPU];

		/* system + user time, converted from ticks using hertz */
		cpu_calc = (prec->ssec + prec->usec) / (double)hertz;

		/*
		 * Since we are not storing things as a double anymore make it
		 * bigger so we don't lose precision.
		 */
		cpu_calc *= CPU_TIME_ADJ;

		prec->tres_data[TRES_ARRAY_CPU].size_read = (uint64_t)cpu_calc;

		/*
		 * Get energy consumption.  Only once is enough since we
		 * report per node energy consumption.
		 * Energy is stored in read fields, while power is stored
		 * in write fields.
		 */
		debug2("energycounted = %d", energy_counted);
		if (energy_counted == 0) {
			acct_gather_energy_g_get_data(
				energy_profile,
				&jobacct->energy);
			prec->tres_data[TRES_ARRAY_ENERGY].size_read =
				jobacct->energy.consumed_energy;
			prec->tres_data[TRES_ARRAY_ENERGY].size_write =
				jobacct->energy.current_watts;
			debug2("%s: energy = %"PRIu64" watts = %"PRIu64" ave_watts = %u",
			       __func__,
			       prec->tres_data[TRES_ARRAY_ENERGY].size_read,
			       prec->tres_data[TRES_ARRAY_ENERGY].size_write,
			       jobacct->energy.ave_watts);
			energy_counted = 1;
		}

		/* tally their usage */
		for (i = 0; i < jobacct->tres_count; i++) {
			/* INFINITE64 in size_read: nothing to tally */
			if (prec->tres_data[i].size_read == INFINITE64)
				continue;
			/* An INFINITE64 max means "not set yet" */
			if (jobacct->tres_usage_in_max[i] == INFINITE64)
				jobacct->tres_usage_in_max[i] =
					prec->tres_data[i].size_read;
			else
				jobacct->tres_usage_in_max[i] =
					MAX(jobacct->tres_usage_in_max[i],
					    prec->tres_data[i].size_read);
			/*
			 * Even with min we want to get the max as we are
			 * looking at a specific task, so we are always looking
			 * at the max that task had, not the min (or lots of
			 * things will be zero).  The min is from comparing
			 * ranks later when combining.  So here it will be the
			 * same as the max value set above.
			 * (same thing goes for the out)
			 */
			jobacct->tres_usage_in_min[i] =
				jobacct->tres_usage_in_max[i];
			jobacct->tres_usage_in_tot[i] =
				prec->tres_data[i].size_read;

			if (jobacct->tres_usage_out_max[i] == INFINITE64)
				jobacct->tres_usage_out_max[i] =
					prec->tres_data[i].size_write;
			else
				jobacct->tres_usage_out_max[i] =
					MAX(jobacct->tres_usage_out_max[i],
					    prec->tres_data[i].size_write);
			jobacct->tres_usage_out_min[i] =
				jobacct->tres_usage_out_max[i];
			jobacct->tres_usage_out_tot[i] =
				prec->tres_data[i].size_write;
		}

		/* Accumulate job-wide totals for the memory limit check */
		total_job_mem += jobacct->tres_usage_in_tot[TRES_ARRAY_MEM];
		total_job_vsize += jobacct->tres_usage_in_tot[TRES_ARRAY_VMEM];

		/* Update the cpu times */
		jobacct->user_cpu_sec = (uint32_t)(prec->usec / (double)hertz);
		jobacct->sys_cpu_sec = (uint32_t)(prec->ssec / (double)hertz);

		/*
		 * compute frequency: the CPU time accrued since the total
		 * saved above, then the current frequency of the CPU recorded
		 * in prec->last_cpu, combined by _update_weighted_freq()
		 */
		jobacct->this_sampled_cputime =
			cpu_calc - last_total_cputime;
		_get_sys_interface_freq_line(
			prec->last_cpu,
			"cpuinfo_cur_freq", sbuf);
		jobacct->act_cpufreq =
			_update_weighted_freq(jobacct, sbuf);

		debug("%s: Task %u pid %d ave_freq = %u mem size/max %"PRIu64"/%"PRIu64" vmem size/max %"PRIu64"/%"PRIu64", disk read size/max (%"PRIu64"/%"PRIu64"), disk write size/max (%"PRIu64"/%"PRIu64"), time %f(%u+%u) Energy tot/max %"PRIu64"/%"PRIu64" TotPower %"PRIu64" MaxPower %"PRIu64" MinPower %"PRIu64,
		      __func__,
		      jobacct->id.taskid,
		      jobacct->pid,
		      jobacct->act_cpufreq,
		      jobacct->tres_usage_in_tot[TRES_ARRAY_MEM],
		      jobacct->tres_usage_in_max[TRES_ARRAY_MEM],
		      jobacct->tres_usage_in_tot[TRES_ARRAY_VMEM],
		      jobacct->tres_usage_in_max[TRES_ARRAY_VMEM],
		      jobacct->tres_usage_in_tot[TRES_ARRAY_FS_DISK],
		      jobacct->tres_usage_in_max[TRES_ARRAY_FS_DISK],
		      jobacct->tres_usage_out_tot[TRES_ARRAY_FS_DISK],
		      jobacct->tres_usage_out_max[TRES_ARRAY_FS_DISK],
		      (double)(jobacct->tres_usage_in_tot[TRES_ARRAY_CPU] /
			       CPU_TIME_ADJ),
		      jobacct->user_cpu_sec,
		      jobacct->sys_cpu_sec,
		      jobacct->tres_usage_in_tot[TRES_ARRAY_ENERGY],
		      jobacct->tres_usage_in_max[TRES_ARRAY_ENERGY],
		      jobacct->tres_usage_out_tot[TRES_ARRAY_ENERGY],
		      jobacct->tres_usage_out_max[TRES_ARRAY_ENERGY],
		      jobacct->tres_usage_out_min[TRES_ARRAY_ENERGY]);

		if (profile &&
		    acct_gather_profile_g_is_active(ACCT_GATHER_PROFILE_TASK)) {
			jobacct->cur_time = ct;

			_record_profile(jobacct);

			/*
			 * Save the values just recorded as the "last" ones
			 * (disk read/write totals, CPU total and sample time)
			 */
			jobacct->last_tres_usage_in_tot =
				jobacct->tres_usage_in_tot[TRES_ARRAY_FS_DISK];
			jobacct->last_tres_usage_out_tot =
				jobacct->tres_usage_out_tot[TRES_ARRAY_FS_DISK];
			jobacct->last_total_cputime =
				jobacct->tres_usage_in_tot[TRES_ARRAY_CPU];

			jobacct->last_time = jobacct->cur_time;
		}
	}
	list_iterator_destroy(itr);

	/* Look the setting up once and cache it in the static variable */
	if (over_memory_kill == -1)
		over_memory_kill = slurm_get_job_acct_oom_kill();

	if (over_memory_kill)
		jobacct_gather_handle_mem_limit(total_job_mem,
						total_job_vsize);

finished:
	FREE_NULL_LIST(prec_list);
	processing = 0;		/* allow the next poll to run */
}
Ejemplo n.º 24
0
static void kellerman_set_insert(struct set *S, int j)
{     /* append element j to set S and remember its position */
      S->size++;
      S->list[S->size] = j;
      S->pos[j] = S->size;
}

static void kellerman_set_remove(struct set *S, int j)
{     /* remove member j from set S by moving the last list element
         into the slot j occupied */
      int slot = S->pos[j];
      if (slot != S->size)
      {  int moved = S->list[S->size];
         S->list[slot] = moved;
         S->pos[moved] = slot;
      }
      S->size--;
      S->pos[j] = 0;
}

/* kellerman - cover the edges of graph G by cliques
 *
 * n is the number of vertices of G. func(info, i, ind) must store the
 * vertices adjacent to vertex i in ind[1..len] and return len; only
 * neighbours j < i are used. The result is written to graph H (which
 * is erased first): vertices 1..n of H correspond to vertices of G,
 * vertices n+1..n+k correspond to cliques, and arc (i, n+m) means
 * that vertex i belongs to clique C[m]. The routine returns k, the
 * number of cliques created. */
int kellerman(int n, int (*func)(void *info, int i, int ind[]),
      void *info, void /* glp_graph */ *H_)
{     glp_graph *H = H_;
      struct set W_, V_;
      struct set *W = &W_;
      struct set *V = &V_;
      glp_arc *a;
      int i, j, k, m, t, len, card, best;
      xassert(n >= 0);
      /* start with H containing only the n vertices of G */
      glp_erase_graph(H, H->v_size, H->a_size);
      glp_add_vertices(H, n);
      /* both working sets start out empty */
      W->size = 0;
      W->list = xcalloc(1+n, sizeof(int));
      W->pos = xcalloc(1+n, sizeof(int));
      memset(&W->pos[1], 0, sizeof(int) * n);
      V->size = 0;
      V->list = xcalloc(1+n, sizeof(int));
      V->pos = xcalloc(1+n, sizeof(int));
      memset(&V->pos[1], 0, sizeof(int) * n);
      /* process the vertices of G one at a time */
      for (i = 1; i <= n; i++)
      {  xassert(W->size == 0);
         /* collect into W the neighbours of i that precede it; the
            adjacency list is read from and compacted within W->list */
         len = func(info, i, W->list);
         xassert(0 <= len && len <= n);
         for (t = 1; t <= len; t++)
         {  j = W->list[t];
            xassert(1 <= j && j <= n);
            if (j < i)
            {  xassert(W->pos[j] == 0);
               kellerman_set_insert(W, j);
            }
         }
         /* edges (i,j) for every j in W have to be covered now */
         if (W->size == 0)
         {  /* nothing to cover: open a new clique holding only i */
            k = glp_add_vertices(H, 1) - n;
            glp_add_arc(H, i, n + k);
            continue;
         }
         /* first pass: put i into every existing clique that lies
            entirely inside W, collecting in V the vertices whose
            edges to i get covered this way */
         xassert(V->size == 0);
         k = H->nv - n;
         /* V is a subset of W here, so V == W iff |V| == |W| */
         for (m = 1; m <= k && V->size != W->size; m++)
         {  /* look for a member of C[m] lying outside W */
            for (a = H->v[n + m]->in; a != NULL; a = a->h_next)
            {  if (W->pos[a->tail->i] == 0)
                  break;
            }
            if (a != NULL)
               continue; /* C[m] is not within W */
            /* C[m] := C[m] union {i} */
            glp_add_arc(H, i, n + m);
            /* V := V union C[m] */
            for (a = H->v[n + m]->in; a != NULL; a = a->h_next)
            {  j = a->tail->i;
               if (V->pos[j] == 0)
                  kellerman_set_insert(V, j);
            }
         }
         /* W := W \ V, emptying V along the way */
         for (t = 1; t <= V->size; t++)
         {  j = V->list[t];
            V->pos[j] = 0;
            if (W->pos[j] != 0)
               kellerman_set_remove(W, j);
         }
         V->size = 0;
         /* second pass: W now holds the neighbours whose edge to i is
            still uncovered; keep creating cliques until none remain */
         while (W->size > 0)
         {  /* choose the clique sharing the most vertices with W; on
               ties the lowest-numbered clique wins */
            m = 0;
            best = -1;
            k = H->nv - n;
            for (t = 1; t <= k; t++)
            {  card = 0;
               for (a = H->v[n + t]->in; a != NULL; a = a->h_next)
               {  if (W->pos[a->tail->i] != 0)
                     card++;
               }
               if (card > best)
               {  m = t;
                  best = card;
               }
            }
            xassert(m > 0);
            /* new clique C[k] := (W intersect C[m]) union {i} */
            k = glp_add_vertices(H, 1) - n;
            for (a = H->v[n + m]->in; a != NULL; a = a->h_next)
            {  j = a->tail->i;
               if (W->pos[j] == 0)
                  continue;
               /* j is in both W and C[m]: it joins C[k], and edge
                  (i,j) is thereby covered, so j leaves W */
               glp_add_arc(H, j, n + k);
               kellerman_set_remove(W, j);
            }
            glp_add_arc(H, i, n + k);
         }
      }
      /* release the working sets */
      xfree(W->list);
      xfree(W->pos);
      xfree(V->list);
      xfree(V->pos);
      /* every vertex of H beyond the first n is a clique */
      return H->nv - n;
}
Ejemplo n.º 25
0
/*
 * archive_write_file - write an archive buffer to its archive file.
 *
 * The data is first written to "<name>.new".  On success the previous
 * archive (if any) is kept as "<name>.old" and the new file is linked into
 * place as "<name>"; on failure the partial ".new" file is removed.  The
 * archive name itself comes from _make_archive_name().
 *
 * IN buffer - data to store (must not be NULL)
 * IN cluster_name, period_start, period_end, arch_dir, arch_type,
 *	archive_period - passed to _make_archive_name() to build the file name
 * RET SLURM_SUCCESS, or SLURM_ERROR if the file could not be created,
 *	written, or moved into place
 *
 * Calls are serialized by a function-local mutex.
 */
extern int archive_write_file(Buf buffer, char *cluster_name,
			      time_t period_start, time_t period_end,
			      char *arch_dir, char *arch_type,
			      uint32_t archive_period)
{
	int fd = 0;
	int rc = SLURM_SUCCESS;
	char *old_file = NULL, *new_file = NULL, *reg_file = NULL;
	static pthread_mutex_t local_file_lock = PTHREAD_MUTEX_INITIALIZER;

	xassert(buffer);

	slurm_mutex_lock(&local_file_lock);

	/* write the buffer to file */
	reg_file = _make_archive_name(period_start, period_end,
				      cluster_name, arch_dir,
				      arch_type, archive_period);

	debug("Storing %s archive for %s at %s",
	      arch_type, cluster_name, reg_file);
	old_file = xstrdup_printf("%s.old", reg_file);
	new_file = xstrdup_printf("%s.new", reg_file);

	fd = creat(new_file, 0600);
	if (fd < 0) {
		error("Can't save archive, create file %s error %m", new_file);
		rc = SLURM_ERROR;
	} else {
		int pos = 0, nwrite = get_buf_offset(buffer), amount;
		char *data = (char *)get_buf_data(buffer);
		while (nwrite > 0) {
			amount = write(fd, &data[pos], nwrite);
			if (amount < 0) {
				/*
				 * An interrupted write transferred nothing;
				 * retry it without touching pos/nwrite (the
				 * -1 return must never be added to them).
				 */
				if (errno == EINTR)
					continue;
				error("Error writing file %s, %m", new_file);
				rc = SLURM_ERROR;
				break;
			}
			nwrite -= amount;
			pos    += amount;
		}
		fsync(fd);
		close(fd);
	}

	if (rc)
		(void) unlink(new_file);
	else {			/* file shuffle */
		(void) unlink(old_file);
		/* Fails harmlessly when there is no previous archive */
		if (link(reg_file, old_file))
			debug("Unable to link %s to %s: %m",
			      reg_file, old_file);
		(void) unlink(reg_file);
		/*
		 * If the new file can not be put in place the archive was
		 * not stored, so do not report success.
		 */
		if (link(new_file, reg_file)) {
			error("Unable to link %s to %s: %m",
			      new_file, reg_file);
			rc = SLURM_ERROR;
		}
		(void) unlink(new_file);
	}
	xfree(old_file);
	xfree(reg_file);
	xfree(new_file);
	slurm_mutex_unlock(&local_file_lock);

	return rc;
}
Ejemplo n.º 26
0
static double okalg_attr(const void *data, int offset, double dflt)
{     /* fetch the double stored at the given byte offset of a vertex
         or arc data block; a negative offset means the attribute is
         not supplied, in which case dflt is returned */
      double value;
      if (offset < 0)
         return dflt;
      memcpy(&value, (const char *)data + offset, sizeof(double));
      return value;
}

static int okalg_bad_offset(int offset, int size)
{     /* non-zero if a non-negative offset leaves no room for a double
         inside a data block of the given size */
      return offset >= 0 && offset > size - (int)sizeof(double);
}

/* glp_mincost_okalg - find minimum-cost flow with out-of-kilter
 * algorithm
 *
 * G is the network. v_rhs, a_low, a_cap and a_cost are byte offsets of
 * double-valued input attributes in the vertex/arc data blocks (node
 * supply, arc lower bound, arc capacity, arc unit cost); a negative
 * offset selects the default 0, 0, 1 and 0 respectively. If sol is not
 * NULL the total cost is stored there; a_x and v_pi, when
 * non-negative, are offsets at which arc flows and node potentials are
 * stored.
 *
 * Returns 0 on success, GLP_EDATA if the input data are unusable
 * (self-loop, non-integer or out-of-range value), GLP_ENOPFS if no
 * feasible circulation exists, GLP_ERANGE on integer overflow inside
 * okalg, or GLP_EFAIL if okalg's optimality test failed. */
int glp_mincost_okalg(glp_graph *G, int v_rhs, int a_low, int a_cap,
      int a_cost, double *sol, int a_x, int v_pi)
{     glp_arc *a;
      int nv, na, i, k, s, t, ret;
      int *tail, *head, *low, *cap, *cost, *x, *pi;
      double supply, val;
      /* validate the attribute offsets */
      if (okalg_bad_offset(v_rhs, G->v_size))
         xerror("glp_mincost_okalg: v_rhs = %d; invalid offset\n",
            v_rhs);
      if (okalg_bad_offset(a_low, G->a_size))
         xerror("glp_mincost_okalg: a_low = %d; invalid offset\n",
            a_low);
      if (okalg_bad_offset(a_cap, G->a_size))
         xerror("glp_mincost_okalg: a_cap = %d; invalid offset\n",
            a_cap);
      if (okalg_bad_offset(a_cost, G->a_size))
         xerror("glp_mincost_okalg: a_cost = %d; invalid offset\n",
            a_cost);
      if (okalg_bad_offset(a_x, G->a_size))
         xerror("glp_mincost_okalg: a_x = %d; invalid offset\n", a_x);
      if (okalg_bad_offset(v_pi, G->v_size))
         xerror("glp_mincost_okalg: v_pi = %d; invalid offset\n", v_pi);
      /* the network is extended by an artificial source s and an
         artificial sink t, numbered right after the original nodes */
      s = G->nv + 1;
      t = s + 1;
      nv = t;
      /* arcs of the extended network: the original ones, one per node
         having non-zero supply/demand, plus the feedback arc */
      na = G->na + 1;
      for (i = 1; i <= G->nv; i++)
      {  if (okalg_attr(G->v[i]->data, v_rhs, 0.0) != 0.0)
            na++;
      }
      /* allocate working arrays */
      tail = xcalloc(1+na, sizeof(int));
      head = xcalloc(1+na, sizeof(int));
      low = xcalloc(1+na, sizeof(int));
      cap = xcalloc(1+na, sizeof(int));
      cost = xcalloc(1+na, sizeof(int));
      x = xcalloc(1+na, sizeof(int));
      pi = xcalloc(1+nv, sizeof(int));
      /* copy the original arcs, checking that every attribute is an
         integer that fits in an int */
      k = 0;
      for (i = 1; i <= G->nv; i++)
      {  for (a = G->v[i]->out; a != NULL; a = a->t_next)
         {  k++;
            tail[k] = a->tail->i;
            head[k] = a->head->i;
            if (tail[k] == head[k])
            {  /* self-loops are not allowed */
               ret = GLP_EDATA;
               goto done;
            }
            /* lower bound: integer in [0, INT_MAX] */
            val = okalg_attr(a->data, a_low, 0.0);
            if (!(0.0 <= val && val <= (double)INT_MAX &&
                  val == floor(val)))
            {  ret = GLP_EDATA;
               goto done;
            }
            low[k] = (int)val;
            /* capacity: integer in [low, INT_MAX] */
            val = okalg_attr(a->data, a_cap, 1.0);
            if (!((double)low[k] <= val && val <= (double)INT_MAX &&
                  val == floor(val)))
            {  ret = GLP_EDATA;
               goto done;
            }
            cap[k] = (int)val;
            /* unit cost: integer with magnitude up to INT_MAX */
            val = okalg_attr(a->data, a_cost, 0.0);
            if (!(fabs(val) <= (double)INT_MAX && val == floor(val)))
            {  ret = GLP_EDATA;
               goto done;
            }
            cost[k] = (int)val;
         }
      }
      /* add the artificial arcs, whose flow is fixed to the node's
         supply or demand */
      supply = 0.0;
      for (i = 1; i <= G->nv; i++)
      {  val = okalg_attr(G->v[i]->data, v_rhs, 0.0);
         if (!(fabs(val) <= (double)INT_MAX && val == floor(val)))
         {  ret = GLP_EDATA;
            goto done;
         }
         if (val > 0.0)
         {  /* node i supplies flow: arc s -> i */
            k++;
            tail[k] = s;
            head[k] = i;
            low[k] = cap[k] = (int)(+val);
            cost[k] = 0;
            supply += (double)val;
         }
         else if (val < 0.0)
         {  /* node i demands flow: arc i -> t */
            k++;
            tail[k] = i;
            head[k] = t;
            low[k] = cap[k] = (int)(-val);
            cost[k] = 0;
         }
      }
      /* the feedback arc t -> s carries the total supply */
      k++;
      xassert(k == na);
      tail[k] = t;
      head[k] = s;
      if (supply > (double)INT_MAX)
      {  ret = GLP_EDATA;
         goto done;
      }
      low[k] = cap[k] = (int)supply;
      cost[k] = 0;
      /* find minimal-cost circulation in the resulting network */
      ret = okalg(nv, na, tail, head, low, cap, cost, x, pi);
      switch (ret)
      {  case 0:
            /* optimal circulation found */
            ret = 0;
            break;
         case 1:
            /* no feasible circulation exists; solution components
               are still stored below */
            ret = GLP_ENOPFS;
            break;
         case 2:
            /* integer overflow occurred */
            ret = GLP_ERANGE;
            goto done;
         case 3:
            /* optimality test failed (logic error) */
            ret = GLP_EFAIL;
            goto done;
         default:
            xassert(ret != ret);
      }
      /* objective value = total cost of the circulation */
      if (sol != NULL)
      {  val = 0.0;
         for (k = 1; k <= na; k++)
            val += (double)cost[k] * (double)x[k];
         *sol = val;
      }
      /* arc flows, visited in the same order the arcs were numbered */
      if (a_x >= 0)
      {  k = 0;
         for (i = 1; i <= G->nv; i++)
         {  for (a = G->v[i]->out; a != NULL; a = a->t_next)
            {  k++;
               val = (double)x[k];
               memcpy((char *)a->data + a_x, &val, sizeof(double));
            }
         }
      }
      /* node potentials (Lagrange multipliers), stored negated */
      if (v_pi >= 0)
      {  for (i = 1; i <= G->nv; i++)
         {  val = - (double)pi[i];
            memcpy((char *)G->v[i]->data + v_pi, &val,
               sizeof(double));
         }
      }
done: /* free working arrays */
      xfree(tail);
      xfree(head);
      xfree(low);
      xfree(cap);
      xfree(cost);
      xfree(x);
      xfree(pi);
      return ret;
}
Ejemplo n.º 27
0
/*
 * bg_free_block - ask for a block to be freed and optionally wait until
 *	it reaches the free (or an error) state.
 *
 * IN bg_record - block to free; a NULL pointer is reported and rejected
 * IN wait - if false a single pass is made; if true the state is re-checked
 *	up to MAX_FREE_RETRIES times, sleeping FREE_SLEEP_INTERVAL seconds
 *	between passes
 * IN locked - true if the caller already holds block_state_mutex; otherwise
 *	the mutex is taken here and released before returning
 * RET SLURM_SUCCESS, or SLURM_ERROR if bg_record is NULL or the block still
 *	was not free after MAX_FREE_RETRIES passes
 *
 * NOTE: block_state_mutex is released while sleeping between passes and
 * while calling select_g_update_block(), even when the caller passed
 * locked == true.
 */
extern int bg_free_block(bg_record_t *bg_record, bool wait, bool locked)
{
	int rc = SLURM_SUCCESS;
	int count = 0;	/* number of completed wait passes */

	if (!bg_record) {
		error("bg_free_block: there was no bg_record");
		return SLURM_ERROR;
	}

	if (!locked)
		slurm_mutex_lock(&block_state_mutex);

	while (count < MAX_FREE_RETRIES) {
		/* block was removed */
		if (bg_record->magic != BLOCK_MAGIC) {
			error("block was removed while freeing it here");
			xassert(0);
			if (!locked)
				slurm_mutex_unlock(&block_state_mutex);
			return SLURM_SUCCESS;
		}
		/* Reset these here so we don't try to reboot it
		   when the state goes to free.
		*/
		bg_record->boot_state = 0;
		bg_record->boot_count = 0;
		/* Here we don't need to check if the block is still
		 * in existence since this function can't be called on
		 * the same block twice.  It may
		 * have already been removed at this point also.
		 */
#ifdef HAVE_BG_FILES
		/* Only call the bridge when the block is neither free
		 * nor already terminating */
		if (bg_record->state != BG_BLOCK_FREE
		    && bg_record->state != BG_BLOCK_TERM) {
			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
				info("bridge_destroy %s",
				     bg_record->bg_block_id);
			rc = bridge_block_free(bg_record);
			if (rc != SLURM_SUCCESS) {
				if (rc == BG_ERROR_BLOCK_NOT_FOUND) {
					/* Nothing left to free: treat the
					 * block as free and stop */
					debug("block %s is not found",
					      bg_record->bg_block_id);
					bg_record->state = BG_BLOCK_FREE;
					break;
				} else if (rc == BG_ERROR_FREE) {
					if (bg_conf->slurm_debug_flags
					    & DEBUG_FLAG_SELECT_TYPE)
						info("bridge_block_free"
						     "(%s): %s State = %s",
						     bg_record->bg_block_id,
						     bg_err_str(rc),
						     bg_block_state_string(
							     bg_record->state));
				} else if (rc == BG_ERROR_INVALID_STATE) {
#ifndef HAVE_BGL
					/* If the state is error and
					   we get an incompatible
					   state back here, it means
					   we set it ourselves so
					   break out.
					*/
					if (bg_record->state
					    & BG_BLOCK_ERROR_FLAG)
						break;
#endif
					if (bg_conf->slurm_debug_flags
					    & DEBUG_FLAG_SELECT_TYPE)
						info("bridge_block_free"
						     "(%s): %s State = %s",
						     bg_record->bg_block_id,
						     bg_err_str(rc),
						     bg_block_state_string(
							     bg_record->state));
#ifdef HAVE_BGQ
					/* Mark the block as terminating.  The
					 * assignment below is the body of this
					 * if statement despite its indentation.
					 */
					if (bg_record->state != BG_BLOCK_FREE
					    && bg_record->state
					    != BG_BLOCK_TERM)
					bg_record->state = BG_BLOCK_TERM;
#endif
				} else {
					error("bridge_block_free"
					      "(%s): %s State = %s",
					      bg_record->bg_block_id,
					      bg_err_str(rc),
					      bg_block_state_string(
						      bg_record->state));
				}
			}
		}
#else
		/* Built without HAVE_BG_FILES: no bridge call is made, so
		   fake the free by setting the state here.
		*/
		if (bg_record->state & BG_BLOCK_ERROR_FLAG) {
			/* This will set the state to ERROR(Free)
			 * just in case the state was ERROR(SOMETHING ELSE) */
			bg_record->state = BG_BLOCK_ERROR_FLAG;
			break;
		} else if (!wait || (count >= 3))
			bg_record->state = BG_BLOCK_FREE;
		else if (bg_record->state != BG_BLOCK_FREE)
			bg_record->state = BG_BLOCK_TERM;
#endif

		/* Done if the caller does not want to wait, or the block
		 * is free (or, except on BGL, flagged in error) */
		if (!wait || (bg_record->state == BG_BLOCK_FREE)
#ifndef HAVE_BGL
		    ||  (bg_record->state & BG_BLOCK_ERROR_FLAG)
#endif
			) {
			break;
		}
		/* If we were locked outside of this we need to unlock
		   to not cause deadlock on this mutex until we are
		   done.
		*/
		slurm_mutex_unlock(&block_state_mutex);
		sleep(FREE_SLEEP_INTERVAL);
		count++;
		slurm_mutex_lock(&block_state_mutex);
	}

	/* Any bridge error code left in rc is discarded here; the result
	 * depends only on the final block state */
	rc = SLURM_SUCCESS;
	if ((bg_record->state == BG_BLOCK_FREE)
	    || (bg_record->state & BG_BLOCK_ERROR_FLAG)) {

		if (bg_record->err_ratio
		    && (bg_record->state == BG_BLOCK_FREE)) {
			/* Sometimes the realtime server can report
			   software error on cnodes even though the
			   block is free.  If this is the case we need
			   to manually clear them.
			*/
			ba_mp_t *found_ba_mp;
			ListIterator itr =
				list_iterator_create(bg_record->ba_mp_list);
			debug("block %s is free, but has %u cnodes in error",
			      bg_record->bg_block_id, bg_record->cnode_err_cnt);
			while ((found_ba_mp = list_next(itr))) {
				if (!found_ba_mp->used)
					continue;

				/* Make sure a bitmap exists, then clear
				 * every bit in it */
				if (!found_ba_mp->cnode_err_bitmap)
					found_ba_mp->cnode_err_bitmap =
						bit_alloc(
							bg_conf->mp_cnode_cnt);

				bit_nclear(found_ba_mp->cnode_err_bitmap, 0,
					   bit_size(found_ba_mp->
						    cnode_err_bitmap)-1);
			}
			list_iterator_destroy(itr);
			bg_record->cnode_err_cnt = 0;
			bg_record->err_ratio = 0;
		}

		remove_from_bg_list(bg_lists->booted, bg_record);
	} else if (count >= MAX_FREE_RETRIES) {
		/* Something isn't right, go mark this one in an error
		   state. */
		update_block_msg_t block_msg;
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("bg_free_block: block %s is not in state "
			     "free (%s), putting it in error state.",
			     bg_record->bg_block_id,
			     bg_block_state_string(bg_record->state));
		slurm_init_update_block_msg(&block_msg);
		block_msg.bg_block_id = bg_record->bg_block_id;
		block_msg.state = BG_BLOCK_ERROR_FLAG;
		block_msg.reason = "Block would not deallocate";
		/* The mutex is dropped around the update call and
		 * re-taken afterwards */
		slurm_mutex_unlock(&block_state_mutex);
		select_g_update_block(&block_msg);
		slurm_mutex_lock(&block_state_mutex);
		rc = SLURM_ERROR;
	}
	if (!locked)
		slurm_mutex_unlock(&block_state_mutex);

	return rc;
}
Ejemplo n.º 28
0
/* glp_maxflow_lp - convert maximum flow problem to LP
 *
 * Rebuilds problem object lp (erasing its previous content) from
 * network G: one row per node (flow conservation) and one column per
 * arc (arc flow), maximizing the net flow leaving source node s.
 * names is GLP_ON or GLP_OFF and selects whether the graph, node and
 * arc names are copied to the problem; t is the sink node. a_cap is
 * the byte offset of the double-valued arc capacity in the arc data
 * block; if it is negative every arc gets capacity 1. */
void glp_maxflow_lp(glp_prob *lp, glp_graph *G, int names, int s,
      int t, int a_cap)
{     glp_arc *a;
      int i, j, type, ind[1+2];
      double cap, val[1+2];
      /* check the arguments */
      if (names != GLP_ON && names != GLP_OFF)
         xerror("glp_maxflow_lp: names = %d; invalid parameter\n",
            names);
      if (s < 1 || s > G->nv)
         xerror("glp_maxflow_lp: s = %d; source node number out of "
            "range\n", s);
      if (t < 1 || t > G->nv)
         xerror("glp_maxflow_lp: t = %d: sink node number out of "
            "range \n", t);
      if (s == t)
         xerror("glp_maxflow_lp: s = t = %d; source and sink nodes "
            "must be distinct\n", s);
      if (a_cap >= 0 && a_cap > G->a_size - (int)sizeof(double))
         xerror("glp_maxflow_lp: a_cap = %d; invalid offset\n", a_cap);
      /* start from an empty maximization problem */
      glp_erase_prob(lp);
      if (names)
         glp_set_prob_name(lp, G->name);
      glp_set_obj_dir(lp, GLP_MAX);
      /* rows: one per node; the source row is bounded below by zero,
         the sink row above by zero, every other row is fixed at zero */
      glp_add_rows(lp, G->nv);
      for (i = 1; i <= G->nv; i++)
      {  if (names)
            glp_set_row_name(lp, i, G->v[i]->name);
         type = (i == s ? GLP_LO : i == t ? GLP_UP : GLP_FX);
         glp_set_row_bnds(lp, i, type, 0.0, 0.0);
      }
      /* columns: one per arc, numbered in the order the arcs are met
         when walking the outgoing lists of nodes 1, 2, ... */
      if (G->na > 0)
         glp_add_cols(lp, G->na);
      j = 0;
      for (i = 1; i <= G->nv; i++)
      {  for (a = G->v[i]->out; a != NULL; a = a->t_next)
         {  int from = a->tail->i, to = a->head->i;
            j++;
            if (names)
            {  char name[50+1];
               sprintf(name, "x[%d,%d]", from, to);
               xassert(strlen(name) < sizeof(name));
               glp_set_col_name(lp, j, name);
            }
            /* the flow leaves its tail (+1) and enters its head (-1);
               a self-loop gets no constraint coefficients */
            if (from != to)
            {  ind[1] = from;
               val[1] = +1.0;
               ind[2] = to;
               val[2] = -1.0;
               glp_set_mat_col(lp, j, 2, ind, val);
            }
            /* capacity of the arc */
            if (a_cap < 0)
               cap = 1.0;
            else
               memcpy(&cap, (char *)a->data + a_cap, sizeof(double));
            /* zero capacity fixes the flow at zero, DBL_MAX leaves it
               unbounded above, anything else bounds it by [0, cap] */
            if (cap == 0.0)
               type = GLP_FX;
            else if (cap == DBL_MAX)
               type = GLP_LO;
            else
               type = GLP_DB;
            glp_set_col_bnds(lp, j, type, 0.0, cap);
            /* objective: flow out of the source minus flow into it */
            if (from == s)
               glp_set_obj_coef(lp, j, +1.0);
            else if (to == s)
               glp_set_obj_coef(lp, j, -1.0);
         }
      }
      xassert(j == G->na);
      return;
}
Ejemplo n.º 29
0
/*
 * update_front_end - apply a state change request to front end nodes
 * IN msg_ptr - change specification: "name" is a hostlist expression naming
 *	the front end nodes, "node_state" the requested state (NO_VAL for no
 *	change), "reason"/"reason_uid" the reason to record
 * RET SLURM_SUCCESS, or ESLURM_INVALID_NODE_NAME if the name expression is
 *	invalid, a named node is unknown, or front end support is not built in
 */
extern int update_front_end(update_front_end_msg_t *msg_ptr)
{
#ifdef HAVE_FRONT_END
	hostlist_t host_list;
	char *this_node_name;
	front_end_record_t *front_end_ptr;
	bool found;
	int i, rc = SLURM_SUCCESS;
	time_t now = time(NULL);

	host_list = hostlist_create(msg_ptr->name);
	if (host_list == NULL) {
		error("hostlist_create error on %s: %m", msg_ptr->name);
		return ESLURM_INVALID_NODE_NAME;
	}

	last_front_end_update = now;
	while ((this_node_name = hostlist_shift(host_list)) != NULL) {
		/* Locate the record carrying this name */
		found = false;
		for (i = 0; i < front_end_node_cnt; i++) {
			front_end_ptr = &front_end_nodes[i];
			xassert(front_end_ptr->magic == FRONT_END_MAGIC);
			if (xstrcmp(this_node_name, front_end_ptr->name) == 0) {
				found = true;
				break;
			}
		}

		if (!found) {
			info("update_front_end: could not find front end: %s",
			     this_node_name);
			rc = ESLURM_INVALID_NODE_NAME;
		} else if (msg_ptr->node_state != (uint32_t) NO_VAL) {
			/* A state was given: apply it, then report the result */
			if (msg_ptr->node_state == NODE_RESUME) {
				/* Back to idle, dropping any recorded reason */
				front_end_ptr->node_state = NODE_STATE_IDLE;
				xfree(front_end_ptr->reason);
				front_end_ptr->reason_time = 0;
				front_end_ptr->reason_uid = 0;
			} else if (msg_ptr->node_state == NODE_STATE_DRAIN) {
				/* Add the drain flag; replace the reason only
				 * if a new one was supplied */
				front_end_ptr->node_state |= NODE_STATE_DRAIN;
				if (msg_ptr->reason) {
					xfree(front_end_ptr->reason);
					front_end_ptr->reason =
						xstrdup(msg_ptr->reason);
					front_end_ptr->reason_time = now;
					front_end_ptr->reason_uid =
						msg_ptr->reason_uid;
				}
			} else if (msg_ptr->node_state == NODE_STATE_DOWN) {
				set_front_end_down(front_end_ptr,
						   msg_ptr->reason);
			}
			info("update_front_end: set state of %s to %s",
			     this_node_name,
			     node_state_string(front_end_ptr->node_state));
		}
		/* Names returned by hostlist_shift() are released with free() */
		free(this_node_name);
	}
	hostlist_destroy(host_list);

	return rc;
#else
	return ESLURM_INVALID_NODE_NAME;
#endif
}
Ejemplo n.º 30
0
/*
 * _lllp_generate_cpu_bind
 *
 * Generate the cpu_bind type and string given an array of bitstr_t masks.
 * The string is the comma separated list of the hex masks of all non-NULL
 * entries of "masks".  Any previous req->cpu_bind string is freed.  If at
 * least one mask exists the new string is stored in req->cpu_bind (which
 * then owns it) and CPU_BIND_MASK is set; otherwise req->cpu_bind is set to
 * NULL and CPU_BIND_VERBOSE is cleared.  The CPU_BIND_TO_* mask generation
 * bits are always cleared.
 *
 * IN/OUT req - job launch request (cpu_bind_type and cpu_bind updated)
 * IN maxtasks - maximum number of tasks (number of entries in masks)
 * IN masks - array of masks, entries may be NULL
 */
static void _lllp_generate_cpu_bind(launch_tasks_request_msg_t *req,
				    const uint32_t maxtasks, bitstr_t **masks)
{
	uint32_t i;
	int num_bits = 0, masks_len, buf_size;
	bitstr_t *bitmask;
	bitoff_t charsize;
	char *masks_str = NULL;
	char buf_type[100];

	/* The size of the first mask found is used to size every entry */
	for (i = 0; i < maxtasks; i++) {
		bitmask = masks[i];
		if (bitmask) {
			num_bits = bit_size(bitmask);
			break;
		}
	}
	charsize = (num_bits + 3) / 4;		/* ASCII hex digits */
	charsize += 3;				/* "0x" and trailing "," */
	masks_len = maxtasks * charsize + 1;	/* number of masks + null */
	buf_size = masks_len;

	/* Cast so the arguments really are the ints that %d expects */
	debug3("_lllp_generate_cpu_bind %d %d %d", (int) maxtasks,
	       (int) charsize, masks_len);

	masks_str = xmalloc(buf_size);
	masks_len = 0;		/* now: bytes used, including terminator */
	for (i = 0; i < maxtasks; i++) {
		char *str;
		int curlen;

		bitmask = masks[i];
		if (bitmask == NULL)
			continue;
		str = (char *)bit_fmt_hexmask(bitmask);
		curlen = strlen(str) + 1;	/* include the terminator */

		/* A mask wider than the first one would not fit */
		xassert((masks_len + curlen) <= buf_size);

		/* Turn the previous entry's terminator into a separator */
		if (masks_len > 0)
			masks_str[masks_len - 1] = ',';
		memcpy(&masks_str[masks_len], str, curlen);
		masks_len += curlen;
		/* The last byte written must be the terminator */
		xassert(masks_str[masks_len - 1] == '\0');
		xfree(str);
	}

	xfree(req->cpu_bind);
	if (masks_str[0] != '\0') {
		req->cpu_bind = masks_str;	/* req now owns the string */
		req->cpu_bind_type |= CPU_BIND_MASK;
	} else {
		req->cpu_bind = NULL;
		req->cpu_bind_type &= ~CPU_BIND_VERBOSE;
	}

	/* clear mask generation bits */
	req->cpu_bind_type &= ~CPU_BIND_TO_THREADS;
	req->cpu_bind_type &= ~CPU_BIND_TO_CORES;
	req->cpu_bind_type &= ~CPU_BIND_TO_SOCKETS;
	req->cpu_bind_type &= ~CPU_BIND_TO_LDOMS;

	slurm_sprint_cpu_bind_type(buf_type, req->cpu_bind_type);
	info("_lllp_generate_cpu_bind jobid [%u]: %s, %s",
	     req->job_id, buf_type, masks_str);

	/* An empty string was not handed to req, so release it here */
	if (req->cpu_bind == NULL)
		xfree(masks_str);
}