Example #1
/*
 * Reset usage_raw and grp_used_wall on all associations and QOS records.
 * This should be called every PriorityUsageResetPeriod.
 * RET: SLURM_SUCCESS on success, SLURM_ERROR otherwise.
 */
static int _reset_usage(void)
{
	ListIterator itr = NULL;
	slurmdb_association_rec_t *assoc = NULL;
	slurmdb_qos_rec_t *qos = NULL;
	assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK,
				   WRITE_LOCK, NO_LOCK, NO_LOCK };

	if (!calc_fairshare)
		return SLURM_SUCCESS;

	assoc_mgr_lock(&locks);

	xassert(assoc_mgr_association_list);

	itr = list_iterator_create(assoc_mgr_association_list);
	/* We want to do this to all associations including
	   root.  All usage_raws are calculated from the bottom up.
	*/
	while ((assoc = list_next(itr))) {
		assoc->usage->usage_raw = 0;
		assoc->usage->grp_used_wall = 0;
	}
	list_iterator_destroy(itr);

	itr = list_iterator_create(assoc_mgr_qos_list);
	while ((qos = list_next(itr))) {
		qos->usage->usage_raw = 0;
		qos->usage->grp_used_wall = 0;
	}
	list_iterator_destroy(itr);
	assoc_mgr_unlock(&locks);

	return SLURM_SUCCESS;
}
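
A hedged usage sketch (not part of the plugin above): how a decay thread might call _reset_usage() once per PriorityUsageResetPeriod. The names last_reset and reset_period are assumptions for illustration.

static time_t last_reset = 0;

/* Sketch only: invoke the reset when the configured period has elapsed. */
static void _maybe_reset_usage(time_t now, time_t reset_period)
{
	if (!reset_period || ((now - last_reset) < reset_period))
		return;
	if (_reset_usage() != SLURM_SUCCESS)
		error("priority: periodic usage reset failed");
	last_reset = now;
}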
Example #2
extern void priority_p_job_end(struct job_record *job_ptr)
{
	uint64_t unused_cpu_run_secs = 0;
	uint64_t time_limit_secs = (uint64_t)job_ptr->time_limit * 60;
	slurmdb_assoc_rec_t *assoc_ptr;
	assoc_mgr_lock_t locks = { NO_LOCK, WRITE_LOCK, NO_LOCK,
				   WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };

	/* No unused cpu_run_secs if job ran past its time limit */
	if (job_ptr->end_time >= job_ptr->start_time + time_limit_secs)
		return;

	unused_cpu_run_secs = job_ptr->total_cpus *
		(job_ptr->start_time + time_limit_secs - job_ptr->end_time);

	assoc_mgr_lock(&locks);
	if (job_ptr->qos_ptr) {
		slurmdb_qos_rec_t *qos_ptr =
			(slurmdb_qos_rec_t *)job_ptr->qos_ptr;
		if (unused_cpu_run_secs >
		    qos_ptr->usage->grp_used_cpu_run_secs) {
			qos_ptr->usage->grp_used_cpu_run_secs = 0;
			debug2("acct_policy_job_fini: "
			       "grp_used_cpu_run_secs "
			       "underflow for qos %s", qos_ptr->name);
		} else
			qos_ptr->usage->grp_used_cpu_run_secs -=
				unused_cpu_run_secs;
	}
	assoc_ptr = (slurmdb_assoc_rec_t *)job_ptr->assoc_ptr;
	while (assoc_ptr) {
		/* If the job finished early remove the extra time now. */
		if (unused_cpu_run_secs >
		    assoc_ptr->usage->grp_used_cpu_run_secs) {
			assoc_ptr->usage->grp_used_cpu_run_secs = 0;
			debug2("acct_policy_job_fini: "
			       "grp_used_cpu_run_secs "
			       "underflow for account %s",
			       assoc_ptr->acct);
		} else {
			assoc_ptr->usage->grp_used_cpu_run_secs -=
				unused_cpu_run_secs;
			debug4("acct_policy_job_fini: job %u. "
			       "Removed %"PRIu64" unused seconds "
			       "from assoc %s "
			       "grp_used_cpu_run_secs = %"PRIu64"",
			       job_ptr->job_id, unused_cpu_run_secs,
			       assoc_ptr->acct,
			       assoc_ptr->usage->grp_used_cpu_run_secs);
		}
		/* now handle all the group limits of the parents */
		assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
	}
	assoc_mgr_unlock(&locks);

	return;
}
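
A quick worked example of the bookkeeping above, with assumed values:

/* Assumed values: total_cpus = 4, time_limit = 60 min, and the job ends
 * 600 s before start_time + time_limit_secs:
 *   unused_cpu_run_secs = 4 * 600 = 2400
 * which is then subtracted from the QOS and from every association up
 * the parent chain (clamped at 0 on underflow, as the code shows). */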
Example #3
/* job_ptr should already have the partition priority and such added
 * before we get here, since we will be adding to it
 */
static double _get_fairshare_priority(struct job_record *job_ptr)
{
	slurmdb_association_rec_t *job_assoc =
		(slurmdb_association_rec_t *)job_ptr->assoc_ptr;
	slurmdb_association_rec_t *fs_assoc = NULL;
	double priority_fs = 0.0;
	assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK,
				   NO_LOCK, NO_LOCK, NO_LOCK };

	if (!calc_fairshare)
		return 0;

	if (!job_assoc) {
		error("Job %u has no association.  Unable to "
		      "compute fairshare.", job_ptr->job_id);
		return 0;
	}

	fs_assoc = job_assoc;

	assoc_mgr_lock(&locks);

	/* Use values from parent when FairShare=SLURMDB_FS_USE_PARENT */
	while ((fs_assoc->shares_raw == SLURMDB_FS_USE_PARENT)
	       && fs_assoc->usage->parent_assoc_ptr
	       && (fs_assoc != assoc_mgr_root_assoc)) {
		fs_assoc = fs_assoc->usage->parent_assoc_ptr;
	}

	if (fuzzy_equal(fs_assoc->usage->usage_efctv, NO_VAL))
		priority_p_set_assoc_usage(fs_assoc);

	/* Priority is 0 -> 1 */
	priority_fs = priority_p_calc_fs_factor(
		fs_assoc->usage->usage_efctv,
		(long double)fs_assoc->usage->shares_norm);
	if (priority_debug) {
		info("Fairshare priority of job %u for user %s in acct"
		     " %s is 2**(-%Lf/%f) = %f",
		     job_ptr->job_id, job_assoc->user, job_assoc->acct,
		     fs_assoc->usage->usage_efctv,
		     fs_assoc->usage->shares_norm, priority_fs);
	}

	assoc_mgr_unlock(&locks);

	return priority_fs;
}
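
As a minimal sketch of the factor reported by the info() message above (2**(-usage_efctv/shares_norm)), assuming it mirrors what priority_p_calc_fs_factor() computes:

#include <math.h>

/* Sketch only: ~1.0 at zero effective usage, decaying toward 0.0 as
 * usage grows relative to the normalized shares. */
static double _fs_factor_sketch(long double usage_efctv, double shares_norm)
{
	if (shares_norm <= 0.0)
		return 0.0;	/* no shares, no fair-share factor */
	return pow(2.0, (double)(-usage_efctv / (long double)shares_norm));
}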
Example #4
/*
 * Apply the decay factor to usage_raw on all associations and QOS records.
 * IN: decay_factor - decay to be applied to each association's used
 * shares.  This should already be modified with the amount of delta
 * time since the last application.
 * RET: SLURM_SUCCESS on success, SLURM_ERROR otherwise.
 */
static int _apply_decay(double decay_factor)
{
	ListIterator itr = NULL;
	slurmdb_association_rec_t *assoc = NULL;
	slurmdb_qos_rec_t *qos = NULL;
	assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK,
				   WRITE_LOCK, NO_LOCK, NO_LOCK };

	/* A decay_factor of 0 would just zero everything out, so treat
	   it as an error rather than wasting time applying it.  (A
	   factor of 1 would mean no decay at all.) */
	if (!decay_factor)
		return SLURM_ERROR;
	else if (!calc_fairshare)
		return SLURM_SUCCESS;

	assoc_mgr_lock(&locks);

	xassert(assoc_mgr_association_list);
	xassert(assoc_mgr_qos_list);

	itr = list_iterator_create(assoc_mgr_association_list);
	/* We want to do this to all associations including
	   root.  All usage_raws are calculated from the bottom up.
	*/
	while ((assoc = list_next(itr))) {
		assoc->usage->usage_raw *= decay_factor;
		assoc->usage->grp_used_wall *= decay_factor;
	}
	list_iterator_destroy(itr);

	itr = list_iterator_create(assoc_mgr_qos_list);
	while ((qos = list_next(itr))) {
		qos->usage->usage_raw *= decay_factor;
		qos->usage->grp_used_wall *= decay_factor;
	}
	list_iterator_destroy(itr);
	assoc_mgr_unlock(&locks);

	return SLURM_SUCCESS;
}
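
The decay_factor itself comes from the caller. A hedged sketch of one way to derive it from a half-life, in the spirit of PriorityDecayHalfLife (the plugin's exact formula may differ):

#include <math.h>

/* Exponential half-life decay: usage halves every decay_hl_secs. */
static double _decay_from_half_life(double delta_secs, double decay_hl_secs)
{
	if (decay_hl_secs <= 0.0)
		return 1.0;	/* decay disabled: applying 1.0 is a no-op */
	return pow(0.5, delta_secs / decay_hl_secs);
}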
Example #5
/* Fair Tree code called from the decay thread loop */
extern void fair_tree_decay(List jobs, time_t start)
{
	slurmctld_lock_t job_write_lock =
		{ NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK };
	assoc_mgr_lock_t locks =
		{ WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };

	/* apply decayed usage */
	lock_slurmctld(job_write_lock);
	list_for_each(jobs, (ListForF) _ft_decay_apply_new_usage, &start);
	unlock_slurmctld(job_write_lock);

	/* calculate fs factor for associations */
	assoc_mgr_lock(&locks);
	_apply_priority_fs();
	assoc_mgr_unlock(&locks);

	/* assign job priorities */
	lock_slurmctld(job_write_lock);
	list_for_each(jobs, (ListForF) decay_apply_weighted_factors, &start);
	unlock_slurmctld(job_write_lock);
}
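
The list_for_each() calls above rely on Slurm's ListForF callback shape; a hypothetical callback for illustration (the casts in the calls suggest this signature, but treat it as an assumption):

/* ListForF: int (*)(void *x, void *arg); a negative return value is
 * the conventional way to abort iteration in Slurm's list API. */
static int _example_job_cb(void *x, void *arg)
{
	struct job_record *job_ptr = x;
	time_t *start = arg;

	(void) job_ptr;
	(void) start;
	return 0;	/* keep iterating */
}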
Example #6
extern void print_jag_prec(jag_prec_t *prec)
{
	int i;
	assoc_mgr_lock_t locks = {
		NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK,
		READ_LOCK, NO_LOCK, NO_LOCK };

	info("pid %d (ppid %d)", prec->pid, prec->ppid);
	info("act_cpufreq\t%d", prec->act_cpufreq);
	info("ssec \t%f", prec->ssec);
	assoc_mgr_lock(&locks);
	for (i = 0; i < prec->tres_count; i++) {
		if (prec->tres_data[i].size_read == INFINITE64)
			continue;
		info("%s in/read \t%"PRIu64"",
		     assoc_mgr_tres_name_array[i],
		     prec->tres_data[i].size_read);
		info("%s out/write \t%"PRIu64"",
		     assoc_mgr_tres_name_array[i],
		     prec->tres_data[i].size_write);
	}
	assoc_mgr_unlock(&locks);
	info("usec \t%f", prec->usec);
}
extern List as_mysql_modify_qos(mysql_conn_t *mysql_conn, uint32_t uid,
				slurmdb_qos_cond_t *qos_cond,
				slurmdb_qos_rec_t *qos)
{
	ListIterator itr = NULL;
	List ret_list = NULL;
	int rc = SLURM_SUCCESS;
	char *object = NULL;
	char *vals = NULL, *extra = NULL, *query = NULL, *name_char = NULL;
	time_t now = time(NULL);
	char *user_name = NULL;
	int set = 0, i;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	char *tmp_char1=NULL, *tmp_char2=NULL;
	bitstr_t *preempt_bitstr = NULL;
	char *added_preempt = NULL;
	uint32_t qos_cnt;
	assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK,
				   NO_LOCK, NO_LOCK, NO_LOCK };

	if (!qos_cond || !qos) {
		error("we need something to change");
		return NULL;
	}

	if (check_connection(mysql_conn) != SLURM_SUCCESS)
		return NULL;

	if (!is_user_min_admin_level(mysql_conn, uid,
				     SLURMDB_ADMIN_SUPER_USER)) {
		errno = ESLURM_ACCESS_DENIED;
		return NULL;
	}

	xstrcat(extra, "where deleted=0");

	if (qos_cond->description_list
	    && list_count(qos_cond->description_list)) {
		set = 0;
		xstrcat(extra, " && (");
		itr = list_iterator_create(qos_cond->description_list);
		while ((object = list_next(itr))) {
			if (set)
				xstrcat(extra, " || ");
			xstrfmtcat(extra, "description='%s'", object);
			set = 1;
		}
		list_iterator_destroy(itr);
		xstrcat(extra, ")");
	}

	if (qos_cond->id_list
	    && list_count(qos_cond->id_list)) {
		set = 0;
		xstrcat(extra, " && (");
		itr = list_iterator_create(qos_cond->id_list);
		while ((object = list_next(itr))) {
			if (set)
				xstrcat(extra, " || ");
			xstrfmtcat(extra, "id='%s'", object);
			set = 1;
		}
		list_iterator_destroy(itr);
		xstrcat(extra, ")");
	}

	if (qos_cond->name_list
	    && list_count(qos_cond->name_list)) {
		set = 0;
		xstrcat(extra, " && (");
		itr = list_iterator_create(qos_cond->name_list);
		while ((object = list_next(itr))) {
			if (set)
				xstrcat(extra, " || ");
			xstrfmtcat(extra, "name='%s'", object);
			set = 1;
		}
		list_iterator_destroy(itr);
		xstrcat(extra, ")");
	}

	_setup_qos_limits(qos, &tmp_char1, &tmp_char2,
			  &vals, &added_preempt, 0);

	assoc_mgr_lock(&locks);
	qos_cnt = g_qos_count;
	assoc_mgr_unlock(&locks);

	if (added_preempt) {
		preempt_bitstr = bit_alloc(qos_cnt);
		bit_unfmt(preempt_bitstr, added_preempt+1);
		xfree(added_preempt);
	}
	xfree(tmp_char1);
	xfree(tmp_char2);

	if (!extra || !vals) {
		errno = SLURM_NO_CHANGE_IN_DATA;
		FREE_NULL_BITMAP(preempt_bitstr);
		error("Nothing to change");
		return NULL;
	}

	object = xstrdup(mqos_req_inx[0]);
	for (i = 1; i < MQOS_COUNT; i++)
		xstrfmtcat(object, ", %s", mqos_req_inx[i]);

	query = xstrdup_printf("select %s from %s %s;",
			       object, qos_table, extra);
	xfree(extra);
	xfree(object);
	if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) {
		xfree(query);
		FREE_NULL_BITMAP(preempt_bitstr);
		return NULL;
	}

	rc = 0;
	ret_list = list_create(slurm_destroy_char);
	while ((row = mysql_fetch_row(result))) {
		slurmdb_qos_rec_t *qos_rec = NULL;
		uint32_t id = slurm_atoul(row[MQOS_ID]);
		if (preempt_bitstr) {
			if (_preemption_loop(mysql_conn, id, preempt_bitstr))
				break;
		}
		object = xstrdup(row[MQOS_NAME]);
		list_append(ret_list, object);
		if (!rc) {
			xstrfmtcat(name_char, "(name='%s'", object);
			rc = 1;
		} else  {
			xstrfmtcat(name_char, " || name='%s'", object);
		}

		qos_rec = xmalloc(sizeof(slurmdb_qos_rec_t));
		qos_rec->name = xstrdup(object);
		qos_rec->id = id;
		qos_rec->flags = qos->flags;

		qos_rec->grace_time = qos->grace_time;

		mod_tres_str(&qos_rec->grp_tres,
			     qos->grp_tres, row[MQOS_GT],
			     NULL, "grp_tres", &vals, qos_rec->id, 0);
		mod_tres_str(&qos_rec->grp_tres_mins,
			     qos->grp_tres_mins, row[MQOS_GTM],
			     NULL, "grp_tres_mins", &vals, qos_rec->id, 0);
		mod_tres_str(&qos_rec->grp_tres_run_mins,
			     qos->grp_tres_run_mins, row[MQOS_GTRM],
			     NULL, "grp_tres_run_mins", &vals,
			     qos_rec->id, 0);

		qos_rec->grp_jobs = qos->grp_jobs;
		qos_rec->grp_submit_jobs = qos->grp_submit_jobs;
		qos_rec->grp_wall = qos->grp_wall;

		mod_tres_str(&qos_rec->max_tres_pa,
			     qos->max_tres_pa, row[MQOS_MTPA],
			     NULL, "max_tres_pa", &vals, qos_rec->id, 0);
		mod_tres_str(&qos_rec->max_tres_pj,
			     qos->max_tres_pj, row[MQOS_MTPJ],
			     NULL, "max_tres_pj", &vals, qos_rec->id, 0);
		mod_tres_str(&qos_rec->max_tres_pn,
			     qos->max_tres_pn, row[MQOS_MTPN],
			     NULL, "max_tres_pn", &vals, qos_rec->id, 0);
		mod_tres_str(&qos_rec->max_tres_pu,
			     qos->max_tres_pu, row[MQOS_MTPU],
			     NULL, "max_tres_pu", &vals, qos_rec->id, 0);
		mod_tres_str(&qos_rec->max_tres_mins_pj,
			     qos->max_tres_mins_pj, row[MQOS_MTMPJ],
			     NULL, "max_tres_mins_pj", &vals, qos_rec->id, 0);
		mod_tres_str(&qos_rec->max_tres_run_mins_pa,
			     qos->max_tres_run_mins_pa, row[MQOS_MTRM],
			     NULL, "max_tres_run_mins_pa", &vals,
			     qos_rec->id, 0);
		mod_tres_str(&qos_rec->max_tres_run_mins_pu,
			     qos->max_tres_run_mins_pu, row[MQOS_MTRM],
			     NULL, "max_tres_run_mins_pu", &vals,
			     qos_rec->id, 0);

		qos_rec->max_jobs_pa  = qos->max_jobs_pa;
		qos_rec->max_jobs_pu  = qos->max_jobs_pu;
		qos_rec->max_submit_jobs_pa  = qos->max_submit_jobs_pa;
		qos_rec->max_submit_jobs_pu  = qos->max_submit_jobs_pu;
		qos_rec->max_wall_pj = qos->max_wall_pj;

		mod_tres_str(&qos_rec->min_tres_pj,
			     qos->min_tres_pj, row[MQOS_MITPJ],
			     NULL, "min_tres_pj", &vals, qos_rec->id, 0);

		qos_rec->preempt_mode = qos->preempt_mode;
		qos_rec->priority = qos->priority;

		if (qos->preempt_list) {
			ListIterator new_preempt_itr =
				list_iterator_create(qos->preempt_list);
			char *new_preempt = NULL;
			bool cleared = 0;

			qos_rec->preempt_bitstr = bit_alloc(qos_cnt);

			if (row[MQOS_PREEMPT] && row[MQOS_PREEMPT][0])
				bit_unfmt(qos_rec->preempt_bitstr,
					  row[MQOS_PREEMPT]+1);

			while ((new_preempt = list_next(new_preempt_itr))) {
				if (new_preempt[0] == '-') {
					bit_clear(qos_rec->preempt_bitstr,
						  atol(new_preempt+1));
				} else if (new_preempt[0] == '+') {
					bit_set(qos_rec->preempt_bitstr,
						atol(new_preempt+1));
				} else {
					if (!cleared) {
						cleared = 1;
						bit_nclear(
							qos_rec->preempt_bitstr,
							0,
							qos_cnt-1);
					}

					bit_set(qos_rec->preempt_bitstr,
						atol(new_preempt));
				}
			}
			list_iterator_destroy(new_preempt_itr);
		}

		qos_rec->usage_factor = qos->usage_factor;
		qos_rec->usage_thres = qos->usage_thres;

		if (addto_update_list(mysql_conn->update_list,
				      SLURMDB_MODIFY_QOS, qos_rec)
		    != SLURM_SUCCESS)
			slurmdb_destroy_qos_rec(qos_rec);
	}
	mysql_free_result(result);

	FREE_NULL_BITMAP(preempt_bitstr);

	if (row) {
		xfree(vals);
		xfree(name_char);
		xfree(query);
		FREE_NULL_LIST(ret_list);
		ret_list = NULL;
		errno = ESLURM_QOS_PREEMPTION_LOOP;
		return ret_list;
	}

	if (!list_count(ret_list)) {
		errno = SLURM_NO_CHANGE_IN_DATA;
		if (debug_flags & DEBUG_FLAG_DB_QOS)
			DB_DEBUG(mysql_conn->conn,
				 "didn't effect anything\n%s", query);
		xfree(vals);
		xfree(query);
		return ret_list;
	}
	xfree(query);
	xstrcat(name_char, ")");

	user_name = uid_to_string((uid_t) uid);
	rc = modify_common(mysql_conn, DBD_MODIFY_QOS, now,
			   user_name, qos_table, name_char, vals, NULL);
	xfree(user_name);
	xfree(name_char);
	xfree(vals);
	if (rc == SLURM_ERROR) {
		error("Couldn't modify qos");
		FREE_NULL_LIST(ret_list);
		ret_list = NULL;
	}

	return ret_list;
}
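
For illustration, an assumed-input example of the WHERE clause the loops above build:

/* With qos_cond->name_list = {"normal", "high"}, extra becomes
 *   where deleted=0 && (name='normal' || name='high')
 * and the lookup issued is
 *   select <cols> from <qos_table> where deleted=0 && (...);
 */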
extern int as_mysql_add_qos(mysql_conn_t *mysql_conn, uint32_t uid,
			    List qos_list)
{
	ListIterator itr = NULL;
	int rc = SLURM_SUCCESS;
	slurmdb_qos_rec_t *object = NULL;
	char *cols = NULL, *extra = NULL, *vals = NULL, *query = NULL,
		*tmp_extra = NULL;
	time_t now = time(NULL);
	char *user_name = NULL;
	int affect_rows = 0;
	int added = 0;
	char *added_preempt = NULL;
	uint32_t qos_cnt;
	assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK,
				   NO_LOCK, NO_LOCK, NO_LOCK };

	if (check_connection(mysql_conn) != SLURM_SUCCESS)
		return ESLURM_DB_CONNECTION;

	if (!is_user_min_admin_level(mysql_conn, uid, SLURMDB_ADMIN_SUPER_USER))
		return ESLURM_ACCESS_DENIED;

	assoc_mgr_lock(&locks);
	qos_cnt = g_qos_count;
	assoc_mgr_unlock(&locks);

	user_name = uid_to_string((uid_t) uid);
	itr = list_iterator_create(qos_list);

	while ((object = list_next(itr))) {
		if (!object->name || !object->name[0]) {
			error("We need a qos name to add.");
			rc = SLURM_ERROR;
			continue;
		}
		xstrcat(cols, "creation_time, mod_time, name");
		xstrfmtcat(vals, "%ld, %ld, '%s'",
			   now, now, object->name);
		xstrfmtcat(extra, ", mod_time=%ld", now);

		_setup_qos_limits(object, &cols, &vals,
				  &extra, &added_preempt, 1);
		if (added_preempt) {
			object->preempt_bitstr = bit_alloc(qos_cnt);
			bit_unfmt(object->preempt_bitstr, added_preempt+1);
			xfree(added_preempt);
		}

		xstrfmtcat(query,
			   "insert into %s (%s) values (%s) "
			   "on duplicate key update deleted=0, "
			   "id=LAST_INSERT_ID(id)%s;",
			   qos_table, cols, vals, extra);


		if (debug_flags & DEBUG_FLAG_DB_QOS)
			DB_DEBUG(mysql_conn->conn, "query\n%s", query);
		object->id = (uint32_t)mysql_db_insert_ret_id(
			mysql_conn, query);
		xfree(query);
		if (!object->id) {
			error("Couldn't add qos %s", object->name);
			added=0;
			xfree(cols);
			xfree(extra);
			xfree(vals);
			break;
		}

		affect_rows = last_affected_rows(mysql_conn);

		if (!affect_rows) {
			debug2("nothing changed %d", affect_rows);
			xfree(cols);
			xfree(extra);
			xfree(vals);
			continue;
		}

		/* we always have a ', ' as the first 2 chars */
		tmp_extra = slurm_add_slash_to_quotes(extra+2);

		xstrfmtcat(query,
			   "insert into %s "
			   "(timestamp, action, name, actor, info) "
			   "values (%ld, %u, '%s', '%s', '%s');",
			   txn_table,
			   now, DBD_ADD_QOS, object->name, user_name,
			   tmp_extra);

		xfree(tmp_extra);
		xfree(cols);
		xfree(extra);
		xfree(vals);
		debug4("query\n%s",query);
		rc = mysql_db_query(mysql_conn, query);
		xfree(query);
		if (rc != SLURM_SUCCESS) {
			error("Couldn't add txn");
		} else {
			if (addto_update_list(mysql_conn->update_list,
					      SLURMDB_ADD_QOS,
					      object) == SLURM_SUCCESS)
				list_remove(itr);
			added++;
		}

	}
	list_iterator_destroy(itr);
	xfree(user_name);

	if (!added) {
		reset_mysql_conn(mysql_conn);
	}

	return rc;
}
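
An assumed-value sketch of the statement the xstrfmtcat() above produces when adding a QOS named 'high' (placeholders stand for the real table name, column list, and timestamps):

/* insert into <qos_table> (creation_time, mod_time, name, ...)
 * values (<now>, <now>, 'high', ...)
 * on duplicate key update deleted=0, id=LAST_INSERT_ID(id),
 *   mod_time=<now>;
 */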
extern List as_mysql_get_qos(mysql_conn_t *mysql_conn, uid_t uid,
			     slurmdb_qos_cond_t *qos_cond)
{
	char *query = NULL;
	char *extra = NULL;
	char *tmp = NULL;
	List qos_list = NULL;
	ListIterator itr = NULL;
	char *object = NULL;
	int set = 0;
	int i=0;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	uint32_t qos_cnt;
	assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK, READ_LOCK, NO_LOCK,
				   NO_LOCK, NO_LOCK, NO_LOCK };

	/* if this changes you will need to edit the corresponding enum */
	char *qos_req_inx[] = {
		"name",
		"description",
		"id",
		"flags",
		"grace_time",
		"grp_tres_mins",
		"grp_tres_run_mins",
		"grp_tres",
		"grp_jobs",
		"grp_submit_jobs",
		"grp_wall",
		"max_tres_mins_pj",
		"max_tres_run_mins_pa",
		"max_tres_run_mins_pu",
		"max_tres_pa",
		"max_tres_pj",
		"max_tres_pn",
		"max_tres_pu",
		"max_jobs_pa",
		"max_jobs_per_user",
		"max_submit_jobs_pa",
		"max_submit_jobs_per_user",
		"max_wall_duration_per_job",
		"substr(preempt, 1, length(preempt) - 1)",
		"preempt_mode",
		"priority",
		"usage_factor",
		"usage_thres",
		"min_tres_pj",
	};
	enum {
		QOS_REQ_NAME,
		QOS_REQ_DESC,
		QOS_REQ_ID,
		QOS_REQ_FLAGS,
		QOS_REQ_GRACE,
		QOS_REQ_GTM,
		QOS_REQ_GTRM,
		QOS_REQ_GT,
		QOS_REQ_GJ,
		QOS_REQ_GSJ,
		QOS_REQ_GW,
		QOS_REQ_MTMPJ,
		QOS_REQ_MTRMA,
		QOS_REQ_MTRM,
		QOS_REQ_MTPA,
		QOS_REQ_MTPJ,
		QOS_REQ_MTPN,
		QOS_REQ_MTPU,
		QOS_REQ_MJPA,
		QOS_REQ_MJPU,
		QOS_REQ_MSJPA,
		QOS_REQ_MSJPU,
		QOS_REQ_MWPJ,
		QOS_REQ_PREE,
		QOS_REQ_PREEM,
		QOS_REQ_PRIO,
		QOS_REQ_UF,
		QOS_REQ_UT,
		QOS_REQ_MITPJ,
		QOS_REQ_COUNT
	};

	if (check_connection(mysql_conn) != SLURM_SUCCESS)
		return NULL;

	if (!qos_cond) {
		xstrcat(extra, "where deleted=0");
		goto empty;
	}

	if (qos_cond->with_deleted)
		xstrcat(extra, "where (deleted=0 || deleted=1)");
	else
		xstrcat(extra, "where deleted=0");


	if (qos_cond->description_list
	    && list_count(qos_cond->description_list)) {
		set = 0;
		xstrcat(extra, " && (");
		itr = list_iterator_create(qos_cond->description_list);
		while ((object = list_next(itr))) {
			if (set)
				xstrcat(extra, " || ");
			xstrfmtcat(extra, "description='%s'", object);
			set = 1;
		}
		list_iterator_destroy(itr);
		xstrcat(extra, ")");
	}

	if (qos_cond->id_list
	    && list_count(qos_cond->id_list)) {
		set = 0;
		xstrcat(extra, " && (");
		itr = list_iterator_create(qos_cond->id_list);
		while ((object = list_next(itr))) {
			if (set)
				xstrcat(extra, " || ");
			xstrfmtcat(extra, "id='%s'", object);
			set = 1;
		}
		list_iterator_destroy(itr);
		xstrcat(extra, ")");
	}

	if (qos_cond->name_list
	    && list_count(qos_cond->name_list)) {
		set = 0;
		xstrcat(extra, " && (");
		itr = list_iterator_create(qos_cond->name_list);
		while ((object = list_next(itr))) {
			if (set)
				xstrcat(extra, " || ");
			xstrfmtcat(extra, "name='%s'", object);
			set = 1;
		}
		list_iterator_destroy(itr);
		xstrcat(extra, ")");
	}

empty:

	xfree(tmp);
	xstrfmtcat(tmp, "%s", qos_req_inx[i]);
	for(i=1; i<QOS_REQ_COUNT; i++) {
		xstrfmtcat(tmp, ", %s", qos_req_inx[i]);
	}

	query = xstrdup_printf("select %s from %s %s", tmp, qos_table, extra);
	xfree(tmp);
	xfree(extra);

	if (debug_flags & DEBUG_FLAG_DB_QOS)
		DB_DEBUG(mysql_conn->conn, "query\n%s", query);
	if (!(result = mysql_db_query_ret(
		      mysql_conn, query, 0))) {
		xfree(query);
		return NULL;
	}
	xfree(query);

	qos_list = list_create(slurmdb_destroy_qos_rec);

	assoc_mgr_lock(&locks);
	qos_cnt = g_qos_count;
	assoc_mgr_unlock(&locks);

	while ((row = mysql_fetch_row(result))) {
		slurmdb_qos_rec_t *qos = xmalloc(sizeof(slurmdb_qos_rec_t));
		list_append(qos_list, qos);

		if (row[QOS_REQ_DESC] && row[QOS_REQ_DESC][0])
			qos->description = xstrdup(row[QOS_REQ_DESC]);

		qos->id = slurm_atoul(row[QOS_REQ_ID]);

		qos->flags = slurm_atoul(row[QOS_REQ_FLAGS]);

		if (row[QOS_REQ_NAME] && row[QOS_REQ_NAME][0])
			qos->name = xstrdup(row[QOS_REQ_NAME]);

		if (row[QOS_REQ_GRACE])
			qos->grace_time = slurm_atoul(row[QOS_REQ_GRACE]);

		if (row[QOS_REQ_GT][0])
			qos->grp_tres = xstrdup(row[QOS_REQ_GT]);
		if (row[QOS_REQ_GTM][0])
			qos->grp_tres_mins = xstrdup(row[QOS_REQ_GTM]);
		if (row[QOS_REQ_GTRM][0])
			qos->grp_tres_run_mins = xstrdup(row[QOS_REQ_GTRM]);

		if (row[QOS_REQ_GJ])
			qos->grp_jobs = slurm_atoul(row[QOS_REQ_GJ]);
		else
			qos->grp_jobs = INFINITE;
		if (row[QOS_REQ_GSJ])
			qos->grp_submit_jobs = slurm_atoul(row[QOS_REQ_GSJ]);
		else
			qos->grp_submit_jobs = INFINITE;
		if (row[QOS_REQ_GW])
			qos->grp_wall = slurm_atoul(row[QOS_REQ_GW]);
		else
			qos->grp_wall = INFINITE;

		if (row[QOS_REQ_MJPA])
			qos->max_jobs_pa = slurm_atoul(row[QOS_REQ_MJPA]);
		else
			qos->max_jobs_pa = INFINITE;

		if (row[QOS_REQ_MJPU])
			qos->max_jobs_pu = slurm_atoul(row[QOS_REQ_MJPU]);
		else
			qos->max_jobs_pu = INFINITE;

		if (row[QOS_REQ_MSJPA])
			qos->max_submit_jobs_pa =
				slurm_atoul(row[QOS_REQ_MSJPA]);
		else
			qos->max_submit_jobs_pa = INFINITE;

		if (row[QOS_REQ_MSJPU])
			qos->max_submit_jobs_pu =
				slurm_atoul(row[QOS_REQ_MSJPU]);
		else
			qos->max_submit_jobs_pu = INFINITE;

		if (row[QOS_REQ_MTPA][0])
			qos->max_tres_pa = xstrdup(row[QOS_REQ_MTPA]);

		if (row[QOS_REQ_MTPJ][0])
			qos->max_tres_pj = xstrdup(row[QOS_REQ_MTPJ]);

		if (row[QOS_REQ_MTPN][0])
			qos->max_tres_pn = xstrdup(row[QOS_REQ_MTPN]);

		if (row[QOS_REQ_MTPU][0])
			qos->max_tres_pu = xstrdup(row[QOS_REQ_MTPU]);

		if (row[QOS_REQ_MTMPJ][0])
			qos->max_tres_mins_pj = xstrdup(row[QOS_REQ_MTMPJ]);

		if (row[QOS_REQ_MTRMA][0])
			qos->max_tres_run_mins_pa = xstrdup(row[QOS_REQ_MTRMA]);

		if (row[QOS_REQ_MTRM][0])
			qos->max_tres_run_mins_pu = xstrdup(row[QOS_REQ_MTRM]);

		if (row[QOS_REQ_MWPJ])
			qos->max_wall_pj = slurm_atoul(row[QOS_REQ_MWPJ]);
		else
			qos->max_wall_pj = INFINITE;

		if (row[QOS_REQ_PREE] && row[QOS_REQ_PREE][0]) {
			if (!qos->preempt_bitstr)
				qos->preempt_bitstr = bit_alloc(qos_cnt);
			bit_unfmt(qos->preempt_bitstr, row[QOS_REQ_PREE]+1);
		}
		if (row[QOS_REQ_PREEM])
			qos->preempt_mode = slurm_atoul(row[QOS_REQ_PREEM]);
		if (row[QOS_REQ_PRIO])
			qos->priority = slurm_atoul(row[QOS_REQ_PRIO]);

		if (row[QOS_REQ_UF])
			qos->usage_factor = atof(row[QOS_REQ_UF]);

		if (row[QOS_REQ_UT])
			qos->usage_thres = atof(row[QOS_REQ_UT]);
		else
			qos->usage_thres = (double)INFINITE;

		if (row[QOS_REQ_MITPJ][0])
			qos->min_tres_pj = xstrdup(row[QOS_REQ_MITPJ]);
	}
	mysql_free_result(result);

	return qos_list;
}
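
The comment "if this changes you will need to edit the corresponding enum" describes a sync requirement between qos_req_inx[] and the QOS_REQ_* enum. A hypothetical compile-time guard, assuming a C11 compiler, placed right after the two definitions:

/* Sketch only: fails the build if the array and enum drift apart. */
_Static_assert(sizeof(qos_req_inx) / sizeof(qos_req_inx[0]) == QOS_REQ_COUNT,
	       "qos_req_inx[] and the QOS_REQ_* enum are out of sync");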
Example #10
/*
 * acct_policy_job_runnable - Determine if the specified job can execute
 *	right now or not depending upon accounting policy (e.g. running
 *	job limit for this association). If the association limits prevent
 *	the job from ever running (lowered limits since job submission),
 *	then cancel the job.
 */
extern bool acct_policy_job_runnable(struct job_record *job_ptr)
{
	slurmdb_qos_rec_t *qos_ptr;
	slurmdb_association_rec_t *assoc_ptr;
	uint32_t time_limit;
	uint64_t cpu_time_limit;
	uint64_t job_cpu_time_limit;
	bool rc = true;
	uint64_t usage_mins;
	uint32_t wall_mins;
	bool cancel_job = 0;
	int parent = 0; /* flag to tell us if we are looking at the
			 * parent or not */
	assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK,
				   READ_LOCK, NO_LOCK, NO_LOCK };

	/* check to see if we are enforcing associations */
	if (!accounting_enforce)
		return true;

	if (!_valid_job_assoc(job_ptr)) {
		_cancel_job(job_ptr);
		return false;
	}

	/* now see if we are enforcing limits */
	if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS))
		return true;

	/* clear old state reason */
	if ((job_ptr->state_reason == WAIT_ASSOC_JOB_LIMIT) ||
	    (job_ptr->state_reason == WAIT_ASSOC_RESOURCE_LIMIT) ||
	    (job_ptr->state_reason == WAIT_ASSOC_TIME_LIMIT))
		job_ptr->state_reason = WAIT_NO_REASON;


	job_cpu_time_limit = (uint64_t)job_ptr->time_limit
		* (uint64_t)job_ptr->details->min_cpus;

	assoc_mgr_lock(&locks);
	qos_ptr = job_ptr->qos_ptr;
	if(qos_ptr) {
		usage_mins = (uint64_t)(qos_ptr->usage->usage_raw / 60.0);
		wall_mins = qos_ptr->usage->grp_used_wall / 60;

		if ((qos_ptr->grp_cpu_mins != (uint64_t)INFINITE)
		    && (usage_mins >= qos_ptr->grp_cpu_mins)) {
			job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
			xfree(job_ptr->state_desc);
			debug2("Job %u being held, "
			       "the job is at or exceeds QOS %s's "
			       "group max cpu minutes of %"PRIu64" "
			       "with %"PRIu64"",
			       job_ptr->job_id,
			       qos_ptr->name,
			       qos_ptr->grp_cpu_mins,
			       usage_mins);
			rc = false;
			goto end_it;
		}

		if ((job_ptr->limit_set_min_cpus != ADMIN_SET_LIMIT)
		    && qos_ptr->grp_cpus != INFINITE) {
			if (job_ptr->details->min_cpus > qos_ptr->grp_cpus) {
				info("job %u is being cancelled, "
				     "min cpu request %u exceeds "
				     "group max cpu limit %u for "
				     "qos '%s'",
				     job_ptr->job_id,
				     job_ptr->details->min_cpus,
				     qos_ptr->grp_cpus,
				     qos_ptr->name);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}

			if ((qos_ptr->usage->grp_used_cpus +
			     job_ptr->details->min_cpus) > qos_ptr->grp_cpus) {
				job_ptr->state_reason =
					WAIT_ASSOC_RESOURCE_LIMIT;
				xfree(job_ptr->state_desc);
				debug2("job %u being held, "
				       "the job is at or exceeds "
				       "group max cpu limit %u "
				       "with already used %u + requested %u "
				       "for qos %s",
				       job_ptr->job_id,
				       qos_ptr->grp_cpus,
				       qos_ptr->usage->grp_used_cpus,
				       job_ptr->details->min_cpus,
				       qos_ptr->name);
				rc = false;
				goto end_it;
			}
		}

		if ((qos_ptr->grp_jobs != INFINITE) &&
		    (qos_ptr->usage->grp_used_jobs >= qos_ptr->grp_jobs)) {
			job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
			xfree(job_ptr->state_desc);
			debug2("job %u being held, "
			       "the job is at or exceeds "
			       "group max jobs limit %u with %u for qos %s",
			       job_ptr->job_id,
			       qos_ptr->grp_jobs,
			       qos_ptr->usage->grp_used_jobs, qos_ptr->name);

			rc = false;
			goto end_it;
		}

		if ((job_ptr->limit_set_min_nodes != ADMIN_SET_LIMIT)
		    && qos_ptr->grp_nodes != INFINITE) {
			if (job_ptr->details->min_nodes > qos_ptr->grp_nodes) {
				info("job %u is being cancelled, "
				     "min node request %u exceeds "
				     "group max node limit %u for "
				     "qos '%s'",
				     job_ptr->job_id,
				     job_ptr->details->min_nodes,
				     qos_ptr->grp_nodes,
				     qos_ptr->name);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}

			if ((qos_ptr->usage->grp_used_nodes +
			     job_ptr->details->min_nodes) >
			    qos_ptr->grp_nodes) {
				job_ptr->state_reason =
					WAIT_ASSOC_RESOURCE_LIMIT;
				xfree(job_ptr->state_desc);
				debug2("job %u being held, "
				       "the job is at or exceeds "
				       "group max node limit %u "
				       "with already used %u + requested %u "
				       "for qos %s",
				       job_ptr->job_id,
				       qos_ptr->grp_nodes,
				       qos_ptr->usage->grp_used_nodes,
				       job_ptr->details->min_nodes,
				       qos_ptr->name);
				rc = false;
				goto end_it;
			}
		}

		/* we don't need to check submit_jobs here */

		if ((qos_ptr->grp_wall != INFINITE)
		    && (wall_mins >= qos_ptr->grp_wall)) {
			job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
			xfree(job_ptr->state_desc);
			debug2("job %u being held, "
			       "the job is at or exceeds "
			       "group wall limit %u "
			       "with %u for qos %s",
			       job_ptr->job_id,
			       qos_ptr->grp_wall,
			       wall_mins, qos_ptr->name);
			rc = false;
			goto end_it;
		}

		if (qos_ptr->max_cpu_mins_pj != INFINITE) {
			cpu_time_limit = qos_ptr->max_cpu_mins_pj;
			if ((job_ptr->time_limit != NO_VAL) &&
			    (job_cpu_time_limit > cpu_time_limit)) {
				info("job %u being cancelled, "
				     "cpu time limit %"PRIu64" exceeds "
				     "qos max per job %"PRIu64"",
				     job_ptr->job_id,
				     job_cpu_time_limit,
				     cpu_time_limit);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}

		if ((job_ptr->limit_set_min_cpus != ADMIN_SET_LIMIT)
		    && qos_ptr->max_cpus_pj != INFINITE) {
			if (job_ptr->details->min_cpus >
			    qos_ptr->max_cpus_pj) {
				info("job %u being cancelled, "
				     "min cpu limit %u exceeds "
				     "qos max %u",
				     job_ptr->job_id,
				     job_ptr->details->min_cpus,
				     qos_ptr->max_cpus_pj);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}

		if (qos_ptr->max_jobs_pu != INFINITE) {
			slurmdb_used_limits_t *used_limits = NULL;
			if(qos_ptr->usage->user_limit_list) {
				ListIterator itr = list_iterator_create(
					qos_ptr->usage->user_limit_list);
				while((used_limits = list_next(itr))) {
					if(used_limits->uid == job_ptr->user_id)
						break;
				}
				list_iterator_destroy(itr);
			}
			if(used_limits && (used_limits->jobs
					   >= qos_ptr->max_jobs_pu)) {
				debug2("job %u being held, "
				       "the job is at or exceeds "
				       "max jobs limit %u with %u for QOS %s",
				       job_ptr->job_id,
				       qos_ptr->max_jobs_pu,
				       used_limits->jobs, qos_ptr->name);
				rc = false;
				goto end_it;
			}
		}

		if ((job_ptr->limit_set_min_nodes != ADMIN_SET_LIMIT)
		    && qos_ptr->max_nodes_pj != INFINITE) {
			if (job_ptr->details->min_nodes >
			    qos_ptr->max_nodes_pj) {
				info("job %u being cancelled, "
				     "min node limit %u exceeds "
				     "qos max %u",
				     job_ptr->job_id,
				     job_ptr->details->min_nodes,
				     qos_ptr->max_nodes_pj);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}

		/* we don't need to check submit_jobs_pu here */

		/* if the qos limits have changed since job
		 * submission and job can not run, then kill it */
		if ((job_ptr->limit_set_time != ADMIN_SET_LIMIT)
		    && qos_ptr->max_wall_pj != INFINITE) {
			time_limit = qos_ptr->max_wall_pj;
			if ((job_ptr->time_limit != NO_VAL) &&
			    (job_ptr->time_limit > time_limit)) {
				info("job %u being cancelled, "
				     "time limit %u exceeds qos "
				     "max wall pj %u",
				     job_ptr->job_id,
				     job_ptr->time_limit,
				     time_limit);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}
	}

	assoc_ptr = job_ptr->assoc_ptr;
	while(assoc_ptr) {
		usage_mins = (uint64_t)(assoc_ptr->usage->usage_raw / 60.0);
		wall_mins = assoc_ptr->usage->grp_used_wall / 60;
#if _DEBUG
		info("acct_job_limits: %u of %u",
		     assoc_ptr->usage->used_jobs, assoc_ptr->max_jobs);
#endif
		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->grp_cpu_mins == (uint64_t)INFINITE))
		    && (assoc_ptr->grp_cpu_mins != (uint64_t)INFINITE)
		    && (usage_mins >= assoc_ptr->grp_cpu_mins)) {
			job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
			xfree(job_ptr->state_desc);
			debug2("job %u being held, "
			       "assoc %u is at or exceeds "
			       "group max cpu minutes limit %"PRIu64" "
			       "with %Lf for account %s",
			       job_ptr->job_id, assoc_ptr->id,
			       assoc_ptr->grp_cpu_mins,
			       assoc_ptr->usage->usage_raw, assoc_ptr->acct);

			rc = false;
			goto end_it;
		}

		if ((job_ptr->limit_set_min_cpus != ADMIN_SET_LIMIT)
		    && (!qos_ptr ||
			(qos_ptr && qos_ptr->grp_cpus == INFINITE))
		    && (assoc_ptr->grp_cpus != INFINITE)) {
			if (job_ptr->details->min_cpus > assoc_ptr->grp_cpus) {
				info("job %u being cancelled, "
				     "min cpu request %u exceeds "
				     "group max cpu limit %u for "
				     "account %s",
				     job_ptr->job_id,
				     job_ptr->details->min_cpus,
				     assoc_ptr->grp_cpus,
				     assoc_ptr->acct);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}

			if ((assoc_ptr->usage->grp_used_cpus +
				    job_ptr->details->min_cpus) >
				   assoc_ptr->grp_cpus) {
				job_ptr->state_reason =
					WAIT_ASSOC_RESOURCE_LIMIT;
				xfree(job_ptr->state_desc);
				debug2("job %u being held, "
				       "assoc %u is at or exceeds "
				       "group max cpu limit %u "
				       "with already used %u + requested %u "
				       "for account %s",
				       job_ptr->job_id, assoc_ptr->id,
				       assoc_ptr->grp_cpus,
				       assoc_ptr->usage->grp_used_cpus,
				       job_ptr->details->min_cpus,
				       assoc_ptr->acct);
				rc = false;
				goto end_it;
			}
		}

		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->grp_jobs == INFINITE)) &&
		    (assoc_ptr->grp_jobs != INFINITE) &&
		    (assoc_ptr->usage->used_jobs >= assoc_ptr->grp_jobs)) {
			job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
			xfree(job_ptr->state_desc);
			debug2("job %u being held, "
			       "assoc %u is at or exceeds "
			       "group max jobs limit %u with %u for account %s",
			       job_ptr->job_id, assoc_ptr->id,
			       assoc_ptr->grp_jobs,
			       assoc_ptr->usage->used_jobs, assoc_ptr->acct);

			rc = false;
			goto end_it;
		}

		if ((job_ptr->limit_set_min_nodes != ADMIN_SET_LIMIT)
		    && (!qos_ptr ||
			(qos_ptr && qos_ptr->grp_nodes == INFINITE))
		    && (assoc_ptr->grp_nodes != INFINITE)) {
			if (job_ptr->details->min_nodes >
			    assoc_ptr->grp_nodes) {
				info("job %u being cancelled, "
				     "min node request %u exceeds "
				     "group max node limit %u for "
				     "account %s",
				     job_ptr->job_id,
				     job_ptr->details->min_nodes,
				     assoc_ptr->grp_nodes,
				     assoc_ptr->acct);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}

			if ((assoc_ptr->usage->grp_used_nodes +
			     job_ptr->details->min_nodes) >
			    assoc_ptr->grp_nodes) {
				job_ptr->state_reason =
					WAIT_ASSOC_RESOURCE_LIMIT;
				xfree(job_ptr->state_desc);
				debug2("job %u being held, "
				       "assoc %u is at or exceeds "
				       "group max node limit %u "
				       "with already used %u + requested %u "
				       "for account %s",
				       job_ptr->job_id, assoc_ptr->id,
				       assoc_ptr->grp_nodes,
				       assoc_ptr->usage->grp_used_nodes,
				       job_ptr->details->min_nodes,
				       assoc_ptr->acct);
				rc = false;
				goto end_it;
			}
		}

		/* we don't need to check submit_jobs here */

		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->grp_wall == INFINITE))
		    && (assoc_ptr->grp_wall != INFINITE)
		    && (wall_mins >= assoc_ptr->grp_wall)) {
			job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
			xfree(job_ptr->state_desc);
			debug2("job %u being held, "
			       "assoc %u is at or exceeds "
			       "group wall limit %u "
			       "with %u for account %s",
			       job_ptr->job_id, assoc_ptr->id,
			       assoc_ptr->grp_wall,
			       wall_mins, assoc_ptr->acct);
			rc = false;
			goto end_it;
		}


		/* We don't need to look at the regular limits for
		 * parents since we have pre-propagated them, so just
		 * continue with the next parent
		 */
		if(parent) {
			assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
			continue;
		}

		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->max_cpu_mins_pj == INFINITE)) &&
		    (assoc_ptr->max_cpu_mins_pj != INFINITE)) {
			cpu_time_limit = assoc_ptr->max_cpu_mins_pj;
			if ((job_ptr->time_limit != NO_VAL) &&
			    (job_cpu_time_limit > cpu_time_limit)) {
				info("job %u being cancelled, "
				     "cpu time limit %"PRIu64" exceeds "
				     "assoc max per job %"PRIu64"",
				     job_ptr->job_id,
				     job_cpu_time_limit,
				     cpu_time_limit);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}

		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->max_cpus_pj == INFINITE)) &&
		    (assoc_ptr->max_cpus_pj != INFINITE)) {
			if (job_ptr->details->min_cpus >
			    assoc_ptr->max_cpus_pj) {
				info("job %u being cancelled, "
				     "min cpu limit %u exceeds "
				     "account max %u",
				     job_ptr->job_id,
				     job_ptr->details->min_cpus,
				     assoc_ptr->max_cpus_pj);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}

		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->max_jobs_pu == INFINITE)) &&
		    (assoc_ptr->max_jobs != INFINITE) &&
		    (assoc_ptr->usage->used_jobs >= assoc_ptr->max_jobs)) {
			job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
			xfree(job_ptr->state_desc);
			debug2("job %u being held, "
			       "assoc %u is at or exceeds "
			       "max jobs limit %u with %u for account %s",
			       job_ptr->job_id, assoc_ptr->id,
			       assoc_ptr->max_jobs,
			       assoc_ptr->usage->used_jobs, assoc_ptr->acct);
			rc = false;
			goto end_it;
		}

		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->max_nodes_pj == INFINITE))
		    && (assoc_ptr->max_nodes_pj != INFINITE)) {
			if (job_ptr->details->min_nodes >
			    assoc_ptr->max_nodes_pj) {
				info("job %u being cancelled, "
				     "min node limit %u exceeds "
				     "account max %u",
				     job_ptr->job_id,
				     job_ptr->details->min_nodes,
				     assoc_ptr->max_nodes_pj);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}

		/* we don't need to check submit_jobs here */

		/* if the association limits have changed since job
		 * submission and job can not run, then kill it */
		if ((job_ptr->limit_set_time != ADMIN_SET_LIMIT)
		    && (!qos_ptr ||
			(qos_ptr && qos_ptr->max_wall_pj == INFINITE))
		    && (assoc_ptr->max_wall_pj != INFINITE)) {
			time_limit = assoc_ptr->max_wall_pj;
			if ((job_ptr->time_limit != NO_VAL) &&
			    (job_ptr->time_limit > time_limit)) {
				info("job %u being cancelled, "
				     "time limit %u exceeds account "
				     "max %u",
				     job_ptr->job_id,
				     job_ptr->time_limit,
				     time_limit);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}

		assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
		parent = 1;
	}
end_it:
	assoc_mgr_unlock(&locks);

	if(cancel_job)
		_cancel_job(job_ptr);

	return rc;
}
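
The guard "(!qos_ptr || (qos_ptr && qos_ptr->limit == INFINITE))" recurs throughout: it means "the QOS does not override this limit, so fall back to enforcing the association's". A hypothetical helper that names the idea (the second qos_ptr test in the original is redundant and dropped here):

static inline bool _qos_leaves_limit_unset(slurmdb_qos_rec_t *qos_ptr,
					   uint32_t qos_limit)
{
	/* true when there is no QOS, or the QOS limit is INFINITE */
	return (!qos_ptr || (qos_limit == INFINITE));
}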
Example #11
extern bool acct_policy_node_usable(struct job_record *job_ptr,
				    uint32_t used_cpus,
				    char *node_name, uint32_t node_cpus)
{
	slurmdb_qos_rec_t *qos_ptr;
	slurmdb_association_rec_t *assoc_ptr;
	bool rc = true;
	uint32_t total_cpus = used_cpus + node_cpus;
	bool cancel_job = 0;
	int parent = 0; /* flag to tell us if we are looking at the
			 * parent or not
			 */
	assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK,
				   READ_LOCK, NO_LOCK, NO_LOCK };

	/* check to see if we are enforcing associations */
	if (!accounting_enforce)
		return true;

	if (!_valid_job_assoc(job_ptr)) {
		_cancel_job(job_ptr);
		return false;
	}

	/* now see if we are enforcing limits */
	if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS))
		return true;

	/* clear old state reason */
	if ((job_ptr->state_reason == WAIT_ASSOC_JOB_LIMIT) ||
	    (job_ptr->state_reason == WAIT_ASSOC_RESOURCE_LIMIT) ||
	    (job_ptr->state_reason == WAIT_ASSOC_TIME_LIMIT))
		job_ptr->state_reason = WAIT_NO_REASON;


	assoc_mgr_lock(&locks);
	qos_ptr = job_ptr->qos_ptr;
	if(qos_ptr) {
		if (qos_ptr->grp_cpus != INFINITE) {
			if ((total_cpus+qos_ptr->usage->grp_used_cpus)
			    > qos_ptr->grp_cpus) {
				debug("Can't use %s, adding it's %u cpus "
				      "exceeds "
				      "group max cpu limit %u for qos '%s'",
				     node_name,
				     node_cpus,
				     qos_ptr->grp_cpus,
				     qos_ptr->name);
				rc = false;
				goto end_it;
			}
		}

		if (qos_ptr->max_cpus_pj != INFINITE) {
			if (total_cpus > qos_ptr->max_cpus_pj) {
				debug("Can't use %s, adding it's %u cpus "
				      "exceeds "
				      "max cpu limit %u for qos '%s'",
				      node_name,
				      node_cpus,
				      qos_ptr->max_cpus_pj,
				      qos_ptr->name);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}
	}

	assoc_ptr = job_ptr->assoc_ptr;
	while(assoc_ptr) {
		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->grp_cpus == INFINITE))
		    && (assoc_ptr->grp_cpus != INFINITE)) {
			if ((total_cpus+assoc_ptr->usage->grp_used_cpus)
			    > assoc_ptr->grp_cpus) {
				debug("Can't use %s, adding it's %u cpus "
				      "exceeds "
				      "group max cpu limit %u for account '%s'",
				      node_name,
				      node_cpus,
				      assoc_ptr->grp_cpus,
				      assoc_ptr->acct);
				rc = false;
				goto end_it;
			}
		}

		/* We don't need to look at the regular limits for
		 * parents since we have pre-propagated them, so just
		 * continue with the next parent
		 */
		if(parent) {
			assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
			continue;
		}

		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->max_cpus_pj == INFINITE)) &&
		    (assoc_ptr->max_cpus_pj != INFINITE)) {
			if (job_ptr->details->min_cpus >
			    assoc_ptr->max_cpus_pj) {
				debug("Can't use %s, adding it's %u cpus "
				      "exceeds "
				      "max cpu limit %u for account '%s'",
				      node_name,
				      node_cpus,
				      assoc_ptr->max_cpus_pj,
				      assoc_ptr->acct);
				rc = false;
				goto end_it;
			}
		}
		assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
		parent = 1;
	}
end_it:
	assoc_mgr_unlock(&locks);

	if(cancel_job)
		_cancel_job(job_ptr);

	return rc;
}
static int	_job_modify(uint32_t jobid, char *bank_ptr,
			char *depend_ptr, char *new_hostlist,
			uint32_t new_node_cnt, char *part_name_ptr,
			uint32_t new_time_limit, char *name_ptr,
			char *start_ptr, char *feature_ptr, char *env_ptr,
			char *comment_ptr, char *gres_ptr, char *wckey_ptr)
{
	struct job_record *job_ptr;
	time_t now = time(NULL);
	bool update_accounting = false;

	job_ptr = find_job_record(jobid);
	if (job_ptr == NULL) {
		error("wiki: MODIFYJOB has invalid jobid %u", jobid);
		return ESLURM_INVALID_JOB_ID;
	}
	if (IS_JOB_FINISHED(job_ptr) || (job_ptr->details == NULL)) {
		info("wiki: MODIFYJOB jobid %u is finished", jobid);
		return ESLURM_DISABLED;
	}

	if (comment_ptr) {
		info("wiki: change job %u comment %s", jobid, comment_ptr);
		xfree(job_ptr->comment);
		job_ptr->comment = xstrdup(comment_ptr);
		last_job_update = now;
	}

	if (depend_ptr) {
		int rc = update_job_dependency(job_ptr, depend_ptr);
		if (rc == SLURM_SUCCESS) {
			info("wiki: changed job %u dependency to %s",
				jobid, depend_ptr);
		} else {
			error("wiki: changing job %u dependency to %s",
				jobid, depend_ptr);
			return EINVAL;
		}
	}

	if (env_ptr) {
		bool have_equal = false;
		char old_sep[1];
		int begin = 0, i;

		if (job_ptr->batch_flag == 0) {
			error("wiki: attempt to set environment variables "
			      "for non-batch job %u", jobid);
			return ESLURM_DISABLED;
		}
		for (i=0; ; i++) {
			if (env_ptr[i] == '=') {
				if (have_equal) {
					error("wiki: setting job %u invalid "
					      "environment variables: %s",
					      jobid, env_ptr);
					return EINVAL;
				}
				have_equal = true;
				if (env_ptr[i+1] == '\"') {
					for (i+=2; ; i++) {
						if (env_ptr[i] == '\0') {
							error("wiki: setting job %u "
							      "invalid environment "
							      "variables: %s",
					 		     jobid, env_ptr);
							return EINVAL;
						}
						if (env_ptr[i] == '\"') {
							i++;
							break;
						}
						if (env_ptr[i] == '\\') {
							i++;
						}
					}
				} else if (env_ptr[i+1] == '\'') {
					for (i+=2; ; i++) {
						if (env_ptr[i] == '\0') {
							error("wiki: setting job %u "
							      "invalid environment "
							      "variables: %s",
					 		     jobid, env_ptr);
							return EINVAL;
						}
						if (env_ptr[i] == '\'') {
							i++;
							break;
						}
						if (env_ptr[i] == '\\') {
							i++;
						}
					}
				}
			}
			/* also treat end-of-string as a terminator so the
			 * scan cannot run past the buffer */
			if ((env_ptr[i] == '\0') || isspace(env_ptr[i]) ||
			    (env_ptr[i] == ',')) {
				if (!have_equal) {
					error("wiki: setting job %u invalid "
					      "environment variables: %s",
					      jobid, env_ptr);
					return EINVAL;
				}
				old_sep[0] = env_ptr[i];
				env_ptr[i] = '\0';
				xrealloc(job_ptr->details->env_sup,
					 sizeof(char *) *
					 (job_ptr->details->env_cnt+1));
				job_ptr->details->env_sup
						[job_ptr->details->env_cnt++] =
						xstrdup(&env_ptr[begin]);
				info("wiki: for job %u add env: %s",
				     jobid, &env_ptr[begin]);
				env_ptr[i] = old_sep[0];
				if ((old_sep[0] == '\0') ||
				    isspace(old_sep[0]))
					break;
				begin = i + 1;
				have_equal = false;
			}
		}
	}

	if (new_time_limit) {
		time_t old_time = job_ptr->time_limit;
		job_ptr->time_limit = new_time_limit;
		info("wiki: change job %u time_limit to %u",
			jobid, new_time_limit);
		/* Update end_time based upon change
		 * to preserve suspend time info */
		job_ptr->end_time = job_ptr->end_time +
				((job_ptr->time_limit -
				  old_time) * 60);
		last_job_update = now;
	}

	if (bank_ptr &&
	    (update_job_account("wiki", job_ptr, bank_ptr) != SLURM_SUCCESS)) {
		return EINVAL;
	}

	if (feature_ptr) {
		if (IS_JOB_PENDING(job_ptr) && (job_ptr->details)) {
			info("wiki: change job %u features to %s",
				jobid, feature_ptr);
			job_ptr->details->features = xstrdup(feature_ptr);
			last_job_update = now;
		} else {
			error("wiki: MODIFYJOB features of non-pending "
				"job %u", jobid);
			return ESLURM_DISABLED;
		}
	}

	if (start_ptr) {
		char *end_ptr;
		uint32_t begin_time = strtol(start_ptr, &end_ptr, 10);
		if (IS_JOB_PENDING(job_ptr) && (job_ptr->details)) {
			info("wiki: change job %u begin time to %u",
				jobid, begin_time);
			job_ptr->details->begin_time = begin_time;
			last_job_update = now;
			update_accounting = true;
		} else {
			error("wiki: MODIFYJOB begin_time of non-pending "
				"job %u", jobid);
			return ESLURM_DISABLED;
		}
	}

	if (name_ptr) {
		if (IS_JOB_PENDING(job_ptr)) {
			info("wiki: change job %u name %s", jobid, name_ptr);
			xfree(job_ptr->name);
			job_ptr->name = xstrdup(name_ptr);
			last_job_update = now;
			update_accounting = true;
		} else {
			error("wiki: MODIFYJOB name of non-pending job %u",
			      jobid);
			return ESLURM_DISABLED;
		}
	}

	if (new_hostlist) {
		int rc = 0, task_cnt;
		hostlist_t hl;
		char *tasklist;

		if (!IS_JOB_PENDING(job_ptr) || !job_ptr->details) {
			/* Job is done, nothing to reset */
			if (new_hostlist[0] == '\0')
				goto host_fini;
			error("wiki: MODIFYJOB hostlist of non-pending "
				"job %u", jobid);
			return ESLURM_DISABLED;
		}

		xfree(job_ptr->details->req_nodes);
		FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap);
		if (new_hostlist[0] == '\0')
			goto host_fini;

		tasklist = moab2slurm_task_list(new_hostlist, &task_cnt);
		if (tasklist == NULL) {
			rc = 1;
			goto host_fini;
		}
		hl = hostlist_create(tasklist);
		if (hl == 0) {
			rc = 1;
			goto host_fini;
		}
		hostlist_uniq(hl);
		hostlist_sort(hl);
		job_ptr->details->req_nodes =
			hostlist_ranged_string_xmalloc(hl);
		hostlist_destroy(hl);
		if (job_ptr->details->req_nodes == NULL) {
			rc = 1;
			goto host_fini;
		}
		if (node_name2bitmap(job_ptr->details->req_nodes, false,
                                     &job_ptr->details->req_node_bitmap)) {
			rc = 1;
			goto host_fini;
		}

host_fini:	if (rc) {
			info("wiki: change job %u invalid hostlist %s",
				jobid, new_hostlist);
			xfree(job_ptr->details->req_nodes);
			return EINVAL;
		} else {
			info("wiki: change job %u hostlist %s",
				jobid, new_hostlist);
			update_accounting = true;
		}
	}

	if (part_name_ptr) {
		struct part_record *part_ptr;
		if (!IS_JOB_PENDING(job_ptr)) {
			error("wiki: MODIFYJOB partition of non-pending "
			      "job %u", jobid);
			return ESLURM_DISABLED;
		}

		part_ptr = find_part_record(part_name_ptr);
		if (part_ptr == NULL) {
			error("wiki: MODIFYJOB has invalid partition %s",
				part_name_ptr);
			return ESLURM_INVALID_PARTITION_NAME;
		}

		info("wiki: change job %u partition %s",
			jobid, part_name_ptr);
		xfree(job_ptr->partition);
		job_ptr->partition = xstrdup(part_name_ptr);
		job_ptr->part_ptr = part_ptr;
		last_job_update = now;
		update_accounting = true;
	}

	if (new_node_cnt) {
		job_desc_msg_t job_desc;
#ifdef HAVE_BG
		uint16_t geometry[SYSTEM_DIMENSIONS] = {(uint16_t) NO_VAL};
		static uint16_t cpus_per_node = 0;
		if (!cpus_per_node) {
			select_g_alter_node_cnt(SELECT_GET_NODE_CPU_CNT,
						&cpus_per_node);
		}
#endif
		if (!IS_JOB_PENDING(job_ptr) || !job_ptr->details) {
			error("wiki: MODIFYJOB node count of non-pending "
			      "job %u", jobid);
			return ESLURM_DISABLED;
		}
		memset(&job_desc, 0, sizeof(job_desc_msg_t));

		job_desc.min_nodes = new_node_cnt;
		job_desc.max_nodes = NO_VAL;
		job_desc.select_jobinfo = select_g_select_jobinfo_alloc();

		select_g_alter_node_cnt(SELECT_SET_NODE_CNT, &job_desc);

		select_g_select_jobinfo_free(job_desc.select_jobinfo);

		job_ptr->details->min_nodes = job_desc.min_nodes;
		if (job_ptr->details->max_nodes &&
		    (job_ptr->details->max_nodes < job_desc.min_nodes))
			job_ptr->details->max_nodes = job_desc.min_nodes;
		info("wiki: change job %u min_nodes to %u",
		     jobid, new_node_cnt);
#ifdef HAVE_BG
		job_ptr->details->min_cpus = job_desc.min_cpus;
		job_ptr->details->max_cpus = job_desc.max_cpus;
		job_ptr->details->pn_min_cpus = job_desc.pn_min_cpus;

		new_node_cnt = job_ptr->details->min_cpus;
		if (cpus_per_node)
			new_node_cnt /= cpus_per_node;

		/* This is only set up so accounting is set up correctly */
		select_g_select_jobinfo_set(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_NODE_CNT,
					    &new_node_cnt);
		/* reset geo since changing this makes any geo
		   potentially invalid */
		select_g_select_jobinfo_set(job_ptr->select_jobinfo,
					    SELECT_JOBDATA_GEOMETRY,
					    geometry);
#endif
		last_job_update = now;
		update_accounting = true;
	}

	if (gres_ptr) {
		char *orig_gres;
		assoc_mgr_lock_t locks = {
			NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK,
			READ_LOCK, NO_LOCK, NO_LOCK };

		if (!IS_JOB_PENDING(job_ptr)) {
			error("wiki: MODIFYJOB GRES of non-pending job %u",
			      jobid);
			return ESLURM_DISABLED;
		}

		orig_gres = job_ptr->gres;
		job_ptr->gres = NULL;
		if (gres_ptr[0])
			job_ptr->gres = xstrdup(gres_ptr);
		if (gres_plugin_job_state_validate(job_ptr->gres,
						   &job_ptr->gres_list)) {
			error("wiki: MODIFYJOB Invalid GRES=%s", gres_ptr);
			xfree(job_ptr->gres);
			job_ptr->gres = orig_gres;
			return ESLURM_INVALID_GRES;
		}
		xfree(orig_gres);
		assoc_mgr_lock(&locks);
		gres_set_job_tres_cnt(job_ptr->gres_list,
				      job_ptr->details ?
				      job_ptr->details->min_nodes : 0,
				      job_ptr->tres_req_cnt,
				      true);
		xfree(job_ptr->tres_req_str);
		job_ptr->tres_req_str =
			assoc_mgr_make_tres_str_from_array(
				job_ptr->tres_req_cnt,
				TRES_STR_FLAG_SIMPLE, true);
		assoc_mgr_unlock(&locks);
	}

	if (wckey_ptr) {
		int rc = update_job_wckey("update_job", job_ptr, wckey_ptr);
		if (rc != SLURM_SUCCESS) {
			error("wiki: MODIFYJOB Invalid WCKEY=%s", wckey_ptr);
			return rc;
		}
	}

	if (update_accounting) {
		/* Update job record in accounting to reflect the changes */
		jobacct_storage_job_start_direct(acct_db_conn, job_ptr);
	}

	return SLURM_SUCCESS;
}
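
An assumed-input illustration for the env_ptr parser in _job_modify(): a comma- or whitespace-terminated list of VAR=value tokens, where values may be double- or single-quoted with backslash escapes inside quotes:

/* e.g. env_ptr = "DEBUG=1,PATH=\"/usr/bin:/bin\",MSG='hello world' "
 * Each token is appended to job_ptr->details->env_sup and logged with
 * "wiki: for job %u add env: ...". */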
Example #13
/* assoc_mgr locks need to be unlocked before you get here */
static int _get_cluster_usage(mysql_conn_t *mysql_conn, uid_t uid,
			      slurmdb_cluster_rec_t *cluster_rec,
			      slurmdbd_msg_type_t type,
			      time_t start, time_t end)
{
	int rc = SLURM_SUCCESS;
	int i=0;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	char *tmp = NULL;
	char *my_usage_table = cluster_day_table;
	char *query = NULL;
	assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK,
				   READ_LOCK, NO_LOCK, NO_LOCK };
	char *cluster_req_inx[] = {
		"id_tres",
		"alloc_secs",
		"down_secs",
		"pdown_secs",
		"idle_secs",
		"resv_secs",
		"over_secs",
		"count",
		"time_start",
	};

	enum {
		CLUSTER_TRES,
		CLUSTER_ACPU,
		CLUSTER_DCPU,
		CLUSTER_PDCPU,
		CLUSTER_ICPU,
		CLUSTER_RCPU,
		CLUSTER_OCPU,
		CLUSTER_CNT,
		CLUSTER_START,
		CLUSTER_COUNT
	};

	if (!cluster_rec->name || !cluster_rec->name[0]) {
		error("We need a cluster name to set data for");
		return SLURM_ERROR;
	}

	if (set_usage_information(&my_usage_table, type, &start, &end)
	    != SLURM_SUCCESS) {
		return SLURM_ERROR;
	}

	xfree(tmp);
	i=0;
	xstrfmtcat(tmp, "%s", cluster_req_inx[i]);
	for(i=1; i<CLUSTER_COUNT; i++) {
		xstrfmtcat(tmp, ", %s", cluster_req_inx[i]);
	}

	query = xstrdup_printf(
		"select %s from \"%s_%s\" where (time_start < %ld "
		"&& time_start >= %ld)",
		tmp, cluster_rec->name, my_usage_table, end, start);

	xfree(tmp);
	if (debug_flags & DEBUG_FLAG_DB_USAGE)
		DB_DEBUG(mysql_conn->conn, "query\n%s", query);

	if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) {
		xfree(query);
		return SLURM_ERROR;
	}
	xfree(query);

	if (!cluster_rec->accounting_list)
		cluster_rec->accounting_list =
			list_create(slurmdb_destroy_cluster_accounting_rec);

	assoc_mgr_lock(&locks);
	while ((row = mysql_fetch_row(result))) {
		slurmdb_tres_rec_t *tres_rec;
		slurmdb_cluster_accounting_rec_t *accounting_rec =
			xmalloc(sizeof(slurmdb_cluster_accounting_rec_t));

		accounting_rec->tres_rec.id = slurm_atoul(row[CLUSTER_TRES]);
		accounting_rec->tres_rec.count = slurm_atoul(row[CLUSTER_CNT]);
		if ((tres_rec = list_find_first(
			     assoc_mgr_tres_list, slurmdb_find_tres_in_list,
			     &accounting_rec->tres_rec.id))) {
			accounting_rec->tres_rec.name =
				xstrdup(tres_rec->name);
			accounting_rec->tres_rec.type =
				xstrdup(tres_rec->type);
		}

		accounting_rec->alloc_secs = slurm_atoull(row[CLUSTER_ACPU]);
		accounting_rec->down_secs = slurm_atoull(row[CLUSTER_DCPU]);
		accounting_rec->pdown_secs = slurm_atoull(row[CLUSTER_PDCPU]);
		accounting_rec->idle_secs = slurm_atoull(row[CLUSTER_ICPU]);
		accounting_rec->over_secs = slurm_atoull(row[CLUSTER_OCPU]);
		accounting_rec->resv_secs = slurm_atoull(row[CLUSTER_RCPU]);
		accounting_rec->period_start = slurm_atoul(row[CLUSTER_START]);
		list_append(cluster_rec->accounting_list, accounting_rec);
	}
	assoc_mgr_unlock(&locks);

	mysql_free_result(result);
	return rc;
}
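
An assumed-value example of the query built above, for a cluster named "snowflake" and the daily rollup table (the table suffix and timestamps are placeholders):

/* select id_tres, alloc_secs, down_secs, pdown_secs, idle_secs,
 *        resv_secs, over_secs, count, time_start
 * from "snowflake_<cluster_day_table>"
 * where (time_start < <end> && time_start >= <start>)
 */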
Example #14
/* assoc_mgr locks need to be unlocked before coming here */
static int _get_object_usage(mysql_conn_t *mysql_conn,
			     slurmdbd_msg_type_t type, char *my_usage_table,
			     char *cluster_name, char *id_str,
			     time_t start, time_t end, List *usage_list)
{
	char *tmp = NULL;
	int i = 0;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	char *query = NULL;
	assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK,
				   READ_LOCK, NO_LOCK, NO_LOCK };

	char *usage_req_inx[] = {
		"t3.id_assoc",
		"t1.id_tres",
		"t1.time_start",
		"t1.alloc_secs",
	};
	enum {
		USAGE_ID,
		USAGE_TRES,
		USAGE_START,
		USAGE_ALLOC,
		USAGE_COUNT
	};

	if (type == DBD_GET_WCKEY_USAGE)
		usage_req_inx[0] = "t1.id";

	xstrfmtcat(tmp, "%s", usage_req_inx[i]);
	for (i=1; i<USAGE_COUNT; i++) {
		xstrfmtcat(tmp, ", %s", usage_req_inx[i]);
	}

	switch (type) {
	case DBD_GET_ASSOC_USAGE:
		query = xstrdup_printf(
			"select %s from \"%s_%s\" as t1, "
			"\"%s_%s\" as t2, \"%s_%s\" as t3 "
			"where (t1.time_start < %ld && t1.time_start >= %ld) "
			"&& t1.id=t2.id_assoc && (%s) && "
			"t2.lft between t3.lft and t3.rgt "
			"order by t3.id_assoc, time_start;",
			tmp, cluster_name, my_usage_table,
			cluster_name, assoc_table, cluster_name, assoc_table,
			end, start, id_str);
		break;
	case DBD_GET_WCKEY_USAGE:
		query = xstrdup_printf(
			"select %s from \"%s_%s\" as t1 "
			"where (time_start < %ld && time_start >= %ld) "
			"&& (%s) order by id, time_start;",
			tmp, cluster_name, my_usage_table, end, start, id_str);
		break;
	default:
		error("Unknown usage type %d", type);
		xfree(tmp);
		return SLURM_ERROR;
		break;
	}
	xfree(tmp);

	if (debug_flags & DEBUG_FLAG_DB_USAGE)
		DB_DEBUG(mysql_conn->conn, "query\n%s", query);
	result = mysql_db_query_ret(mysql_conn, query, 0);
	xfree(query);

	if (!result)
		return SLURM_ERROR;

	if (!(*usage_list))
		(*usage_list) = list_create(slurmdb_destroy_accounting_rec);

	assoc_mgr_lock(&locks);
	while ((row = mysql_fetch_row(result))) {
		slurmdb_tres_rec_t *tres_rec;
		slurmdb_accounting_rec_t *accounting_rec =
			xmalloc(sizeof(slurmdb_accounting_rec_t));

		accounting_rec->tres_rec.id = slurm_atoul(row[USAGE_TRES]);
		if ((tres_rec = list_find_first(
			     assoc_mgr_tres_list, slurmdb_find_tres_in_list,
			     &accounting_rec->tres_rec.id))) {
			accounting_rec->tres_rec.name =
				xstrdup(tres_rec->name);
			accounting_rec->tres_rec.type =
				xstrdup(tres_rec->type);
		}

		accounting_rec->id = slurm_atoul(row[USAGE_ID]);
		accounting_rec->period_start = slurm_atoul(row[USAGE_START]);
		accounting_rec->alloc_secs = slurm_atoull(row[USAGE_ALLOC]);

		list_append(*usage_list, accounting_rec);
	}
	assoc_mgr_unlock(&locks);

	mysql_free_result(result);

	return SLURM_SUCCESS;
}
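
The DBD_GET_ASSOC_USAGE query above relies on the nested-set encoding of the association tree: "t2.lft between t3.lft and t3.rgt" selects every association under (and including) each id in id_str. A small sketch of that membership test, with made-up lft/rgt values:

#include <stdio.h>

/* hypothetical record carrying nested-set bounds */
struct assoc { const char *name; int lft, rgt; };

/* mirrors the SQL "t2.lft between t3.lft and t3.rgt" test */
static int in_subtree(const struct assoc *node, const struct assoc *anc)
{
	return (node->lft >= anc->lft) && (node->lft <= anc->rgt);
}

int main(void)
{
	struct assoc root    = { "root",    1, 10 };
	struct assoc physics = { "physics", 2, 7 };
	struct assoc alice   = { "alice",   3, 4 };

	printf("alice under physics: %d\n", in_subtree(&alice, &physics));
	printf("physics under root:  %d\n", in_subtree(&physics, &root));
	printf("root under physics:  %d\n", in_subtree(&root, &physics));
	return 0;
}
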
Beispiel #15
0
extern void priority_p_job_end(struct job_record *job_ptr)
{
	uint64_t time_limit_secs = (uint64_t)job_ptr->time_limit * 60;
	slurmdb_assoc_rec_t *assoc_ptr;
	int i;
	uint64_t *unused_tres_run_secs;
	assoc_mgr_lock_t locks = { NO_LOCK, WRITE_LOCK, NO_LOCK,
				   WRITE_LOCK, NO_LOCK, NO_LOCK, NO_LOCK };

	/* No decaying in basic priority. Just remove the total secs. */
	unused_tres_run_secs = xmalloc(sizeof(uint64_t) * slurmctld_tres_cnt);
	for (i=0; i<slurmctld_tres_cnt; i++) {
		unused_tres_run_secs[i] =
			job_ptr->tres_alloc_cnt[i] * time_limit_secs;
	}

	assoc_mgr_lock(&locks);
	if (job_ptr->qos_ptr) {
		slurmdb_qos_rec_t *qos_ptr = job_ptr->qos_ptr;
		for (i=0; i<slurmctld_tres_cnt; i++) {
			if (unused_tres_run_secs[i] >
			    qos_ptr->usage->grp_used_tres_run_secs[i]) {
				qos_ptr->usage->grp_used_tres_run_secs[i] = 0;
				debug2("acct_policy_job_fini: "
				       "grp_used_tres_run_secs "
				       "underflow for qos %s tres %s",
				       qos_ptr->name,
				       assoc_mgr_tres_name_array[i]);
			} else
				qos_ptr->usage->grp_used_tres_run_secs[i] -=
					unused_tres_run_secs[i];
		}
	}
	assoc_ptr = job_ptr->assoc_ptr;
	while (assoc_ptr) {
		/* If the job finished early remove the extra time now. */
		for (i=0; i<slurmctld_tres_cnt; i++) {
			if (unused_tres_run_secs[i] >
			    assoc_ptr->usage->grp_used_tres_run_secs[i]) {
				assoc_ptr->usage->grp_used_tres_run_secs[i] = 0;
				debug2("acct_policy_job_fini: "
				       "grp_used_tres_run_secs "
				       "underflow for account %s tres %s",
				       assoc_ptr->acct,
				       assoc_mgr_tres_name_array[i]);

			} else {
				assoc_ptr->usage->grp_used_tres_run_secs[i] -=
					unused_tres_run_secs[i];
				debug4("acct_policy_job_fini: job %u. "
				       "Removed %"PRIu64" unused seconds "
				       "from acct %s tres %s "
				       "grp_used_tres_run_secs = %"PRIu64"",
				       job_ptr->job_id, unused_tres_run_secs[i],
				       assoc_ptr->acct,
				       assoc_mgr_tres_name_array[i],
				       assoc_ptr->usage->
				       grp_used_tres_run_secs[i]);
			}
		}
		/* now handle all the group limits of the parents */
		assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
	}
	assoc_mgr_unlock(&locks);
	xfree(unused_tres_run_secs);

	return;
}
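
Since the basic priority plugin never decays usage, the whole pre-charged total (allocation count times the time limit in seconds) is removed at job end, clamping at zero rather than letting the unsigned counter wrap. A standalone sketch of that subtraction with illustrative numbers:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int main(void)
{
	uint64_t time_limit_secs = 60ULL * 60;	/* 60-minute limit */
	uint64_t alloc_cnt = 16;		/* 16 cpus of one TRES */
	uint64_t grp_used = 40000;		/* current group counter */
	uint64_t unused = alloc_cnt * time_limit_secs;	/* 57600 */

	if (unused > grp_used)
		grp_used = 0;	/* subtraction would underflow: clamp */
	else
		grp_used -= unused;

	printf("grp_used_tres_run_secs = %" PRIu64 "\n", grp_used);
	return 0;
}
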
Beispiel #16
0
/*
 * Read and process the bluegene.conf configuration file so as to
 * determine which blocks are static/dynamic, torus/mesh, etc.
 */
extern int read_bg_conf(void)
{
	int i;
	bool tmp_bool = 0;
	int count = 0;
	s_p_hashtbl_t *tbl = NULL;
	char *tmp_char = NULL;
	select_ba_request_t **blockreq_array = NULL;
	image_t **image_array = NULL;
	image_t *image = NULL;
	static time_t last_config_update = (time_t) 0;
	struct stat config_stat;
	ListIterator itr = NULL;
	char* bg_conf_file = NULL;
	static int *dims = NULL;

	if (!dims)
		dims = select_g_ba_get_dims();

	if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
		info("Reading the bluegene.conf file");

	/* check if config file has changed */
	bg_conf_file = get_extra_conf_path("bluegene.conf");

	if (stat(bg_conf_file, &config_stat) < 0)
		fatal("can't stat bluegene.conf file %s: %m", bg_conf_file);
	if (last_config_update) {
		_reopen_bridge_log();
		if (last_config_update == config_stat.st_mtime) {
			if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
				info("%s unchanged", bg_conf_file);
		} else {
			info("Restart slurmctld for %s changes "
			     "to take effect",
			     bg_conf_file);
		}
		last_config_update = config_stat.st_mtime;
		xfree(bg_conf_file);
		return SLURM_SUCCESS;
	}
	last_config_update = config_stat.st_mtime;

	/* initialization */
	/* bg_conf defined in bg_node_alloc.h */
	if (!(tbl = config_make_tbl(bg_conf_file)))
		fatal("something wrong with opening/reading bluegene "
		      "conf file");
	xfree(bg_conf_file);

#ifdef HAVE_BGL
	if (s_p_get_array((void ***)&image_array,
			  &count, "AltBlrtsImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->blrts_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_blrtsimage, "BlrtsImage", tbl)) {
		if (!list_count(bg_conf->blrts_list))
			fatal("BlrtsImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->blrts_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_blrtsimage = xstrdup(image->name);
		info("Warning: using %s as the default BlrtsImage.  "
		     "If this isn't correct please set BlrtsImage",
		     bg_conf->default_blrtsimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default BlrtsImage %s",
			     bg_conf->default_blrtsimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_blrtsimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->blrts_list, image);
	}

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltLinuxImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->linux_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_linuximage, "LinuxImage", tbl)) {
		if (!list_count(bg_conf->linux_list))
			fatal("LinuxImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->linux_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_linuximage = xstrdup(image->name);
		info("Warning: using %s as the default LinuxImage.  "
		     "If this isn't correct please set LinuxImage",
		     bg_conf->default_linuximage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default LinuxImage %s",
			     bg_conf->default_linuximage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_linuximage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->linux_list, image);
	}

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltRamDiskImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->ramdisk_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_ramdiskimage,
			    "RamDiskImage", tbl)) {
		if (!list_count(bg_conf->ramdisk_list))
			fatal("RamDiskImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->ramdisk_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_ramdiskimage = xstrdup(image->name);
		info("Warning: using %s as the default RamDiskImage.  "
		     "If this isn't correct please set RamDiskImage",
		     bg_conf->default_ramdiskimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default RamDiskImage %s",
			     bg_conf->default_ramdiskimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_ramdiskimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->ramdisk_list, image);
	}
#elif defined HAVE_BGP

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltCnloadImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->linux_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_linuximage, "CnloadImage", tbl)) {
		if (!list_count(bg_conf->linux_list))
			fatal("CnloadImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->linux_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_linuximage = xstrdup(image->name);
		info("Warning: using %s as the default CnloadImage.  "
		     "If this isn't correct please set CnloadImage",
		     bg_conf->default_linuximage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default CnloadImage %s",
			     bg_conf->default_linuximage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_linuximage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->linux_list, image);
	}

	if (s_p_get_array((void ***)&image_array,
			  &count, "AltIoloadImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->ramdisk_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_ramdiskimage,
			    "IoloadImage", tbl)) {
		if (!list_count(bg_conf->ramdisk_list))
			fatal("IoloadImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->ramdisk_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_ramdiskimage = xstrdup(image->name);
		info("Warning: using %s as the default IoloadImage.  "
		     "If this isn't correct please set IoloadImage",
		     bg_conf->default_ramdiskimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default IoloadImage %s",
			     bg_conf->default_ramdiskimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_ramdiskimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->ramdisk_list, image);
	}

#endif
	if (s_p_get_array((void ***)&image_array,
			  &count, "AltMloaderImage", tbl)) {
		for (i = 0; i < count; i++) {
			list_append(bg_conf->mloader_list, image_array[i]);
			image_array[i] = NULL;
		}
	}
	if (!s_p_get_string(&bg_conf->default_mloaderimage,
			    "MloaderImage", tbl)) {
		if (!list_count(bg_conf->mloader_list))
			fatal("MloaderImage not configured "
			      "in bluegene.conf");
		itr = list_iterator_create(bg_conf->mloader_list);
		image = list_next(itr);
		image->def = true;
		list_iterator_destroy(itr);
		bg_conf->default_mloaderimage = xstrdup(image->name);
		info("Warning: using %s as the default MloaderImage.  "
		     "If this isn't correct please set MloaderImage",
		     bg_conf->default_mloaderimage);
	} else {
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("default MloaderImage %s",
			     bg_conf->default_mloaderimage);
		image = xmalloc(sizeof(image_t));
		image->name = xstrdup(bg_conf->default_mloaderimage);
		image->def = true;
		image->groups = NULL;
		/* we want it to be first */
		list_push(bg_conf->mloader_list, image);
	}

	if (!s_p_get_uint16(&bg_conf->mp_cnode_cnt, "MidplaneNodeCnt", tbl)) {
		if (!s_p_get_uint16(&bg_conf->mp_cnode_cnt,
				    "BasePartitionNodeCnt", tbl)) {
			error("MidplaneNodeCnt not configured in bluegene.conf "
			      "defaulting to 512 as MidplaneNodeCnt");
			bg_conf->mp_cnode_cnt = 512;
		}
	}

	if (bg_conf->mp_cnode_cnt <= 0)
		fatal("You should have more than 0 nodes "
		      "per midplane");
	bg_conf->actual_cnodes_per_mp = bg_conf->mp_cnode_cnt;
	bg_conf->quarter_cnode_cnt = bg_conf->mp_cnode_cnt/4;

	/* bg_conf->cpus_per_mp should had already been set from the
	 * node_init */
	if (bg_conf->cpus_per_mp < bg_conf->mp_cnode_cnt) {
		fatal("For some reason we have only %u cpus per mp, but "
		      "have %u cnodes per mp.  You need at least the same "
		      "number of cpus as you have cnodes per mp.  "
		      "Check the NodeName CPUs= "
		      "definition in the slurm.conf.",
		      bg_conf->cpus_per_mp, bg_conf->mp_cnode_cnt);
	}

	bg_conf->cpu_ratio = bg_conf->cpus_per_mp/bg_conf->mp_cnode_cnt;
	if (!bg_conf->cpu_ratio)
		fatal("We appear to have less than 1 cpu on a cnode.  "
		      "You specified %u for MidplaneNodeCnt "
		      "in the blugene.conf and %u cpus "
		      "for each node in the slurm.conf",
		      bg_conf->mp_cnode_cnt, bg_conf->cpus_per_mp);

	num_unused_cpus = 1;
	for (i = 0; i<SYSTEM_DIMENSIONS; i++)
		num_unused_cpus *= dims[i];
	num_unused_cpus *= bg_conf->cpus_per_mp;
	num_possible_unused_cpus = num_unused_cpus;

	if (!s_p_get_uint16(&bg_conf->nodecard_cnode_cnt,
			    "NodeBoardNodeCnt", tbl)) {
		if (!s_p_get_uint16(&bg_conf->nodecard_cnode_cnt,
				    "NodeCardNodeCnt", tbl)) {
			error("NodeCardNodeCnt not configured in bluegene.conf "
			      "defaulting to 32 as NodeCardNodeCnt");
			bg_conf->nodecard_cnode_cnt = 32;
		}
	}

	if (bg_conf->nodecard_cnode_cnt <= 0)
		fatal("You should have more than 0 nodes per nodecard");

	bg_conf->mp_nodecard_cnt =
		bg_conf->mp_cnode_cnt / bg_conf->nodecard_cnode_cnt;

	if (!s_p_get_uint16(&bg_conf->ionodes_per_mp, "IONodesPerMP", tbl))
		if (!s_p_get_uint16(&bg_conf->ionodes_per_mp, "Numpsets", tbl))
			fatal("Warning: IONodesPerMP not configured "
			      "in bluegene.conf");

	s_p_get_uint16(&bg_conf->max_block_err, "MaxBlockInError", tbl);

	tmp_bool = 0;
	s_p_get_boolean(&tmp_bool, "SubMidplaneSystem", tbl);
	bg_conf->sub_mp_sys = tmp_bool;

#ifdef HAVE_BGQ
	tmp_bool = 0;
	s_p_get_boolean(&tmp_bool, "AllowSubBlockAllocations", tbl);
	bg_conf->sub_blocks = tmp_bool;

	/* You can only have 16 ionodes per midplane */
	if (bg_conf->ionodes_per_mp > bg_conf->mp_nodecard_cnt)
		bg_conf->ionodes_per_mp = bg_conf->mp_nodecard_cnt;
#endif

	for (i=0; i<SYSTEM_DIMENSIONS; i++)
		bg_conf->default_conn_type[i] = (uint16_t)NO_VAL;
	s_p_get_string(&tmp_char, "DefaultConnType", tbl);
	if (tmp_char) {
		verify_conn_type(tmp_char, bg_conf->default_conn_type);
		if ((bg_conf->default_conn_type[0] != SELECT_MESH)
		    && (bg_conf->default_conn_type[0] != SELECT_TORUS))
			fatal("Can't have a DefaultConnType of %s "
			      "(only Mesh or Torus values are valid).",
			      tmp_char);
		xfree(tmp_char);
	} else
		bg_conf->default_conn_type[0] = SELECT_TORUS;

#ifndef HAVE_BG_L_P
	int first_conn_type = bg_conf->default_conn_type[0];
	for (i=1; i<SYSTEM_DIMENSIONS; i++) {
		if (bg_conf->default_conn_type[i] == (uint16_t)NO_VAL)
			bg_conf->default_conn_type[i] = first_conn_type;
		else if (bg_conf->default_conn_type[i] >= SELECT_SMALL)
			fatal("Can't have a DefaultConnType of %s "
			      "(only Mesh or Torus values are valid).",
			      tmp_char);
	}
#endif

	if (bg_conf->ionodes_per_mp) {
		bitstr_t *tmp_bitmap = NULL;
		int small_size = 1;

		/* THIS IS A HACK TO MAKE A 1 NODECARD SYSTEM WORK,
		 * Sometimes on a Q system the nodecard isn't in the 0
		 * spot, so only do this if you know it is in that
		 * spot.  Otherwise say the whole midplane is there
		 * and just make blocks over the whole thing.  Then
		 * you can error out the blocks that aren't usable. */
		if (bg_conf->sub_mp_sys
		    && bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt) {
#ifdef HAVE_BGQ
			bg_conf->quarter_ionode_cnt = 1;
			bg_conf->nodecard_ionode_cnt = 1;
#else
			bg_conf->quarter_ionode_cnt = 2;
			bg_conf->nodecard_ionode_cnt = 2;
#endif
		} else {
			bg_conf->quarter_ionode_cnt = bg_conf->ionodes_per_mp/4;
			bg_conf->nodecard_ionode_cnt =
				bg_conf->quarter_ionode_cnt/4;
		}

		/* How many nodecards per ionode */
		bg_conf->nc_ratio =
			((double)bg_conf->mp_cnode_cnt
			 / (double)bg_conf->nodecard_cnode_cnt)
			/ (double)bg_conf->ionodes_per_mp;
		/* How many ionodes per nodecard */
		bg_conf->io_ratio =
			(double)bg_conf->ionodes_per_mp /
			((double)bg_conf->mp_cnode_cnt
			 / (double)bg_conf->nodecard_cnode_cnt);

		/* How many cnodes per ionode */
		bg_conf->ionode_cnode_cnt =
			bg_conf->nodecard_cnode_cnt * bg_conf->nc_ratio;

		//info("got %f %f", bg_conf->nc_ratio, bg_conf->io_ratio);
		/* figure out the smallest block we can have on the
		   system */
#ifdef HAVE_BGL
		if (bg_conf->io_ratio >= 1)
			bg_conf->smallest_block=32;
		else
			bg_conf->smallest_block=128;
#else

		if (bg_conf->io_ratio >= 2)
			bg_conf->smallest_block=16;
		else if (bg_conf->io_ratio == 1)
			bg_conf->smallest_block=32;
		else if (bg_conf->io_ratio == .5)
			bg_conf->smallest_block=64;
		else if (bg_conf->io_ratio == .25)
			bg_conf->smallest_block=128;
		else if (bg_conf->io_ratio == .125)
			bg_conf->smallest_block=256;
		else {
			error("unknown ioratio %f.  Can't figure out "
			      "smallest block size, setting it to midplane",
			      bg_conf->io_ratio);
			bg_conf->smallest_block = 512;
		}
#endif
		if (bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE)
			info("Smallest block possible on this system is %u",
			     bg_conf->smallest_block);
		/* below we are creating all the possible bitmaps for
		 * each size of small block
		 */
		if ((int)bg_conf->nodecard_ionode_cnt < 1) {
			bg_conf->nodecard_ionode_cnt = 0;
		} else {
			bg_lists->valid_small32 = list_create(_destroy_bitmap);
			/* This is supposed to be = and not ==; we only
			   want to decrement when small_size is
			   assigned something non-zero.
			*/
			if ((small_size = bg_conf->nodecard_ionode_cnt))
				small_size--;
			i = 0;
			while (i<bg_conf->ionodes_per_mp) {
				tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
				bit_nset(tmp_bitmap, i, i+small_size);
				i += small_size+1;
				list_append(bg_lists->valid_small32,
					    tmp_bitmap);
			}
		}
		/* If we only have 1 nodecard just jump to the end
		   since this will never need to happen below.
		   Pretty much a hack to avoid seg fault;). */
		if (bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt)
			goto no_calc;

		bg_lists->valid_small128 = list_create(_destroy_bitmap);
		if ((small_size = bg_conf->quarter_ionode_cnt))
			small_size--;
		i = 0;
		while (i<bg_conf->ionodes_per_mp) {
			tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
			bit_nset(tmp_bitmap, i, i+small_size);
			i += small_size+1;
			list_append(bg_lists->valid_small128, tmp_bitmap);
		}

#ifndef HAVE_BGL
		bg_lists->valid_small64 = list_create(_destroy_bitmap);
		if ((small_size = bg_conf->nodecard_ionode_cnt * 2))
			small_size--;
		i = 0;
		while (i<bg_conf->ionodes_per_mp) {
			tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
			bit_nset(tmp_bitmap, i, i+small_size);
			i += small_size+1;
			list_append(bg_lists->valid_small64, tmp_bitmap);
		}

		bg_lists->valid_small256 = list_create(_destroy_bitmap);
		if ((small_size = bg_conf->quarter_ionode_cnt * 2))
			small_size--;
		i = 0;
		while (i<bg_conf->ionodes_per_mp) {
			tmp_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
			bit_nset(tmp_bitmap, i, i+small_size);
			i += small_size+1;
			list_append(bg_lists->valid_small256, tmp_bitmap);
		}
#endif
	} else {
		fatal("your ionodes_per_mp is 0");
	}

no_calc:

	if (!s_p_get_uint16(&bg_conf->bridge_api_verb, "BridgeAPIVerbose", tbl))
		info("Warning: BridgeAPIVerbose not configured "
		     "in bluegene.conf");
	if (!s_p_get_string(&bg_conf->bridge_api_file,
			    "BridgeAPILogFile", tbl))
		info("BridgeAPILogFile not configured in bluegene.conf");
	else
		_reopen_bridge_log();

	if (s_p_get_string(&tmp_char, "DenyPassthrough", tbl)) {
		if (strstr(tmp_char, "A"))
			ba_deny_pass |= PASS_DENY_A;
		if (strstr(tmp_char, "X"))
			ba_deny_pass |= PASS_DENY_X;
		if (strstr(tmp_char, "Y"))
			ba_deny_pass |= PASS_DENY_Y;
		if (strstr(tmp_char, "Z"))
			ba_deny_pass |= PASS_DENY_Z;
		if (!xstrcasecmp(tmp_char, "ALL"))
			ba_deny_pass |= PASS_DENY_ALL;
		bg_conf->deny_pass = ba_deny_pass;
		xfree(tmp_char);
	}

	if (!s_p_get_string(&tmp_char, "LayoutMode", tbl)) {
		info("Warning: LayoutMode was not specified in bluegene.conf "
		     "defaulting to STATIC partitioning");
		bg_conf->layout_mode = LAYOUT_STATIC;
	} else {
		if (!xstrcasecmp(tmp_char,"STATIC"))
			bg_conf->layout_mode = LAYOUT_STATIC;
		else if (!xstrcasecmp(tmp_char,"OVERLAP"))
			bg_conf->layout_mode = LAYOUT_OVERLAP;
		else if (!xstrcasecmp(tmp_char,"DYNAMIC"))
			bg_conf->layout_mode = LAYOUT_DYNAMIC;
		else {
			fatal("I don't understand this LayoutMode = %s",
			      tmp_char);
		}
		xfree(tmp_char);
	}

	/* add blocks defined in file */
	if (bg_conf->layout_mode != LAYOUT_DYNAMIC) {
		if (!s_p_get_array((void ***)&blockreq_array,
				   &count, "MPs", tbl)) {
			if (!s_p_get_array((void ***)&blockreq_array,
					   &count, "BPs", tbl)) {
				info("WARNING: no blocks defined in "
				     "bluegene.conf, "
				     "only making full system block");
				/* create_full_system_block(NULL); */
				if (bg_conf->sub_mp_sys ||
				    (bg_conf->mp_cnode_cnt ==
				     bg_conf->nodecard_cnode_cnt))
					fatal("On a sub-midplane system you "
					      "need to define the blocks you "
					      "want on your system.");
			}
		}

		for (i = 0; i < count; i++) {
			add_bg_record(bg_lists->main, NULL,
				      blockreq_array[i], 0, 0);
		}
	} else if (bg_conf->sub_mp_sys ||
		   (bg_conf->mp_cnode_cnt == bg_conf->nodecard_cnode_cnt))
		/* we can't do dynamic here on a sub-midplane system */
		fatal("On a sub-midplane system we can only do OVERLAP or "
		      "STATIC LayoutMode.  Please update your bluegene.conf.");

#ifdef HAVE_BGQ
	if ((bg_recover != NOT_FROM_CONTROLLER) && assoc_mgr_qos_list
	    && s_p_get_string(&tmp_char, "RebootQOSList", tbl)) {
		bool valid;
		char *token, *last = NULL;
		slurmdb_qos_rec_t *qos = NULL;
		assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK, NO_LOCK,
					   READ_LOCK, NO_LOCK,
					   NO_LOCK, NO_LOCK };

		/* Lock here to avoid g_qos_count changing under us */
		assoc_mgr_lock(&locks);
		bg_conf->reboot_qos_bitmap = bit_alloc(g_qos_count);
		itr = list_iterator_create(assoc_mgr_qos_list);

		token = strtok_r(tmp_char, ",", &last);
		while (token) {
			valid = false;
			while((qos = list_next(itr))) {
				if (!xstrcasecmp(token, qos->name)) {
					bit_set(bg_conf->reboot_qos_bitmap,
						qos->id);
					valid = true;
					break;
				}
			}
			if (!valid)
				error("Invalid RebootQOSList value: %s", token);
			list_iterator_reset(itr);
			token = strtok_r(NULL, ",", &last);
		}
		list_iterator_destroy(itr);
		xfree(tmp_char);
		assoc_mgr_unlock(&locks);
	}
#endif

	s_p_hashtbl_destroy(tbl);

	return SLURM_SUCCESS;
}
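
The smallest-block selection above follows directly from io_ratio, the number of ionodes per nodecard. A standalone sketch of the non-BGL branch of that mapping, using an illustrative 512-cnode midplane:

#include <stdio.h>

/* mirrors the non-BGL io_ratio -> smallest_block mapping above */
static unsigned smallest_block(double io_ratio)
{
	if (io_ratio >= 2)
		return 16;
	else if (io_ratio == 1)
		return 32;
	else if (io_ratio == .5)
		return 64;
	else if (io_ratio == .25)
		return 128;
	else if (io_ratio == .125)
		return 256;
	return 512;	/* unknown ratio: fall back to a full midplane */
}

int main(void)
{
	/* 16 ionodes on a 512-cnode midplane with 32-cnode nodecards:
	 * io_ratio = 16 / (512 / 32) = 1.0 */
	double io_ratio = 16.0 / (512.0 / 32.0);
	printf("io_ratio %.3f -> smallest block %u\n",
	       io_ratio, smallest_block(io_ratio));
	return 0;
}
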
Beispiel #17
0
extern int slurm_jobcomp_log_record(struct job_record *job_ptr)
{
	int nwritten, B_SIZE = 1024;
	char usr_str[32], grp_str[32], start_str[32], end_str[32];
	char submit_str[32], *cluster = NULL, *qos, *state_string;
	time_t elapsed_time, submit_time, eligible_time;
	enum job_states job_state;
	uint32_t time_limit;
	uint16_t ntasks_per_node;
	int i;
	char *buffer, tmp_str[256], *script_str, *script;
	struct job_node *jnode;

	if (list_count(jobslist) > MAX_JOBS) {
		error("%s: Limit of %d enqueued jobs in memory waiting to be "
		      "indexed reached. Job %lu discarded", plugin_type,
		      MAX_JOBS, (unsigned long)job_ptr->job_id);
		return SLURM_ERROR;
	}

	_get_user_name(job_ptr->user_id, usr_str, sizeof(usr_str));
	_get_group_name(job_ptr->group_id, grp_str, sizeof(grp_str));

	if ((job_ptr->time_limit == NO_VAL) && job_ptr->part_ptr)
		time_limit = job_ptr->part_ptr->max_time;
	else
		time_limit = job_ptr->time_limit;

	if (job_ptr->job_state & JOB_RESIZING) {
		time_t now = time(NULL);
		state_string = job_state_string(job_ptr->job_state);
		if (job_ptr->resize_time) {
			_make_time_str(&job_ptr->resize_time, start_str,
				       sizeof(start_str));
		} else {
			_make_time_str(&job_ptr->start_time, start_str,
				       sizeof(start_str));
		}
		_make_time_str(&now, end_str, sizeof(end_str));
	} else {
		/* Job state will typically have JOB_COMPLETING or JOB_RESIZING
		 * flag set when called. We remove the flags to get the eventual
		 * completion state: JOB_FAILED, JOB_TIMEOUT, etc. */
		job_state = job_ptr->job_state & JOB_STATE_BASE;
		state_string = job_state_string(job_state);
		if (job_ptr->resize_time) {
			_make_time_str(&job_ptr->resize_time, start_str,
				       sizeof(start_str));
		} else if (job_ptr->start_time > job_ptr->end_time) {
			/* Job cancelled while pending and
			 * expected start time is in the future. */
			snprintf(start_str, sizeof(start_str), "Unknown");
		} else {
			_make_time_str(&job_ptr->start_time, start_str,
				       sizeof(start_str));
		}
		_make_time_str(&job_ptr->end_time, end_str, sizeof(end_str));
	}

	elapsed_time = job_ptr->end_time - job_ptr->start_time;

	buffer = xmalloc(B_SIZE);

	nwritten = snprintf(buffer, B_SIZE, JOBCOMP_DATA_FORMAT,
			    (unsigned long) job_ptr->job_id, usr_str,
			    (unsigned long) job_ptr->user_id, grp_str,
			    (unsigned long) job_ptr->group_id, start_str,
			    end_str, (long) elapsed_time,
			    job_ptr->partition, job_ptr->alloc_node,
			    job_ptr->nodes, (unsigned long) job_ptr->total_cpus,
			    (unsigned long) job_ptr->total_nodes,
			    (unsigned long) job_ptr->derived_ec,
			    (unsigned long) job_ptr->exit_code, state_string);

	if (nwritten >= B_SIZE) {
		B_SIZE += nwritten + 1;
		buffer = xrealloc(buffer, B_SIZE);

		nwritten = snprintf(buffer, B_SIZE, JOBCOMP_DATA_FORMAT,
				    (unsigned long) job_ptr->job_id, usr_str,
				    (unsigned long) job_ptr->user_id, grp_str,
				    (unsigned long) job_ptr->group_id,
				    start_str, end_str, (long) elapsed_time,
				    job_ptr->partition, job_ptr->alloc_node,
				    job_ptr->nodes,
				    (unsigned long) job_ptr->total_cpus,
				    (unsigned long) job_ptr->total_nodes,
				    (unsigned long) job_ptr->derived_ec,
				    (unsigned long) job_ptr->exit_code,
				    state_string);

		if (nwritten >= B_SIZE) {
			error("%s: Job completion data truncated and lost",
			      plugin_type);
			return SLURM_ERROR;
		}
	}

	snprintf(tmp_str, sizeof(tmp_str), ",\"cpu_hours\":%.6f",
		 ((float) elapsed_time * (float) job_ptr->total_cpus) /
		  (float) 3600);
	xstrcat(buffer, tmp_str);

	if (job_ptr->array_task_id != NO_VAL) {
		xstrfmtcat(buffer, ",\"array_job_id\":%lu",
			   (unsigned long) job_ptr->array_job_id);
		xstrfmtcat(buffer, ",\"array_task_id\":%lu",
			   (unsigned long) job_ptr->array_task_id);
	}

	if (job_ptr->details && (job_ptr->details->submit_time != NO_VAL)) {
		submit_time = job_ptr->details->submit_time;
		_make_time_str(&submit_time, submit_str, sizeof(submit_str));
		xstrfmtcat(buffer, ",\"@submit\":\"%s\"", submit_str);
	}

	if (job_ptr->details && (job_ptr->details->begin_time != NO_VAL)) {
		eligible_time = job_ptr->start_time -
				job_ptr->details->begin_time;
		xstrfmtcat(buffer, ",\"eligible_time\":%lu", eligible_time);
	}

	if (job_ptr->details
	    && (job_ptr->details->work_dir && job_ptr->details->work_dir[0])) {
		xstrfmtcat(buffer, ",\"work_dir\":\"%s\"",
			   job_ptr->details->work_dir);
	}

	if (job_ptr->details
	    && (job_ptr->details->std_err && job_ptr->details->std_err[0])) {
		xstrfmtcat(buffer, ",\"std_err\":\"%s\"",
			   job_ptr->details->std_err);
	}

	if (job_ptr->details
	    && (job_ptr->details->std_in && job_ptr->details->std_in[0])) {
		xstrfmtcat(buffer, ",\"std_in\":\"%s\"",
			   job_ptr->details->std_in);
	}

	if (job_ptr->details
	    && (job_ptr->details->std_out && job_ptr->details->std_out[0])) {
		xstrfmtcat(buffer, ",\"std_out\":\"%s\"",
			   job_ptr->details->std_out);
	}

	if (job_ptr->assoc_ptr != NULL) {
		cluster = ((slurmdb_assoc_rec_t *) job_ptr->assoc_ptr)->cluster;
		xstrfmtcat(buffer, ",\"cluster\":\"%s\"", cluster);
	}

	if (job_ptr->qos_ptr != NULL) {
		slurmdb_qos_rec_t *assoc =
			(slurmdb_qos_rec_t *) job_ptr->qos_ptr;
		qos = assoc->name;
		xstrfmtcat(buffer, ",\"qos\":\"%s\"", qos);
	}

	if (job_ptr->details && (job_ptr->details->num_tasks != NO_VAL)) {
		xstrfmtcat(buffer, ",\"ntasks\":%hu",
			   job_ptr->details->num_tasks);
	}

	if (job_ptr->details && (job_ptr->details->ntasks_per_node != NO_VAL)) {
		ntasks_per_node = job_ptr->details->ntasks_per_node;
		xstrfmtcat(buffer, ",\"ntasks_per_node\":%hu", ntasks_per_node);
	}

	if (job_ptr->details && (job_ptr->details->cpus_per_task != NO_VAL)) {
		xstrfmtcat(buffer, ",\"cpus_per_task\":%hu",
			   job_ptr->details->cpus_per_task);
	}

	if (job_ptr->details
	    && (job_ptr->details->orig_dependency
		&& job_ptr->details->orig_dependency[0])) {
		xstrfmtcat(buffer, ",\"orig_dependency\":\"%s\"",
			   job_ptr->details->orig_dependency);
	}

	if (job_ptr->details
	    && (job_ptr->details->exc_nodes
		&& job_ptr->details->exc_nodes[0])) {
		xstrfmtcat(buffer, ",\"excluded_nodes\":\"%s\"",
			   job_ptr->details->exc_nodes);
	}

	if (time_limit != INFINITE) {
		xstrfmtcat(buffer, ",\"time_limit\":%lu",
			   (unsigned long) time_limit * 60);
	}

	if (job_ptr->resv_name && job_ptr->resv_name[0]) {
		xstrfmtcat(buffer, ",\"reservation_name\":\"%s\"",
			   job_ptr->resv_name);
	}

	if (job_ptr->gres_req && job_ptr->gres_req[0]) {
		xstrfmtcat(buffer, ",\"gres_req\":\"%s\"", job_ptr->gres_req);
	}

	if (job_ptr->gres_alloc && job_ptr->gres_alloc[0]) {
		xstrfmtcat(buffer, ",\"gres_alloc\":\"%s\"",
			   job_ptr->gres_alloc);
	}

	if (job_ptr->account && job_ptr->account[0]) {
		xstrfmtcat(buffer, ",\"account\":\"%s\"", job_ptr->account);
	}

	script = get_job_script(job_ptr);
	if (script && script[0]) {
		script_str = _json_escape(script);
		xstrfmtcat(buffer, ",\"script\":\"%s\"", script_str);
		xfree(script_str);
	}
	xfree(script);

	if (job_ptr->assoc_ptr) {
		assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK,
					   NO_LOCK, NO_LOCK, NO_LOCK };
		slurmdb_assoc_rec_t *assoc_ptr = job_ptr->assoc_ptr;
		char *parent_accounts = NULL;
		char **acc_aux = NULL;
		int nparents = 0;

		assoc_mgr_lock(&locks);

		/* Start at the first parent and go up.  When studying
		 * this code it was slightly faster to do 2 loops on
		 * the association linked list and only 1 xmalloc but
		 * we opted for cleaner looking code and going with a
		 * realloc. */
		while (assoc_ptr) {
			if (assoc_ptr->acct) {
				acc_aux = xrealloc(acc_aux,
						   sizeof(char *) *
						   (nparents + 1));
				acc_aux[nparents++] = assoc_ptr->acct;
			}
			assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
		}

		for (i = nparents - 1; i >= 0; i--)
			xstrfmtcat(parent_accounts, "/%s", acc_aux[i]);
		xfree(acc_aux);

		xstrfmtcat(buffer, ",\"parent_accounts\":\"%s\"",
			   parent_accounts);

		xfree(parent_accounts);

		assoc_mgr_unlock(&locks);
	}

	xstrcat(buffer, "}");
	jnode = xmalloc(sizeof(struct job_node));
	jnode->serialized_job = xstrdup(buffer);
	list_enqueue(jobslist, jnode);

	return SLURM_SUCCESS;
}
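
The record is formatted with the usual grow-and-retry snprintf pattern: if the first pass reports truncation, the buffer is resized to the reported length and formatted again. A standalone sketch, with malloc/realloc standing in for xmalloc/xrealloc and error handling elided:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int b_size = 16;	/* deliberately too small */
	char *buffer = malloc(b_size);
	int nwritten = snprintf(buffer, b_size,
				"{\"jobid\":%lu,\"user\":\"%s\"}",
				123456UL, "alice");
	if (nwritten >= b_size) {	/* truncated: grow and retry */
		b_size = nwritten + 1;
		buffer = realloc(buffer, b_size);
		nwritten = snprintf(buffer, b_size,
				    "{\"jobid\":%lu,\"user\":\"%s\"}",
				    123456UL, "alice");
	}
	printf("%s (%d bytes)\n", buffer, nwritten);
	free(buffer);
	return 0;
}
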
Beispiel #18
0
/*
  Remove previously used time from qos and assocs
  grp_used_cpu_run_secs.

  When restarting slurmctld, acct_policy_job_begin() is called for all
  running jobs. There each job's total requested cputime (total_cpus *
  time_limit) is added to grp_used_cpu_run_secs of assocs and qos.

  This function will subtract all cputime that was used until the
  decay thread last ran. This kludge is necessary as the decay thread
  last_ran variable can't be accessed from acct_policy_job_begin().
*/
void _init_grp_used_cpu_run_secs(time_t last_ran)
{
	struct job_record *job_ptr = NULL;
	ListIterator itr;
	assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK,
				   WRITE_LOCK, NO_LOCK, NO_LOCK };
	slurmctld_lock_t job_read_lock =
		{ NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK };
	uint64_t delta;
	slurmdb_qos_rec_t *qos;
	slurmdb_association_rec_t *assoc;

	if(priority_debug)
		info("Initializing grp_used_cpu_run_secs");

	if (!(job_list && list_count(job_list)))
		return;

	lock_slurmctld(job_read_lock);
	itr = list_iterator_create(job_list);
	if (itr == NULL)
		fatal("list_iterator_create: malloc failure");

	assoc_mgr_lock(&locks);
	while ((job_ptr = list_next(itr))) {
		if (priority_debug)
			debug2("job: %u",job_ptr->job_id);
		qos = NULL;
		assoc = NULL;
		delta = 0;

		if (!IS_JOB_RUNNING(job_ptr))
			continue;

		if (job_ptr->start_time > last_ran)
			continue;

		delta = job_ptr->total_cpus * (last_ran - job_ptr->start_time);

		qos = (slurmdb_qos_rec_t *) job_ptr->qos_ptr;
		assoc = (slurmdb_association_rec_t *) job_ptr->assoc_ptr;

		if(qos) {
			if (priority_debug)
				info("Subtracting %"PRIu64" from qos "
				     "%u grp_used_cpu_run_secs "
				     "%"PRIu64" = %"PRIu64"",
				     delta,
				     qos->id,
				     qos->usage->grp_used_cpu_run_secs,
				     qos->usage->grp_used_cpu_run_secs -
				     delta);
			qos->usage->grp_used_cpu_run_secs -= delta;
		}
		while (assoc) {
			if (priority_debug)
				info("Subtracting %"PRIu64" from assoc %u "
				     "grp_used_cpu_run_secs "
				     "%"PRIu64" = %"PRIu64"",
				     delta,
				     assoc->id,
				     assoc->usage->grp_used_cpu_run_secs,
				     assoc->usage->grp_used_cpu_run_secs -
				     delta);
			assoc->usage->grp_used_cpu_run_secs -= delta;
			assoc = assoc->usage->parent_assoc_ptr;
		}
	}
	assoc_mgr_unlock(&locks);
	list_iterator_destroy(itr);
	unlock_slurmctld(job_read_lock);
}
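
A worked sketch of the correction above: acct_policy_job_begin() re-charges each running job its full total_cpus * time_limit, so the seconds actually consumed up to last_ran are subtracted again. Times and counts are illustrative:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <time.h>

int main(void)
{
	time_t start_time = 1000000;	/* job start */
	time_t last_ran = 1003600;	/* decay thread ran 1h later */
	uint64_t total_cpus = 8;
	/* pre-charge at restart: 8 cpus * 2h time limit */
	uint64_t grp_used_cpu_run_secs = 8ULL * 7200;

	uint64_t delta = total_cpus * (uint64_t)(last_ran - start_time);
	grp_used_cpu_run_secs -= delta;	/* 8 * 3600 remains */

	printf("delta=%" PRIu64 " remaining=%" PRIu64 "\n",
	       delta, grp_used_cpu_run_secs);
	return 0;
}
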
Beispiel #19
0
static void _adjust_limit_usage(int type, struct job_record *job_ptr)
{
	slurmdb_association_rec_t *assoc_ptr = NULL;
	assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK,
				   WRITE_LOCK, NO_LOCK, NO_LOCK };

	if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS)
	    || !_valid_job_assoc(job_ptr))
		return;

	assoc_mgr_lock(&locks);
	if (job_ptr->qos_ptr && (accounting_enforce & ACCOUNTING_ENFORCE_QOS)) {
		ListIterator itr = NULL;
		slurmdb_qos_rec_t *qos_ptr = NULL;
		slurmdb_used_limits_t *used_limits = NULL;

		qos_ptr = (slurmdb_qos_rec_t *)job_ptr->qos_ptr;
		if(!qos_ptr->usage->user_limit_list)
			qos_ptr->usage->user_limit_list =
				list_create(slurmdb_destroy_used_limits);
		itr = list_iterator_create(qos_ptr->usage->user_limit_list);
		while((used_limits = list_next(itr))) {
			if(used_limits->uid == job_ptr->user_id)
				break;
		}
		list_iterator_destroy(itr);
		if(!used_limits) {
			used_limits = xmalloc(sizeof(slurmdb_used_limits_t));
			used_limits->uid = job_ptr->user_id;
			list_append(qos_ptr->usage->user_limit_list,
				    used_limits);
		}
		switch(type) {
		case ACCT_POLICY_ADD_SUBMIT:
			qos_ptr->usage->grp_used_submit_jobs++;
			used_limits->submit_jobs++;
			break;
		case ACCT_POLICY_REM_SUBMIT:
			if(qos_ptr->usage->grp_used_submit_jobs)
				qos_ptr->usage->grp_used_submit_jobs--;
			else
				debug2("acct_policy_remove_job_submit: "
				       "grp_submit_jobs underflow for qos %s",
				       qos_ptr->name);

			if(used_limits->submit_jobs)
				used_limits->submit_jobs--;
			else
				debug2("acct_policy_remove_job_submit: "
				       "used_submit_jobs underflow for "
				       "qos %s user %d",
				       qos_ptr->name, used_limits->uid);
			break;
		case ACCT_POLICY_JOB_BEGIN:
			qos_ptr->usage->grp_used_jobs++;
			qos_ptr->usage->grp_used_cpus += job_ptr->total_cpus;
			qos_ptr->usage->grp_used_nodes += job_ptr->node_cnt;
			used_limits->jobs++;
			break;
		case ACCT_POLICY_JOB_FINI:
			if(qos_ptr->usage->grp_used_jobs)
				qos_ptr->usage->grp_used_jobs--;
			else
				debug2("acct_policy_job_fini: used_jobs "
				       "underflow for qos %s", qos_ptr->name);

			qos_ptr->usage->grp_used_cpus -= job_ptr->total_cpus;
			if((int32_t)qos_ptr->usage->grp_used_cpus < 0) {
				qos_ptr->usage->grp_used_cpus = 0;
				debug2("acct_policy_job_fini: grp_used_cpus "
				       "underflow for qos %s", qos_ptr->name);
			}

			qos_ptr->usage->grp_used_nodes -= job_ptr->node_cnt;
			if((int32_t)qos_ptr->usage->grp_used_nodes < 0) {
				qos_ptr->usage->grp_used_nodes = 0;
				debug2("acct_policy_job_fini: grp_used_nodes "
				       "underflow for qos %s", qos_ptr->name);
			}

			if(used_limits->jobs)
				used_limits->jobs--;
			else
				debug2("acct_policy_job_fini: used_jobs "
				       "underflow for qos %s user %d",
				       qos_ptr->name, used_limits->uid);
			break;
		default:
			error("acct_policy: qos unknown type %d", type);
			break;
		}
	}

	assoc_ptr = (slurmdb_association_rec_t *)job_ptr->assoc_ptr;
	while(assoc_ptr) {
		switch(type) {
		case ACCT_POLICY_ADD_SUBMIT:
			assoc_ptr->usage->used_submit_jobs++;
			break;
		case ACCT_POLICY_REM_SUBMIT:
			if (assoc_ptr->usage->used_submit_jobs)
				assoc_ptr->usage->used_submit_jobs--;
			else
				debug2("acct_policy_remove_job_submit: "
				       "used_submit_jobs underflow for "
				       "account %s",
				       assoc_ptr->acct);
			break;
		case ACCT_POLICY_JOB_BEGIN:
			assoc_ptr->usage->used_jobs++;
			assoc_ptr->usage->grp_used_cpus += job_ptr->total_cpus;
			assoc_ptr->usage->grp_used_nodes += job_ptr->node_cnt;
			break;
		case ACCT_POLICY_JOB_FINI:
			if (assoc_ptr->usage->used_jobs)
				assoc_ptr->usage->used_jobs--;
			else
				debug2("acct_policy_job_fini: used_jobs "
				       "underflow for account %s",
				       assoc_ptr->acct);

			assoc_ptr->usage->grp_used_cpus -= job_ptr->total_cpus;
			if ((int32_t)assoc_ptr->usage->grp_used_cpus < 0) {
				assoc_ptr->usage->grp_used_cpus = 0;
				debug2("acct_policy_job_fini: grp_used_cpus "
				       "underflow for account %s",
				       assoc_ptr->acct);
			}

			assoc_ptr->usage->grp_used_nodes -= job_ptr->node_cnt;
			if ((int32_t)assoc_ptr->usage->grp_used_nodes < 0) {
				assoc_ptr->usage->grp_used_nodes = 0;
				debug2("acct_policy_job_fini: grp_used_nodes "
				       "underflow for account %s",
				       assoc_ptr->acct);
			}
			break;
		default:
			error("acct_policy: association unknown type %d", type);
			break;
		}
		/* now handle all the group limits of the parents */
		assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
	}
	assoc_mgr_unlock(&locks);
}
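
The counters above are unsigned, so the underflow checks subtract first and then cast to a signed type to catch wrap-around before clamping back to zero. A standalone sketch of that guard (it relies on the two's-complement cast behavior the code above also assumes):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t grp_used_cpus = 8;
	uint32_t job_cpus = 12;		/* more than is recorded */

	grp_used_cpus -= job_cpus;	/* wraps to a huge value */
	if ((int32_t)grp_used_cpus < 0) {
		grp_used_cpus = 0;	/* clamp; the code above logs debug2() */
		printf("underflow detected, clamped to 0\n");
	}
	printf("grp_used_cpus = %u\n", grp_used_cpus);
	return 0;
}
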
Beispiel #20
0
/* If the job is running then apply decay to the job.
 *
 * Return 0 if we don't need to process the job any further, 1 if
 * further processing is needed.
 */
static int _apply_new_usage(struct job_record *job_ptr, double decay_factor,
			    time_t start_period, time_t end_period)
{
	slurmdb_qos_rec_t *qos;
	slurmdb_association_rec_t *assoc;
	int run_delta = 0;
	double run_decay = 0.0, real_decay = 0.0;
	uint64_t cpu_run_delta = 0;
	uint64_t job_time_limit_ends = 0;
	assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK,
				   WRITE_LOCK, NO_LOCK, NO_LOCK };
	assoc_mgr_lock_t qos_read_lock = { NO_LOCK, NO_LOCK,
					   READ_LOCK, NO_LOCK, NO_LOCK };

	/* If usage_factor is 0 just skip this
	   since we don't add the usage.
	*/
	assoc_mgr_lock(&qos_read_lock);
	qos = (slurmdb_qos_rec_t *)job_ptr->qos_ptr;
	if (qos && !qos->usage_factor) {
		assoc_mgr_unlock(&qos_read_lock);
		return 0;
	}
	assoc_mgr_unlock(&qos_read_lock);

	if (job_ptr->start_time > start_period)
		start_period = job_ptr->start_time;

	if (job_ptr->end_time
	    && (end_period > job_ptr->end_time))
		end_period = job_ptr->end_time;

	run_delta = (int) (end_period - start_period);

	/* job already has been accounted for
	   go to next */
	if (run_delta < 1)
		return 0;

	/* cpu_run_delta is used to
	   decrease the qos and assoc
	   grp_used_cpu_run_secs values.  When
	   a job starts, only the seconds until
	   start_time+time_limit are added, so
	   for jobs running over their
	   time limit we should only subtract
	   the used time up to the time
	   limit. */
	job_time_limit_ends =
		(uint64_t)job_ptr->start_time +
		(uint64_t)job_ptr->time_limit * 60;

	if ((uint64_t)start_period  >= job_time_limit_ends)
		cpu_run_delta = 0;
	else if (end_period > job_time_limit_ends)
		cpu_run_delta = job_ptr->total_cpus *
			(job_time_limit_ends - (uint64_t)start_period);
	else
		cpu_run_delta = job_ptr->total_cpus * run_delta;

	if (priority_debug)
		info("job %u ran for %d seconds on %u cpus",
		     job_ptr->job_id, run_delta, job_ptr->total_cpus);

	/* get the time in decayed fashion */
	run_decay = run_delta * pow(decay_factor, (double)run_delta);

	real_decay = run_decay * (double)job_ptr->total_cpus;

	assoc_mgr_lock(&locks);
	/* Just to make sure we don't leave a
	   window where the qos_ptr could have
	   changed, get it again here.
	*/
	qos = (slurmdb_qos_rec_t *)job_ptr->qos_ptr;
	assoc = (slurmdb_association_rec_t *)job_ptr->assoc_ptr;

	/* now apply the usage factor for this qos */
	if (qos) {
		if (qos->usage_factor >= 0) {
			real_decay *= qos->usage_factor;
			run_decay *= qos->usage_factor;
		}
		qos->usage->grp_used_wall += run_decay;
		qos->usage->usage_raw += (long double)real_decay;
		if (qos->usage->grp_used_cpu_run_secs >= cpu_run_delta) {
			if (priority_debug)
				info("grp_used_cpu_run_secs is %"PRIu64", "
				     "will subtract %"PRIu64"",
				     qos->usage->grp_used_cpu_run_secs,
				     cpu_run_delta);
			qos->usage->grp_used_cpu_run_secs -= cpu_run_delta;
		} else {
			if (priority_debug)
				info("jobid %u, qos %s: setting "
				     "grp_used_cpu_run_secs "
				     "to 0 because %"PRIu64" < %"PRIu64"",
				     job_ptr->job_id, qos->name,
				     qos->usage->grp_used_cpu_run_secs,
				     cpu_run_delta);
			qos->usage->grp_used_cpu_run_secs = 0;
		}
	}

	/* We want to do this all the way up
	   to and including root.  This way we
	   can keep track of how much usage
	   has occurred on the entire system
	   and use that to normalize against.
	*/
	while (assoc) {
		if (assoc->usage->grp_used_cpu_run_secs >= cpu_run_delta) {
			if(priority_debug)
				info("grp_used_cpu_run_secs is %"PRIu64", "
				     "will subtract %"PRIu64"",
				     assoc->usage->grp_used_cpu_run_secs,
				     cpu_run_delta);
			assoc->usage->grp_used_cpu_run_secs -= cpu_run_delta;
		} else {
			if (priority_debug)
				info("jobid %u, assoc %u: setting "
				     "grp_used_cpu_run_secs "
				     "to 0 because %"PRIu64" < %"PRIu64"",
				     job_ptr->job_id, assoc->id,
				     assoc->usage->grp_used_cpu_run_secs,
				     cpu_run_delta);
			assoc->usage->grp_used_cpu_run_secs = 0;
		}

		assoc->usage->grp_used_wall += run_decay;
		assoc->usage->usage_raw += (long double)real_decay;
		if (priority_debug)
			info("adding %f new usage to assoc %u (user='******' "
			     "acct='%s') raw usage is now %Lf.  Group wall "
			     "added %f making it %f. GrpCPURunMins is "
			     "%"PRIu64"",
			     real_decay, assoc->id,
			     assoc->user, assoc->acct,
			     assoc->usage->usage_raw,
			     run_decay,
			     assoc->usage->grp_used_wall,
			     assoc->usage->grp_used_cpu_run_secs/60);
		assoc = assoc->usage->parent_assoc_ptr;
	}
	assoc_mgr_unlock(&locks);
	return 1;
}
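
A worked sketch of the decay arithmetic in _apply_new_usage: with half-life decay_hl the factor is 1 - (0.693 / decay_hl), a run of run_delta seconds contributes run_delta * decay_factor^run_delta, and the result is scaled by the allocated cpus. Values are illustrative; compile with -lm:

#include <stdio.h>
#include <math.h>

int main(void)
{
	double decay_hl = 7 * 24 * 3600;	/* one-week half-life */
	double decay_factor = 1 - (0.693 / decay_hl);
	int run_delta = 300;			/* 5 minutes of run time */
	unsigned total_cpus = 32;

	double run_decay = run_delta * pow(decay_factor, (double)run_delta);
	double real_decay = run_decay * (double)total_cpus;

	printf("decay_factor=%.9f run_decay=%.4f real_decay=%.4f\n",
	       decay_factor, run_decay, real_decay);
	return 0;
}
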
Beispiel #21
0
extern bool acct_policy_validate(job_desc_msg_t *job_desc,
				 struct part_record *part_ptr,
				 slurmdb_association_rec_t *assoc_in,
				 slurmdb_qos_rec_t *qos_ptr,
				 uint16_t *limit_set_max_cpus,
				 uint16_t *limit_set_max_nodes,
				 uint16_t *limit_set_time, bool update_call)
{
	uint32_t time_limit;
	slurmdb_association_rec_t *assoc_ptr = assoc_in;
	int parent = 0;
	char *user_name = NULL;
	bool rc = true;
	assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK,
				   READ_LOCK, NO_LOCK, NO_LOCK };

	xassert(limit_set_max_cpus);
	xassert(limit_set_max_nodes);
	xassert(limit_set_time);

	if (!assoc_ptr) {
		error("_validate_acct_policy: no assoc_ptr given for job.");
		return false;
	}

	user_name = assoc_ptr->user;

	assoc_mgr_lock(&locks);
	if (qos_ptr) {
		/* for validation we don't need to look at
		 * qos_ptr->grp_cpu_mins.
		 */
		if (((*limit_set_max_cpus) == ADMIN_SET_LIMIT)
		    || (qos_ptr->grp_cpus == INFINITE)
		    || (update_call && (job_desc->max_cpus == NO_VAL))) {
			/* no need to check/set */
		} else if ((job_desc->min_cpus != NO_VAL)
			   && (job_desc->min_cpus > qos_ptr->grp_cpus)) {
			info("job submit for user %s(%u): "
			     "min cpu request %u exceeds "
			     "group max cpu limit %u for qos '%s'",
			     user_name,
			     job_desc->user_id,
			     job_desc->min_cpus,
			     qos_ptr->grp_cpus,
			     qos_ptr->name);
			rc = false;
			goto end_it;
		} else if ((job_desc->max_cpus == NO_VAL)
			   || ((*limit_set_max_cpus)
			       && (job_desc->max_cpus > qos_ptr->grp_cpus))) {
			job_desc->max_cpus = qos_ptr->grp_cpus;
			(*limit_set_max_cpus) = 1;
		} else if (job_desc->max_cpus > qos_ptr->grp_cpus) {
			info("job submit for user %s(%u): "
			     "max cpu changed %u -> %u because "
			     "of qos limit",
			     user_name,
			     job_desc->user_id,
			     job_desc->max_cpus,
			     qos_ptr->grp_cpus);
			if (job_desc->max_cpus == NO_VAL)
				(*limit_set_max_cpus) = 1;
			job_desc->max_cpus = qos_ptr->grp_cpus;
		}

		/* for validation we don't need to look at
		 * qos_ptr->grp_jobs.
		 */

		if (((*limit_set_max_nodes) == ADMIN_SET_LIMIT)
		    || (qos_ptr->grp_nodes == INFINITE)
		    || (update_call && (job_desc->max_nodes == NO_VAL))) {
			/* no need to check/set */
		} else if ((job_desc->min_nodes != NO_VAL)
			   && (job_desc->min_nodes > qos_ptr->grp_nodes)) {
			info("job submit for user %s(%u): "
			     "min node request %u exceeds "
			     "group max node limit %u for qos '%s'",
			     user_name,
			     job_desc->user_id,
			     job_desc->min_nodes,
			     qos_ptr->grp_nodes,
			     qos_ptr->name);
			rc = false;
			goto end_it;
		} else if ((job_desc->max_nodes == 0)
			   || ((*limit_set_max_nodes)
			       && (job_desc->max_nodes > qos_ptr->grp_nodes))) {
			job_desc->max_nodes = qos_ptr->grp_nodes;
			(*limit_set_max_nodes) = 1;
		} else if (job_desc->max_nodes > qos_ptr->grp_nodes) {
			info("job submit for user %s(%u): "
			     "max node changed %u -> %u because "
			     "of qos limit",
			     user_name,
			     job_desc->user_id,
			     job_desc->max_nodes,
			     qos_ptr->grp_nodes);
			if (job_desc->max_nodes == NO_VAL)
				(*limit_set_max_nodes) = 1;
			job_desc->max_nodes = qos_ptr->grp_nodes;
		}

		if ((qos_ptr->grp_submit_jobs != INFINITE) &&
		    (qos_ptr->usage->grp_used_submit_jobs
		     >= qos_ptr->grp_submit_jobs)) {
			info("job submit for user %s(%u): "
			     "group max submit job limit exceeded %u "
			     "for qos '%s'",
			     user_name,
			     job_desc->user_id,
			     qos_ptr->grp_submit_jobs,
			     qos_ptr->name);
			rc = false;
			goto end_it;
		}


		/* for validation we don't need to look at
		 * qos_ptr->grp_wall. It is checked while the job is running.
		 */


		/* for validation we don't need to look at
		 * qos_ptr->max_cpu_mins_pj. It is checked while the
		 * job is running.
		 */

		if (((*limit_set_max_cpus) == ADMIN_SET_LIMIT)
		    || (qos_ptr->max_cpus_pj == INFINITE)
		    || (update_call && (job_desc->max_cpus == NO_VAL))) {
			/* no need to check/set */
		} else if ((job_desc->min_cpus != NO_VAL)
			   && (job_desc->min_cpus > qos_ptr->max_cpus_pj)) {
			info("job submit for user %s(%u): "
			     "min cpu limit %u exceeds "
			     "qos max %u",
			     user_name,
			     job_desc->user_id,
			     job_desc->min_cpus,
			     qos_ptr->max_cpus_pj);
			rc = false;
			goto end_it;
		} else if ((job_desc->max_cpus == NO_VAL)
			   || ((*limit_set_max_cpus)
			       && (job_desc->max_cpus
				   > qos_ptr->max_cpus_pj))) {
			job_desc->max_cpus = qos_ptr->max_cpus_pj;
			(*limit_set_max_cpus) = 1;
		} else if (job_desc->max_cpus > qos_ptr->max_cpus_pj) {
			info("job submit for user %s(%u): "
			     "max cpu changed %u -> %u because "
			     "of qos limit",
			     user_name,
			     job_desc->user_id,
			     job_desc->max_cpus,
			     qos_ptr->max_cpus_pj);
			if (job_desc->max_cpus == NO_VAL)
				(*limit_set_max_cpus) = 1;
			job_desc->max_cpus = qos_ptr->max_cpus_pj;
		}

		/* for validation we don't need to look at
		 * qos_ptr->max_jobs.
		 */

		if (((*limit_set_max_nodes) == ADMIN_SET_LIMIT)
		    || (qos_ptr->max_nodes_pj == INFINITE)
		    || (update_call && (job_desc->max_nodes == NO_VAL))) {
			/* no need to check/set */
		} else if ((job_desc->min_nodes != NO_VAL)
			   && (job_desc->min_nodes > qos_ptr->max_nodes_pj)) {
			info("job submit for user %s(%u): "
			     "min node limit %u exceeds "
			     "qos max %u",
			     user_name,
			     job_desc->user_id,
			     job_desc->min_nodes,
			     qos_ptr->max_nodes_pj);
			rc = false;
			goto end_it;
		} else if ((job_desc->max_nodes == 0)
			   || ((*limit_set_max_nodes)
			       && (job_desc->max_nodes
				   > qos_ptr->max_nodes_pj))) {
			job_desc->max_nodes = qos_ptr->max_nodes_pj;
			(*limit_set_max_nodes) = 1;
		} else if (job_desc->max_nodes > qos_ptr->max_nodes_pj) {
			info("job submit for user %s(%u): "
			     "max node changed %u -> %u because "
			     "of qos limit",
			     user_name,
			     job_desc->user_id,
			     job_desc->max_nodes,
			     qos_ptr->max_nodes_pj);
			if (job_desc->max_nodes == NO_VAL)
				(*limit_set_max_nodes) = 1;
			job_desc->max_nodes = qos_ptr->max_nodes_pj;
		}

		if (qos_ptr->max_submit_jobs_pu != INFINITE) {
			slurmdb_used_limits_t *used_limits = NULL;
			if (qos_ptr->usage->user_limit_list) {
				ListIterator itr = list_iterator_create(
					qos_ptr->usage->user_limit_list);
				while((used_limits = list_next(itr))) {
					if (used_limits->uid
					    == job_desc->user_id)
						break;
				}
				list_iterator_destroy(itr);
			}
			if (used_limits && (used_limits->submit_jobs
					    >= qos_ptr->max_submit_jobs_pu)) {
				info("job submit for user %s(%u): "
				     "qos max submit job limit exceeded %u",
				     user_name,
				     job_desc->user_id,
				     qos_ptr->max_submit_jobs_pu);
				rc = false;
				goto end_it;
			}
		}

		if (((*limit_set_time) == ADMIN_SET_LIMIT)
		    || (qos_ptr->max_wall_pj == INFINITE)
		    || (update_call && (job_desc->time_limit == NO_VAL))) {
			/* no need to check/set */
		} else {
			time_limit = qos_ptr->max_wall_pj;
			if (job_desc->time_limit == NO_VAL) {
				if (part_ptr->max_time == INFINITE)
					job_desc->time_limit = time_limit;
				else
					job_desc->time_limit =
						MIN(time_limit,
						    part_ptr->max_time);
				(*limit_set_time) = 1;
			} else if ((*limit_set_time) &&
				   job_desc->time_limit > time_limit) {
				job_desc->time_limit = time_limit;
			} else if (job_desc->time_limit > time_limit) {
				info("job submit for user %s(%u): "
				     "time limit %u exceeds qos max %u",
				     user_name,
				     job_desc->user_id,
				     job_desc->time_limit, time_limit);
				rc = false;
				goto end_it;
			}
		}

	}

	while(assoc_ptr) {
		/* for validation we don't need to look at
		 * assoc_ptr->grp_cpu_mins.
		 */

		if (((*limit_set_max_cpus) == ADMIN_SET_LIMIT)
		    || (qos_ptr && (qos_ptr->grp_cpus != INFINITE))
		    || (assoc_ptr->grp_cpus == INFINITE)
		    || (update_call && (job_desc->max_cpus == NO_VAL))) {
			/* no need to check/set */
		} else if ((job_desc->min_cpus != NO_VAL)
			   && (job_desc->min_cpus > assoc_ptr->grp_cpus)) {
			info("job submit for user %s(%u): "
			     "min cpu request %u exceeds "
			     "group max cpu limit %u for account %s",
			     user_name,
			     job_desc->user_id,
			     job_desc->min_cpus,
			     assoc_ptr->grp_cpus,
			     assoc_ptr->acct);
			rc = false;
			break;
		} else if ((job_desc->max_cpus == NO_VAL)
			   || ((*limit_set_max_cpus)
			       && (job_desc->max_cpus > assoc_ptr->grp_cpus))) {
			job_desc->max_cpus = assoc_ptr->grp_cpus;
			(*limit_set_max_cpus) = 1;
		} else if (job_desc->max_cpus > assoc_ptr->grp_cpus) {
			info("job submit for user %s(%u): "
			     "max cpu changed %u -> %u because "
			     "of account limit",
			     user_name,
			     job_desc->user_id,
			     job_desc->max_cpus,
			     assoc_ptr->grp_cpus);
			if (job_desc->max_cpus == NO_VAL)
				(*limit_set_max_cpus) = 1;
			job_desc->max_cpus = assoc_ptr->grp_cpus;
		}

		/* for validation we don't need to look at
		 * assoc_ptr->grp_jobs.
		 */

		if (((*limit_set_max_nodes) == ADMIN_SET_LIMIT)
		    || (qos_ptr && (qos_ptr->grp_nodes != INFINITE))
		    || (assoc_ptr->grp_nodes == INFINITE)
		    || (update_call && (job_desc->max_nodes == NO_VAL))) {
			/* no need to check/set */
		} else if ((job_desc->min_nodes != NO_VAL)
			   && (job_desc->min_nodes > assoc_ptr->grp_nodes)) {
			info("job submit for user %s(%u): "
			     "min node request %u exceeds "
			     "group max node limit %u for account %s",
			     user_name,
			     job_desc->user_id,
			     job_desc->min_nodes,
			     assoc_ptr->grp_nodes,
			     assoc_ptr->acct);
			rc = false;
			break;
		} else if ((job_desc->max_nodes == 0)
			   || ((*limit_set_max_nodes)
			       && (job_desc->max_nodes
				   > assoc_ptr->grp_nodes))) {
			job_desc->max_nodes = assoc_ptr->grp_nodes;
			(*limit_set_max_nodes) = 1;
		} else if (job_desc->max_nodes > assoc_ptr->grp_nodes) {
			info("job submit for user %s(%u): "
			     "max node changed %u -> %u because "
			     "of account limit",
			     user_name,
			     job_desc->user_id,
			     job_desc->max_nodes,
			     assoc_ptr->grp_nodes);
			if (job_desc->max_nodes == NO_VAL)
				(*limit_set_max_nodes) = 1;
			job_desc->max_nodes = assoc_ptr->grp_nodes;
		}

		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->grp_submit_jobs == INFINITE)) &&
		    (assoc_ptr->grp_submit_jobs != INFINITE) &&
		    (assoc_ptr->usage->used_submit_jobs
		     >= assoc_ptr->grp_submit_jobs)) {
			info("job submit for user %s(%u): "
			     "group max submit job limit exceeded %u "
			     "for account '%s'",
			     user_name,
			     job_desc->user_id,
			     assoc_ptr->grp_submit_jobs,
			     assoc_ptr->acct);
			rc = false;
			break;
		}


		/* for validation we don't need to look at
		 * assoc_ptr->grp_wall. It is checked while the job is running.
		 */

		/* We don't need to look at the regular limits for
		 * parents since we have pre-propagated them, so just
		 * continue with the next parent.
		 */
		if (parent) {
			assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
			continue;
		}

		/* for validation we don't need to look at
		 * assoc_ptr->max_cpu_mins_pj.
		 */

		if (((*limit_set_max_cpus) == ADMIN_SET_LIMIT)
		    || (qos_ptr && (qos_ptr->max_cpus_pj != INFINITE))
		    || (assoc_ptr->max_cpus_pj == INFINITE)
		    || (update_call && (job_desc->max_cpus == NO_VAL))) {
			/* no need to check/set */
		} else if ((job_desc->min_cpus != NO_VAL)
			   && (job_desc->min_cpus > assoc_ptr->max_cpus_pj)) {
			info("job submit for user %s(%u): "
			     "min cpu limit %u exceeds "
			     "account max %u",
			     user_name,
			     job_desc->user_id,
			     job_desc->min_cpus,
			     assoc_ptr->max_cpus_pj);
			rc = false;
			break;
		} else if (job_desc->max_cpus == NO_VAL
			   || ((*limit_set_max_cpus)
			       && (job_desc->max_cpus
				   > assoc_ptr->max_cpus_pj))) {
			job_desc->max_cpus = assoc_ptr->max_cpus_pj;
			(*limit_set_max_cpus) = 1;
		} else if (job_desc->max_cpus > assoc_ptr->max_cpus_pj) {
			info("job submit for user %s(%u): "
			     "max cpu changed %u -> %u because "
			     "of account limit",
			     user_name,
			     job_desc->user_id,
			     job_desc->max_cpus,
			     assoc_ptr->max_cpus_pj);
			if (job_desc->max_cpus == NO_VAL)
				(*limit_set_max_cpus) = 1;
			job_desc->max_cpus = assoc_ptr->max_cpus_pj;
		}

		/* for validation we don't need to look at
		 * assoc_ptr->max_jobs.
		 */

		if (((*limit_set_max_nodes) == ADMIN_SET_LIMIT)
		    || (qos_ptr && (qos_ptr->max_nodes_pj != INFINITE))
		    || (assoc_ptr->max_nodes_pj == INFINITE)
		    || (update_call && (job_desc->max_nodes == NO_VAL))) {
			/* no need to check/set */
		} else if ((job_desc->min_nodes != NO_VAL)
			   && (job_desc->min_nodes > assoc_ptr->max_nodes_pj)) {
			info("job submit for user %s(%u): "
			     "min node limit %u exceeds "
			     "account max %u",
			     user_name,
			     job_desc->user_id,
			     job_desc->min_nodes,
			     assoc_ptr->max_nodes_pj);
			rc = false;
			break;
		} else if (((job_desc->max_nodes == NO_VAL)
			    || (job_desc->max_nodes == 0))
			   || ((*limit_set_max_nodes)
			       && (job_desc->max_nodes
				   > assoc_ptr->max_nodes_pj))) {
			job_desc->max_nodes = assoc_ptr->max_nodes_pj;
			(*limit_set_max_nodes) = 1;
		} else if (job_desc->max_nodes > assoc_ptr->max_nodes_pj) {
			info("job submit for user %s(%u): "
			     "max node changed %u -> %u because "
			     "of account limit",
			     user_name,
			     job_desc->user_id,
			     job_desc->max_nodes,
			     assoc_ptr->max_nodes_pj);
			if (job_desc->max_nodes == NO_VAL)
				(*limit_set_max_nodes) = 1;
			job_desc->max_nodes = assoc_ptr->max_nodes_pj;
		}

		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->max_submit_jobs_pu == INFINITE)) &&
		    (assoc_ptr->max_submit_jobs != INFINITE) &&
		    (assoc_ptr->usage->used_submit_jobs
		     >= assoc_ptr->max_submit_jobs)) {
			info("job submit for user %s(%u): "
			     "account max submit job limit exceeded %u",
			     user_name,
			     job_desc->user_id,
			     assoc_ptr->max_submit_jobs);
			rc = false;
			break;
		}

		if (((*limit_set_time) == ADMIN_SET_LIMIT)
		    || (qos_ptr && (qos_ptr->max_wall_pj != INFINITE))
		    || (assoc_ptr->max_wall_pj == INFINITE)
		    || (update_call && (job_desc->time_limit == NO_VAL))) {
			/* no need to check/set */
		} else {
			time_limit = assoc_ptr->max_wall_pj;
			if (job_desc->time_limit == NO_VAL) {
				if (part_ptr->max_time == INFINITE)
					job_desc->time_limit = time_limit;
				else
					job_desc->time_limit =
						MIN(time_limit,
						    part_ptr->max_time);
				(*limit_set_time) = 1;
			} else if ((*limit_set_time) &&
				   job_desc->time_limit > time_limit) {
				job_desc->time_limit = time_limit;
			} else if (job_desc->time_limit > time_limit) {
				info("job submit for user %s(%u): "
				     "time limit %u exceeds account max %u",
				     user_name,
				     job_desc->user_id,
				     job_desc->time_limit, time_limit);
				rc = false;
				break;
			}
		}

		assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
		parent = 1;
	}
end_it:
	assoc_mgr_unlock(&locks);

	return rc;
}
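The validation above applies the same clamp-or-reject pattern once per limit (CPUs, nodes, wall time): skip the check when an administrator or QOS value takes precedence, reject when the hard minimum can never fit, and otherwise fill in or clamp the requested maximum. A minimal, self-contained sketch of that pattern follows; NO_VAL, ADMIN_SET_LIMIT, LIMIT_INFINITE and check_max_limit() are illustrative stand-ins, not the real Slurm API.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NO_VAL          0xfffffffe  /* "not set by the user" */
#define ADMIN_SET_LIMIT 2           /* administrator override marker */
#define LIMIT_INFINITE  0xffffffff  /* "no limit configured" */

/* Returns false when the request can never satisfy the account limit;
 * otherwise fills in or clamps *req_max and records who set it. */
static bool check_max_limit(uint32_t *req_max, uint32_t req_min,
			    uint32_t acct_max, int *limit_set)
{
	if ((*limit_set == ADMIN_SET_LIMIT) || (acct_max == LIMIT_INFINITE))
		return true;            /* nothing to enforce */
	if ((req_min != NO_VAL) && (req_min > acct_max))
		return false;           /* hard minimum exceeds the limit */
	if ((*req_max == NO_VAL) ||
	    (*limit_set && (*req_max > acct_max))) {
		*req_max = acct_max;    /* fill in / re-clamp our own value */
		*limit_set = 1;
	} else if (*req_max > acct_max) {
		printf("max changed %" PRIu32 " -> %" PRIu32
		       " because of account limit\n", *req_max, acct_max);
		*req_max = acct_max;    /* clamp a user-supplied value */
	}
	return true;
}

int main(void)
{
	uint32_t max_cpus = NO_VAL;
	int limit_set = 0;

	if (check_max_limit(&max_cpus, 4, 16, &limit_set))
		printf("accepted: max_cpus=%" PRIu32 ", limit_set=%d\n",
		       max_cpus, limit_set);
	return 0;
}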
Example #22
0
static void *_decay_thread(void *no_data)
{
	struct job_record *job_ptr = NULL;
	ListIterator itr;
	time_t start_time = time(NULL);
	time_t next_time;
	struct tm tm;
	time_t last_ran = 0;
	time_t last_reset = 0, next_reset = 0;
	uint32_t calc_period = slurm_get_priority_calc_period();
	double decay_hl = (double)slurm_get_priority_decay_hl();
	double decay_factor = 1;
	uint16_t reset_period = slurm_get_priority_reset_period();

	/* Write lock on jobs, read lock on nodes and partitions */
	slurmctld_lock_t job_write_lock =
		{ NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK };
	assoc_mgr_lock_t locks = { WRITE_LOCK, NO_LOCK,
				   NO_LOCK, NO_LOCK, NO_LOCK };

	if (decay_hl > 0)
		decay_factor = 1 - (0.693 / decay_hl);
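	/* 0.693 ~= ln 2, so decay_factor approximates exp(-ln2/decay_hl):
	 * usage multiplied by decay_factor once per second falls to half
	 * its value after decay_hl seconds. The linearization is accurate
	 * for the large half-life values used in practice. */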

	(void) pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
	(void) pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);

	if (!localtime_r(&start_time, &tm)) {
		fatal("_decay_thread: "
		      "Couldn't get localtime for time %ld",
		      (long)start_time);
		return NULL;
	}

	_read_last_decay_ran(&last_ran, &last_reset);
	if (last_reset == 0)
		last_reset = start_time;

	_init_grp_used_cpu_run_secs(last_ran);

	while (1) {
		time_t now = time(NULL);
		int run_delta = 0;
		double real_decay = 0.0;

		slurm_mutex_lock(&decay_lock);
		running_decay = 1;

		/* If reconfig is called handle all that happens
		   outside of the loop here */
		if (reconfig) {
			/* If decay_hl is 0 or less, no decay is applied;
			   instead the accumulated usage is flushed at the
			   interval set by PriorityUsageResetPeriod in
			   slurm.conf.
			*/
			calc_period = slurm_get_priority_calc_period();
			reset_period = slurm_get_priority_reset_period();
			next_reset = 0;
			decay_hl = (double)slurm_get_priority_decay_hl();
			if (decay_hl > 0)
				decay_factor = 1 - (0.693 / decay_hl);
			else
				decay_factor = 1;

			reconfig = 0;
		}

		/* this needs to be done right away so as to
		 * incorporate it into the decay loop.
		 */
		switch (reset_period) {
		case PRIORITY_RESET_NONE:
			break;
		case PRIORITY_RESET_NOW:	/* do once */
			_reset_usage();
			reset_period = PRIORITY_RESET_NONE;
			last_reset = now;
			break;
		case PRIORITY_RESET_DAILY:
		case PRIORITY_RESET_WEEKLY:
		case PRIORITY_RESET_MONTHLY:
		case PRIORITY_RESET_QUARTERLY:
		case PRIORITY_RESET_YEARLY:
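			/* Boundaries are computed from the previous reset
			 * time rather than "now", so resets stay aligned
			 * to calendar periods even if this pass runs late. */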
			if (next_reset == 0) {
				next_reset = _next_reset(reset_period,
							 last_reset);
			}
			if (now >= next_reset) {
				_reset_usage();
				last_reset = next_reset;
				next_reset = _next_reset(reset_period,
							 last_reset);
			}
		}

		/* now calculate all the normalized usage here */
		assoc_mgr_lock(&locks);
		_set_children_usage_efctv(
			assoc_mgr_root_assoc->usage->childern_list);
		assoc_mgr_unlock(&locks);

		if (!last_ran)
			goto get_usage;
		else
			run_delta = (start_time - last_ran);

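		/* A non-positive delta (zero elapsed time or a backwards
		 * clock step) means there is nothing to decay. */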
		if (run_delta <= 0)
			goto get_usage;

		real_decay = pow(decay_factor, (double)run_delta);

		if (priority_debug)
			info("Decay factor over %d seconds goes "
			     "from %.15f -> %.15f",
			     run_delta, decay_factor, real_decay);

		/* first apply decay to used time */
		if (_apply_decay(real_decay) != SLURM_SUCCESS) {
			error("problem applying decay");
			running_decay = 0;
			slurm_mutex_unlock(&decay_lock);
			break;
		}
		lock_slurmctld(job_write_lock);
		itr = list_iterator_create(job_list);
		while ((job_ptr = list_next(itr))) {
			/* apply new usage */
			if (!IS_JOB_PENDING(job_ptr) &&
			    job_ptr->start_time && job_ptr->assoc_ptr) {
				if (!_apply_new_usage(job_ptr, decay_factor,
						      last_ran, start_time))
					continue;
			}

			/*
			 * Priority 0 is reserved for held jobs. Also skip
			 * priority calculation for non-pending jobs.
			 */
			if ((job_ptr->priority == 0)
			    || !IS_JOB_PENDING(job_ptr))
				continue;

			job_ptr->priority =
				_get_priority_internal(start_time, job_ptr);
			last_job_update = time(NULL);
			debug2("priority for job %u is now %u",
			       job_ptr->job_id, job_ptr->priority);
		}
		list_iterator_destroy(itr);
		unlock_slurmctld(job_write_lock);

	get_usage:
		last_ran = start_time;

		_write_last_decay_ran(last_ran, last_reset);

		running_decay = 0;
		slurm_mutex_unlock(&decay_lock);

		/* sleep for calc_period secs */
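		/* mktime() normalizes out-of-range fields, so bumping
		 * tm_sec by calc_period yields the next wakeup on the
		 * original schedule; tm_isdst = -1 lets mktime() resolve
		 * daylight-saving time for the new wall-clock value. */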
		tm.tm_sec += calc_period;
		tm.tm_isdst = -1;
		next_time = mktime(&tm);
		sleep(next_time - start_time);
		start_time = next_time;
		/* repeat ;) */
	}
	return NULL;
}
Example #23
0
extern List as_mysql_jobacct_process_get_jobs(mysql_conn_t *mysql_conn,
					      uid_t uid,
					      slurmdb_job_cond_t *job_cond)
{
	char *extra = NULL;
	char *tmp = NULL, *tmp2 = NULL;
	ListIterator itr = NULL;
	int is_admin=1;
	int i;
	List job_list = NULL;
	uint16_t private_data = 0;
	slurmdb_user_rec_t user;
	int only_pending = 0;
	List use_cluster_list = as_mysql_cluster_list;
	char *cluster_name;
	assoc_mgr_lock_t locks = { NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK,
				   READ_LOCK, NO_LOCK, NO_LOCK };

	memset(&user, 0, sizeof(slurmdb_user_rec_t));
	user.uid = uid;

	private_data = slurm_get_private_data();
	if (private_data & PRIVATE_DATA_JOBS) {
		if (!(is_admin = is_user_min_admin_level(
			      mysql_conn, uid, SLURMDB_ADMIN_OPERATOR))) {
			/* Only fill in the coordinator accounts here
			   we will check them later when we actually
			   try to get the jobs.
			*/
			is_user_any_coord(mysql_conn, &user);
		}
		if (!is_admin && !user.name) {
			debug("User %u has no associations, and is not admin, "
			      "so not returning any jobs.", user.uid);
			return NULL;
		}
	}

	if (job_cond
	    && job_cond->state_list && (list_count(job_cond->state_list) == 1)
	    && (slurm_atoul(list_peek(job_cond->state_list)) == JOB_PENDING))
		only_pending = 1;

	setup_job_cond_limits(job_cond, &extra);

	xfree(tmp);
	xstrfmtcat(tmp, "%s", job_req_inx[0]);
	for (i = 1; i < JOB_REQ_COUNT; i++) {
		xstrfmtcat(tmp, ", %s", job_req_inx[i]);
	}

	xfree(tmp2);
	xstrfmtcat(tmp2, "%s", step_req_inx[0]);
	for (i = 1; i < STEP_REQ_COUNT; i++) {
		xstrfmtcat(tmp2, ", %s", step_req_inx[i]);
	}

	if (job_cond
	    && job_cond->cluster_list && list_count(job_cond->cluster_list))
		use_cluster_list = job_cond->cluster_list;
	else
		slurm_mutex_lock(&as_mysql_cluster_list_lock);

	assoc_mgr_lock(&locks);

	job_list = list_create(slurmdb_destroy_job_rec);
	itr = list_iterator_create(use_cluster_list);
	while ((cluster_name = list_next(itr))) {
		int rc;
		if ((rc = _cluster_get_jobs(mysql_conn, &user, job_cond,
					    cluster_name, tmp, tmp2, extra,
					    is_admin, only_pending, job_list))
		    != SLURM_SUCCESS)
			error("Problem getting jobs for cluster %s",
			      cluster_name);
	}
	list_iterator_destroy(itr);

	assoc_mgr_unlock(&locks);

	if (use_cluster_list == as_mysql_cluster_list)
		slurm_mutex_unlock(&as_mysql_cluster_list_lock);

	xfree(tmp);
	xfree(tmp2);
	xfree(extra);

	return job_list;
}
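The column lists above are built by starting with the first name and appending ", <name>" for each further index via xstrfmtcat(). A self-contained sketch of the same join using snprintf() in place of Slurm's growable xstring buffers (join_columns() and the column names are illustrative):

#include <stdio.h>

/* Join column names with ", ", as the xstrfmtcat() loops above do. */
static void join_columns(char *buf, size_t buflen,
			 const char **cols, int count)
{
	size_t off = 0;
	int i, n;

	buf[0] = '\0';
	for (i = 0; i < count; i++) {
		n = snprintf(buf + off, buflen - off, "%s%s",
			     i ? ", " : "", cols[i]);
		if (n < 0 || (size_t)n >= buflen - off)
			break;          /* output error or truncation */
		off += (size_t)n;
	}
}

int main(void)
{
	const char *job_cols[] = { "id_job", "id_user", "state" };
	char tmp[256];

	join_columns(tmp, sizeof(tmp), job_cols, 3);
	printf("SELECT %s FROM <cluster>_job_table\n", tmp);
	return 0;
}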
Example #24
0
static void _handle_stats(List prec_list, char *proc_stat_file,
			  char *proc_io_file, char *proc_smaps_file,
			  jag_callbacks_t *callbacks,
			  int tres_count)
{
	static int no_share_data = -1;
	static int use_pss = -1;
	FILE *stat_fp = NULL;
	FILE *io_fp = NULL;
	int fd, fd2, i;
	jag_prec_t *prec = NULL;

	if (no_share_data == -1) {
		char *acct_params = slurm_get_jobacct_gather_params();
		if (acct_params && xstrcasestr(acct_params, "NoShare"))
			no_share_data = 1;
		else
			no_share_data = 0;

		if (acct_params && xstrcasestr(acct_params, "UsePss"))
			use_pss = 1;
		else
			use_pss = 0;
		xfree(acct_params);
	}

	if (!(stat_fp = fopen(proc_stat_file, "r")))
		return;  /* Assume the process went away */
	/*
	 * Close the file on exec() of user tasks.
	 *
	 * NOTE: If we fork() slurmstepd after the
	 * fopen() above and before the fcntl() below,
	 * then the user task may have this extra file
	 * open, which can cause problems for
	 * checkpoint/restart, but this should be a very rare
	 * problem in practice.
	 */
	fd = fileno(stat_fp);
	if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1)
		error("%s: fcntl(%s): %m", __func__, proc_stat_file);

	prec = xmalloc(sizeof(jag_prec_t));

	if (!tres_count) {
		assoc_mgr_lock_t locks = {
			NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK,
			READ_LOCK, NO_LOCK, NO_LOCK };
		assoc_mgr_lock(&locks);
		tres_count = g_tres_count;
		assoc_mgr_unlock(&locks);
	}

	prec->tres_count = tres_count;
	prec->tres_data = xmalloc(prec->tres_count *
				  sizeof(acct_gather_data_t));

	/* Initialize read/write counters to INFINITE64, which serves
	 * as a "no data collected" sentinel. */
	for (i = 0; i < prec->tres_count; i++) {
		prec->tres_data[i].num_reads = INFINITE64;
		prec->tres_data[i].num_writes = INFINITE64;
		prec->tres_data[i].size_read = INFINITE64;
		prec->tres_data[i].size_write = INFINITE64;
	}

	if (!_get_process_data_line(fd, prec)) {
		xfree(prec->tres_data);
		xfree(prec);
		fclose(stat_fp);
		return;
	}
	fclose(stat_fp);

	if (acct_gather_filesystem_g_get_data(prec->tres_data) < 0) {
		debug2("problem retrieving filesystem data");
	}

	if (acct_gather_interconnect_g_get_data(prec->tres_data) < 0) {
		debug2("problem retrieving interconnect data");
	}

	/* Remove shared data from rss */
	if (no_share_data)
		_remove_share_data(proc_stat_file, prec);

	/* Use PSS instead of RSS */
	if (use_pss) {
		if (_get_pss(proc_smaps_file, prec) == -1) {
			xfree(prec->tres_data);
			xfree(prec);
			return;
		}
	}

	list_append(prec_list, prec);

	if ((io_fp = fopen(proc_io_file, "r"))) {
		fd2 = fileno(io_fp);
		if (fcntl(fd2, F_SETFD, FD_CLOEXEC) == -1)
			error("%s: fcntl: %m", __func__);
		_get_process_io_data_line(fd2, prec);
		fclose(io_fp);
	}
}
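_handle_stats() parses the NoShare and UsePss options from JobAcctGatherParams only once, caching the result in function-local statics. A sketch of that lazy-initialization pattern under the same single-caller assumption; get_params() stands in for slurm_get_jobacct_gather_params(), and strcasestr() plays the role of Slurm's xstrcasestr():

#define _GNU_SOURCE             /* for strcasestr() */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-in for slurm_get_jobacct_gather_params(); caller frees. */
static char *get_params(void)
{
	return strdup("UsePss,NoShare");
}

static void handle_stats_once(void)
{
	static int no_share_data = -1;  /* -1 = "not parsed yet" */
	static int use_pss = -1;

	if (no_share_data == -1) {      /* parse only on the first call */
		char *params = get_params();

		no_share_data = (params &&
				 strcasestr(params, "NoShare")) ? 1 : 0;
		use_pss = (params && strcasestr(params, "UsePss")) ? 1 : 0;
		free(params);
	}
	printf("no_share=%d use_pss=%d\n", no_share_data, use_pss);
}

int main(void)
{
	handle_stats_once();
	handle_stats_once();    /* cached; get_params() is not re-read */
	return 0;
}

Like the original, the statics carry no synchronization, so the pattern relies on a single polling thread making the first call.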