Ejemplo n.º 1
0
int main(int argc, char * argv[])
{
	if (argc != 6) {
		printf("Usage: %s, control_addr job_id1 job_id2 sched_port is_bluegene\n",
			argv[0]);
		exit(1);
	}

	control_addr = argv[1];
	job_id1      = atoi(argv[2]);
	job_id2      = atoi(argv[3]);
	sched_port   = atoi(argv[4]);
	is_bluegene  = atoi(argv[5]);
	printf("control_addr=%s job_id=%ld,%ld sched_port=%d is_bluegene=%d\n",
		control_addr, job_id1, job_id2, sched_port, is_bluegene);

	_get_jobs();
	_get_nodes();
	_modify_job(job_id1);
	_get_jobs();
	_start_job(job_id1);
	if (!is_bluegene) {
		_suspend_job(job_id1);
		_resume_job(job_id1);
	}
	_cancel_job(job_id2);
	sleep(5);
	_get_jobs();

	printf("SUCCESS\n");
	exit(0);
}
Ejemplo n.º 2
0
int main(int argc, char * argv[])
{
	if (argc < 6) {
		printf("Usage: %s, auth_key control_addr e_port "
			"job_id sched_port is_bluegene\n", argv[0]);
		exit(1);
	}

	auth_key     = argv[1];
	control_addr = argv[2];
	e_port       = atoi(argv[3]);
	job_id       = atoi(argv[4]);
	sched_port   = atoi(argv[5]);
	is_bluegene  = atoi(argv[6]);
	printf("auth_key=%s control_addr=%s e_port=%d job_id=%d sched_port=%d "
		"is_bluegene=%d\n",
		auth_key, control_addr, e_port, job_id, sched_port, is_bluegene);

#if _DEBUG
	_single_msg();
#else
	_initialize();
	_get_jobs();
	_get_nodes();
	_job_will_run(job_id);
	_modify_job(job_id);
	_get_jobs();
	_start_job(job_id);
	_get_jobs();
	if (!is_bluegene) {
		_suspend_job(job_id);
		_resume_job(job_id);
	}
	_notify_job(job_id);
	_signal_job(job_id);
	if (e_port)
		_event_mgr();
	else {
		printf("READY\n");
		sleep(3);
	}
	_cancel_job(job_id+1);
	_job_requeue(job_id);	/* Put job back into HELD state */
	sleep(15);
	_start_job(job_id);
	_get_jobs();
#endif
	printf("SUCCESS\n");
	exit(0);
}
Ejemplo n.º 3
0
/*
 * acct_policy_job_runnable - Determine of the specified job can execute
 *	right now or not depending upon accounting policy (e.g. running
 *	job limit for this association). If the association limits prevent
 *	the job from ever running (lowered limits since job submission),
 *	then cancel the job.
 */
extern bool acct_policy_job_runnable(struct job_record *job_ptr)
{
	slurmdb_qos_rec_t *qos_ptr;
	slurmdb_association_rec_t *assoc_ptr;
	uint32_t time_limit;
	uint64_t cpu_time_limit;
	uint64_t job_cpu_time_limit;
	bool rc = true;
	uint64_t usage_mins;
	uint32_t wall_mins;
	bool cancel_job = 0;
	int parent = 0; /*flag to tell us if we are looking at the
			 * parent or not
			 */
	assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK,
				   READ_LOCK, NO_LOCK, NO_LOCK };

	/* check to see if we are enforcing associations */
	if (!accounting_enforce)
		return true;

	if (!_valid_job_assoc(job_ptr)) {
		_cancel_job(job_ptr);
		return false;
	}

	/* now see if we are enforcing limits */
	if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS))
		return true;

	/* clear old state reason */
        if ((job_ptr->state_reason == WAIT_ASSOC_JOB_LIMIT) ||
	    (job_ptr->state_reason == WAIT_ASSOC_RESOURCE_LIMIT) ||
	    (job_ptr->state_reason == WAIT_ASSOC_TIME_LIMIT))
                job_ptr->state_reason = WAIT_NO_REASON;


	job_cpu_time_limit = (uint64_t)job_ptr->time_limit
		* (uint64_t)job_ptr->details->min_cpus;

	assoc_mgr_lock(&locks);
	qos_ptr = job_ptr->qos_ptr;
	if(qos_ptr) {
		usage_mins = (uint64_t)(qos_ptr->usage->usage_raw / 60.0);
		wall_mins = qos_ptr->usage->grp_used_wall / 60;

		if ((qos_ptr->grp_cpu_mins != (uint64_t)INFINITE)
		    && (usage_mins >= qos_ptr->grp_cpu_mins)) {
			job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
			xfree(job_ptr->state_desc);
			debug2("Job %u being held, "
			       "the job is at or exceeds QOS %s's "
			       "group max cpu minutes of %"PRIu64" "
			       "with %"PRIu64"",
			       job_ptr->job_id,
			       qos_ptr->name,
			       qos_ptr->grp_cpu_mins,
			       usage_mins);
			rc = false;
			goto end_it;
		}

		if ((job_ptr->limit_set_min_cpus != ADMIN_SET_LIMIT)
		    && qos_ptr->grp_cpus != INFINITE) {
			if (job_ptr->details->min_cpus > qos_ptr->grp_cpus) {
				info("job %u is being cancelled, "
				     "min cpu request %u exceeds "
				     "group max cpu limit %u for "
				     "qos '%s'",
				     job_ptr->job_id,
				     job_ptr->details->min_cpus,
				     qos_ptr->grp_cpus,
				     qos_ptr->name);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}

			if ((qos_ptr->usage->grp_used_cpus +
			     job_ptr->details->min_cpus) > qos_ptr->grp_cpus) {
				job_ptr->state_reason =
					WAIT_ASSOC_RESOURCE_LIMIT;
				xfree(job_ptr->state_desc);
				debug2("job %u being held, "
				       "the job is at or exceeds "
				       "group max cpu limit %u "
				       "with already used %u + requested %u "
				       "for qos %s",
				       job_ptr->job_id,
				       qos_ptr->grp_cpus,
				       qos_ptr->usage->grp_used_cpus,
				       job_ptr->details->min_cpus,
				       qos_ptr->name);
				rc = false;
				goto end_it;
			}
		}

		if ((qos_ptr->grp_jobs != INFINITE) &&
		    (qos_ptr->usage->grp_used_jobs >= qos_ptr->grp_jobs)) {
			job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
			xfree(job_ptr->state_desc);
			debug2("job %u being held, "
			       "the job is at or exceeds "
			       "group max jobs limit %u with %u for qos %s",
			       job_ptr->job_id,
			       qos_ptr->grp_jobs,
			       qos_ptr->usage->grp_used_jobs, qos_ptr->name);

			rc = false;
			goto end_it;
		}

		if ((job_ptr->limit_set_min_nodes != ADMIN_SET_LIMIT)
		    && qos_ptr->grp_nodes != INFINITE) {
			if (job_ptr->details->min_nodes > qos_ptr->grp_nodes) {
				info("job %u is being cancelled, "
				     "min node request %u exceeds "
				     "group max node limit %u for "
				     "qos '%s'",
				     job_ptr->job_id,
				     job_ptr->details->min_nodes,
				     qos_ptr->grp_nodes,
				     qos_ptr->name);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}

			if ((qos_ptr->usage->grp_used_nodes +
			     job_ptr->details->min_nodes) >
			    qos_ptr->grp_nodes) {
				job_ptr->state_reason =
					WAIT_ASSOC_RESOURCE_LIMIT;
				xfree(job_ptr->state_desc);
				debug2("job %u being held, "
				       "the job is at or exceeds "
				       "group max node limit %u "
				       "with already used %u + requested %u "
				       "for qos %s",
				       job_ptr->job_id,
				       qos_ptr->grp_nodes,
				       qos_ptr->usage->grp_used_nodes,
				       job_ptr->details->min_nodes,
				       qos_ptr->name);
				rc = false;
				goto end_it;
			}
		}

		/* we don't need to check submit_jobs here */

		if ((qos_ptr->grp_wall != INFINITE)
		    && (wall_mins >= qos_ptr->grp_wall)) {
			job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
			xfree(job_ptr->state_desc);
			debug2("job %u being held, "
			       "the job is at or exceeds "
			       "group wall limit %u "
			       "with %u for qos %s",
			       job_ptr->job_id,
			       qos_ptr->grp_wall,
			       wall_mins, qos_ptr->name);
			rc = false;
			goto end_it;
		}

		if (qos_ptr->max_cpu_mins_pj != INFINITE) {
			cpu_time_limit = qos_ptr->max_cpu_mins_pj;
			if ((job_ptr->time_limit != NO_VAL) &&
			    (job_cpu_time_limit > cpu_time_limit)) {
				info("job %u being cancelled, "
				     "cpu time limit %"PRIu64" exceeds "
				     "qos max per job %"PRIu64"",
				     job_ptr->job_id,
				     job_cpu_time_limit,
				     cpu_time_limit);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}

		if ((job_ptr->limit_set_min_cpus != ADMIN_SET_LIMIT)
		    && qos_ptr->max_cpus_pj != INFINITE) {
			if (job_ptr->details->min_cpus >
			    qos_ptr->max_cpus_pj) {
				info("job %u being cancelled, "
				     "min cpu limit %u exceeds "
				     "qos max %u",
				     job_ptr->job_id,
				     job_ptr->details->min_cpus,
				     qos_ptr->max_cpus_pj);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}

		if (qos_ptr->max_jobs_pu != INFINITE) {
			slurmdb_used_limits_t *used_limits = NULL;
			if(qos_ptr->usage->user_limit_list) {
				ListIterator itr = list_iterator_create(
					qos_ptr->usage->user_limit_list);
				while((used_limits = list_next(itr))) {
					if(used_limits->uid == job_ptr->user_id)
						break;
				}
				list_iterator_destroy(itr);
			}
			if(used_limits && (used_limits->jobs
					   >= qos_ptr->max_jobs_pu)) {
				debug2("job %u being held, "
				       "the job is at or exceeds "
				       "max jobs limit %u with %u for QOS %s",
				       job_ptr->job_id,
				       qos_ptr->max_jobs_pu,
				       used_limits->jobs, qos_ptr->name);
				rc = false;
				goto end_it;
			}
		}

		if ((job_ptr->limit_set_min_nodes != ADMIN_SET_LIMIT)
		    && qos_ptr->max_nodes_pj != INFINITE) {
			if (job_ptr->details->min_nodes >
			    qos_ptr->max_nodes_pj) {
				info("job %u being cancelled, "
				     "min node limit %u exceeds "
				     "qos max %u",
				     job_ptr->job_id,
				     job_ptr->details->min_nodes,
				     qos_ptr->max_nodes_pj);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}

		/* we don't need to check submit_jobs_pu here */

		/* if the qos limits have changed since job
		 * submission and job can not run, then kill it */
		if ((job_ptr->limit_set_time != ADMIN_SET_LIMIT)
		    && qos_ptr->max_wall_pj != INFINITE) {
			time_limit = qos_ptr->max_wall_pj;
			if ((job_ptr->time_limit != NO_VAL) &&
			    (job_ptr->time_limit > time_limit)) {
				info("job %u being cancelled, "
				     "time limit %u exceeds qos "
				     "max wall pj %u",
				     job_ptr->job_id,
				     job_ptr->time_limit,
				     time_limit);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}
	}

	assoc_ptr = job_ptr->assoc_ptr;
	while(assoc_ptr) {
		usage_mins = (uint64_t)(assoc_ptr->usage->usage_raw / 60.0);
		wall_mins = assoc_ptr->usage->grp_used_wall / 60;
#if _DEBUG
		info("acct_job_limits: %u of %u",
		     assoc_ptr->usage->used_jobs, assoc_ptr->max_jobs);
#endif
		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->grp_cpu_mins == (uint64_t)INFINITE))
		    && (assoc_ptr->grp_cpu_mins != (uint64_t)INFINITE)
		    && (usage_mins >= assoc_ptr->grp_cpu_mins)) {
			job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
			xfree(job_ptr->state_desc);
			debug2("job %u being held, "
			       "assoc %u is at or exceeds "
			       "group max cpu minutes limit %"PRIu64" "
			       "with %Lf for account %s",
			       job_ptr->job_id, assoc_ptr->id,
			       assoc_ptr->grp_cpu_mins,
			       assoc_ptr->usage->usage_raw, assoc_ptr->acct);

			rc = false;
			goto end_it;
		}

		if ((job_ptr->limit_set_min_cpus != ADMIN_SET_LIMIT)
		    && (!qos_ptr ||
			(qos_ptr && qos_ptr->grp_cpus == INFINITE))
		    && (assoc_ptr->grp_cpus != INFINITE)) {
			if (job_ptr->details->min_cpus > assoc_ptr->grp_cpus) {
				info("job %u being cancelled, "
				     "min cpu request %u exceeds "
				     "group max cpu limit %u for "
				     "account %s",
				     job_ptr->job_id,
				     job_ptr->details->min_cpus,
				     assoc_ptr->grp_cpus,
				     assoc_ptr->acct);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}

			if ((assoc_ptr->usage->grp_used_cpus +
				    job_ptr->details->min_cpus) >
				   assoc_ptr->grp_cpus) {
				job_ptr->state_reason =
					WAIT_ASSOC_RESOURCE_LIMIT;
				xfree(job_ptr->state_desc);
				debug2("job %u being held, "
				       "assoc %u is at or exceeds "
				       "group max cpu limit %u "
				       "with already used %u + requested %u "
				       "for account %s",
				       job_ptr->job_id, assoc_ptr->id,
				       assoc_ptr->grp_cpus,
				       assoc_ptr->usage->grp_used_cpus,
				       job_ptr->details->min_cpus,
				       assoc_ptr->acct);
				rc = false;
				goto end_it;
			}
		}

		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->grp_jobs == INFINITE)) &&
		    (assoc_ptr->grp_jobs != INFINITE) &&
		    (assoc_ptr->usage->used_jobs >= assoc_ptr->grp_jobs)) {
			job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
			xfree(job_ptr->state_desc);
			debug2("job %u being held, "
			       "assoc %u is at or exceeds "
			       "group max jobs limit %u with %u for account %s",
			       job_ptr->job_id, assoc_ptr->id,
			       assoc_ptr->grp_jobs,
			       assoc_ptr->usage->used_jobs, assoc_ptr->acct);

			rc = false;
			goto end_it;
		}

		if ((job_ptr->limit_set_min_nodes != ADMIN_SET_LIMIT)
		    && (!qos_ptr ||
			(qos_ptr && qos_ptr->grp_nodes == INFINITE))
		    && (assoc_ptr->grp_nodes != INFINITE)) {
			if (job_ptr->details->min_nodes >
			    assoc_ptr->grp_nodes) {
				info("job %u being cancelled, "
				     "min node request %u exceeds "
				     "group max node limit %u for "
				     "account %s",
				     job_ptr->job_id,
				     job_ptr->details->min_nodes,
				     assoc_ptr->grp_nodes,
				     assoc_ptr->acct);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}

			if ((assoc_ptr->usage->grp_used_nodes +
			     job_ptr->details->min_nodes) >
			    assoc_ptr->grp_nodes) {
				job_ptr->state_reason =
					WAIT_ASSOC_RESOURCE_LIMIT;
				xfree(job_ptr->state_desc);
				debug2("job %u being held, "
				       "assoc %u is at or exceeds "
				       "group max node limit %u "
				       "with already used %u + requested %u "
				       "for account %s",
				       job_ptr->job_id, assoc_ptr->id,
				       assoc_ptr->grp_nodes,
				       assoc_ptr->usage->grp_used_nodes,
				       job_ptr->details->min_nodes,
				       assoc_ptr->acct);
				rc = false;
				goto end_it;
			}
		}

		/* we don't need to check submit_jobs here */

		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->grp_wall == INFINITE))
		    && (assoc_ptr->grp_wall != INFINITE)
		    && (wall_mins >= assoc_ptr->grp_wall)) {
			job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
			xfree(job_ptr->state_desc);
			debug2("job %u being held, "
			       "assoc %u is at or exceeds "
			       "group wall limit %u "
			       "with %u for account %s",
			       job_ptr->job_id, assoc_ptr->id,
			       assoc_ptr->grp_wall,
			       wall_mins, assoc_ptr->acct);
			rc = false;
			goto end_it;
		}


		/* We don't need to look at the regular limits for
		 * parents since we have pre-propogated them, so just
		 * continue with the next parent
		 */
		if(parent) {
			assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
			continue;
		}

		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->max_cpu_mins_pj == INFINITE)) &&
		    (assoc_ptr->max_cpu_mins_pj != INFINITE)) {
			cpu_time_limit = assoc_ptr->max_cpu_mins_pj;
			if ((job_ptr->time_limit != NO_VAL) &&
			    (job_cpu_time_limit > cpu_time_limit)) {
				info("job %u being cancelled, "
				     "cpu time limit %"PRIu64" exceeds "
				     "assoc max per job %"PRIu64"",
				     job_ptr->job_id,
				     job_cpu_time_limit,
				     cpu_time_limit);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}

		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->max_cpus_pj == INFINITE)) &&
		    (assoc_ptr->max_cpus_pj != INFINITE)) {
			if (job_ptr->details->min_cpus >
			    assoc_ptr->max_cpus_pj) {
				info("job %u being cancelled, "
				     "min cpu limit %u exceeds "
				     "account max %u",
				     job_ptr->job_id,
				     job_ptr->details->min_cpus,
				     assoc_ptr->max_cpus_pj);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}

		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->max_jobs_pu == INFINITE)) &&
		    (assoc_ptr->max_jobs != INFINITE) &&
		    (assoc_ptr->usage->used_jobs >= assoc_ptr->max_jobs)) {
			job_ptr->state_reason = WAIT_ASSOC_JOB_LIMIT;
			xfree(job_ptr->state_desc);
			debug2("job %u being held, "
			       "assoc %u is at or exceeds "
			       "max jobs limit %u with %u for account %s",
			       job_ptr->job_id, assoc_ptr->id,
			       assoc_ptr->max_jobs,
			       assoc_ptr->usage->used_jobs, assoc_ptr->acct);
			rc = false;
			goto end_it;
		}

		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->max_nodes_pj == INFINITE))
		    && (assoc_ptr->max_nodes_pj != INFINITE)) {
			if (job_ptr->details->min_nodes >
			    assoc_ptr->max_nodes_pj) {
				info("job %u being cancelled, "
				     "min node limit %u exceeds "
				     "account max %u",
				     job_ptr->job_id,
				     job_ptr->details->min_nodes,
				     assoc_ptr->max_nodes_pj);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}

		/* we don't need to check submit_jobs here */

		/* if the association limits have changed since job
		 * submission and job can not run, then kill it */
		if ((job_ptr->limit_set_time != ADMIN_SET_LIMIT)
		    && (!qos_ptr ||
			(qos_ptr && qos_ptr->max_wall_pj == INFINITE))
		    && (assoc_ptr->max_wall_pj != INFINITE)) {
			time_limit = assoc_ptr->max_wall_pj;
			if ((job_ptr->time_limit != NO_VAL) &&
			    (job_ptr->time_limit > time_limit)) {
				info("job %u being cancelled, "
				     "time limit %u exceeds account "
				     "max %u",
				     job_ptr->job_id,
				     job_ptr->time_limit,
				     time_limit);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}

		assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
		parent = 1;
	}
end_it:
	assoc_mgr_unlock(&locks);

	if(cancel_job)
		_cancel_job(job_ptr);

	return rc;
}
Ejemplo n.º 4
0
extern bool acct_policy_node_usable(struct job_record *job_ptr,
				    uint32_t used_cpus,
				    char *node_name, uint32_t node_cpus)
{
	slurmdb_qos_rec_t *qos_ptr;
	slurmdb_association_rec_t *assoc_ptr;
	bool rc = true;
	uint32_t total_cpus = used_cpus + node_cpus;
	bool cancel_job = 0;
	int parent = 0; /* flag to tell us if we are looking at the
			 * parent or not
			 */
	assoc_mgr_lock_t locks = { READ_LOCK, NO_LOCK,
				   READ_LOCK, NO_LOCK, NO_LOCK };

	/* check to see if we are enforcing associations */
	if (!accounting_enforce)
		return true;

	if (!_valid_job_assoc(job_ptr)) {
		_cancel_job(job_ptr);
		return false;
	}

	/* now see if we are enforcing limits */
	if (!(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS))
		return true;

	/* clear old state reason */
        if ((job_ptr->state_reason == WAIT_ASSOC_JOB_LIMIT) ||
	    (job_ptr->state_reason == WAIT_ASSOC_RESOURCE_LIMIT) ||
	    (job_ptr->state_reason == WAIT_ASSOC_TIME_LIMIT))
                job_ptr->state_reason = WAIT_NO_REASON;


	assoc_mgr_lock(&locks);
	qos_ptr = job_ptr->qos_ptr;
	if(qos_ptr) {
		if (qos_ptr->grp_cpus != INFINITE) {
			if ((total_cpus+qos_ptr->usage->grp_used_cpus)
			    > qos_ptr->grp_cpus) {
				debug("Can't use %s, adding it's %u cpus "
				      "exceeds "
				      "group max cpu limit %u for qos '%s'",
				     node_name,
				     node_cpus,
				     qos_ptr->grp_cpus,
				     qos_ptr->name);
				rc = false;
				goto end_it;
			}
		}

		if (qos_ptr->max_cpus_pj != INFINITE) {
			if (total_cpus > qos_ptr->max_cpus_pj) {
				debug("Can't use %s, adding it's %u cpus "
				      "exceeds "
				      "max cpu limit %u for qos '%s'",
				      node_name,
				      node_cpus,
				      qos_ptr->max_cpus_pj,
				      qos_ptr->name);
				cancel_job = 1;
				rc = false;
				goto end_it;
			}
		}
	}

	assoc_ptr = job_ptr->assoc_ptr;
	while(assoc_ptr) {
		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->grp_cpus == INFINITE))
		    && (assoc_ptr->grp_cpus != INFINITE)) {
			if ((total_cpus+assoc_ptr->usage->grp_used_cpus)
			    > assoc_ptr->grp_cpus) {
				debug("Can't use %s, adding it's %u cpus "
				      "exceeds "
				      "group max cpu limit %u for account '%s'",
				      node_name,
				      node_cpus,
				      assoc_ptr->grp_cpus,
				      assoc_ptr->acct);
				rc = false;
				goto end_it;
			}
		}

		/* We don't need to look at the regular limits for
		 * parents since we have pre-propogated them, so just
		 * continue with the next parent
		 */
		if(parent) {
			assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
			continue;
		}

		if ((!qos_ptr ||
		     (qos_ptr && qos_ptr->max_cpus_pj == INFINITE)) &&
		    (assoc_ptr->max_cpus_pj != INFINITE)) {
			if (job_ptr->details->min_cpus >
			    assoc_ptr->max_cpus_pj) {
				debug("Can't use %s, adding it's %u cpus "
				      "exceeds "
				      "max cpu limit %u for account '%s'",
				      node_name,
				      node_cpus,
				      assoc_ptr->max_cpus_pj,
				      assoc_ptr->acct);
				rc = false;
				goto end_it;
			}
		}
		assoc_ptr = assoc_ptr->usage->parent_assoc_ptr;
		parent = 1;
	}
end_it:
	assoc_mgr_unlock(&locks);

	if(cancel_job)
		_cancel_job(job_ptr);

	return rc;
}
Ejemplo n.º 5
0
/*
 * acct_policy_update_pending_job - Make sure the limits imposed on a
 *	job on submission are correct after an update to a qos or
 *	association.  If the association/qos limits prevent
 *	the job from ever running (lowered limits since job submission),
 *	then cancel the job.
 */
extern int acct_policy_update_pending_job(struct job_record *job_ptr)
{
	job_desc_msg_t job_desc;
	uint16_t limit_set_max_cpus = 0;
	uint16_t limit_set_max_nodes = 0;
	uint16_t limit_set_time = 0;
	bool update_accounting = false;
	struct job_details *details_ptr;
	int rc = SLURM_SUCCESS;

	/* check to see if we are enforcing associations and the job
	 * is pending or if we are even enforcing limits. */
	if (!accounting_enforce || !IS_JOB_PENDING(job_ptr)
	    || !(accounting_enforce & ACCOUNTING_ENFORCE_LIMITS))
		return SLURM_SUCCESS;

	details_ptr = job_ptr->details;

	if (!details_ptr) {
		error("acct_policy_update_pending_job: no details");
		return SLURM_ERROR;
	}

	/* set up the job desc to make sure things are the way we
	 * need.
	 */
	slurm_init_job_desc_msg(&job_desc);

	job_desc.min_cpus = details_ptr->min_cpus;
	/* Only set this value if not set from a limit */
	if (job_ptr->limit_set_max_cpus == ADMIN_SET_LIMIT)
		limit_set_max_cpus = job_ptr->limit_set_max_cpus;
	else if ((details_ptr->max_cpus != NO_VAL)
		 && !job_ptr->limit_set_max_cpus)
		job_desc.max_cpus = details_ptr->max_cpus;

	job_desc.min_nodes = details_ptr->min_nodes;
	/* Only set this value if not set from a limit */
	if (job_ptr->limit_set_max_nodes == ADMIN_SET_LIMIT)
		limit_set_max_nodes = job_ptr->limit_set_max_nodes;
	else if ((details_ptr->max_nodes != NO_VAL)
		 && !job_ptr->limit_set_max_nodes)
		job_desc.max_nodes = details_ptr->max_nodes;
	else
		job_desc.max_nodes = 0;

	/* Only set this value if not set from a limit */
	if (job_ptr->limit_set_time == ADMIN_SET_LIMIT)
		limit_set_time = job_ptr->limit_set_time;
	else if ((job_ptr->time_limit != NO_VAL) && !job_ptr->limit_set_time)
		job_desc.time_limit = job_ptr->time_limit;

	if (!acct_policy_validate(&job_desc, job_ptr->part_ptr,
				  job_ptr->assoc_ptr, job_ptr->qos_ptr,
				  &limit_set_max_cpus,
				  &limit_set_max_nodes,
				  &limit_set_time, 0)) {
		info("acct_policy_update_pending_job: exceeded "
		     "association/qos's cpu, node or "
		     "time limit for job %d", job_ptr->job_id);
		_cancel_job(job_ptr);
		return SLURM_ERROR;
	}

	/* If it isn't an admin set limit replace it. */
	if (!limit_set_max_cpus && (job_ptr->limit_set_max_cpus == 1)) {
		details_ptr->max_cpus = NO_VAL;
		job_ptr->limit_set_max_cpus = 0;
		update_accounting = true;
	} else if (limit_set_max_cpus != ADMIN_SET_LIMIT) {
		if (details_ptr->max_cpus != job_desc.max_cpus) {
			details_ptr->max_cpus = job_desc.max_cpus;
			update_accounting = true;
		}
		job_ptr->limit_set_max_cpus = limit_set_max_cpus;
	}

	if (!limit_set_max_nodes && (job_ptr->limit_set_max_nodes == 1)) {
		details_ptr->max_nodes = 0;
		job_ptr->limit_set_max_nodes = 0;
		update_accounting = true;
	} else if (limit_set_max_nodes != ADMIN_SET_LIMIT) {
		if (details_ptr->max_nodes != job_desc.max_nodes) {
			details_ptr->max_nodes = job_desc.max_nodes;
			update_accounting = true;
		}
		job_ptr->limit_set_max_nodes = limit_set_max_nodes;
	}

	if (!limit_set_time && (job_ptr->limit_set_time == 1)) {
		job_ptr->time_limit = NO_VAL;
		job_ptr->limit_set_time = 0;
		update_accounting = true;
	} else if (limit_set_time != ADMIN_SET_LIMIT) {
		if (job_ptr->time_limit != job_desc.time_limit) {
			job_ptr->time_limit = job_desc.time_limit;
			update_accounting = true;
		}
		job_ptr->limit_set_time = limit_set_time;
	}

	if (update_accounting) {
		last_job_update = time(NULL);
		debug("limits changed for job %u: updating accounting",
		      job_ptr->job_id);
		if (details_ptr->begin_time) {
			/* Update job record in accounting to reflect changes */
			jobacct_storage_g_job_start(acct_db_conn, job_ptr);
		}
	}

	return rc;
}