示例#1
0
extern int fed_mgr_update_feds(slurmdb_update_object_t *update)
{
	List feds;
	slurmdb_federation_rec_t *fed = NULL;
	slurmdb_cluster_rec_t *cluster = NULL;
	slurmctld_lock_t fed_write_lock = {
		NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, WRITE_LOCK };

	if (!update->objects)
		return SLURM_SUCCESS;

	slurm_mutex_lock(&init_mutex);
	if (!inited) {
		slurm_mutex_unlock(&init_mutex);
		return SLURM_SUCCESS; /* we haven't started the fed mgr and we
				       * can't start it from here, don't worry
				       * all will get set up later. */
	}
	slurm_mutex_unlock(&init_mutex);
	/* we only want one update happening at a time. */
	slurm_mutex_lock(&update_mutex);
	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR)
		info("Got a federation update");

	feds = update->objects;

	/* find the federation that this cluster is in.
	 * if it's changed from last time then update stored information.
	 * grab other clusters in federation
	 * establish connections with each cluster in federation */

	/* what if a remote cluster is removed from federation.
	 * have to detect that and close the connection to the remote */
	while ((fed = list_pop(feds))) {
		if (fed->cluster_list &&
		    (cluster = list_find_first(fed->cluster_list,
					       slurmdb_find_cluster_in_list,
					       slurmctld_cluster_name))) {
			_join_federation(fed, cluster, true);
			break;
		}

		slurmdb_destroy_federation_rec(fed);
	}

	if (!fed) {
		if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR)
			info("Not part of any federation");
		lock_slurmctld(fed_write_lock);
		_leave_federation();
		unlock_slurmctld(fed_write_lock);
	}
	slurm_mutex_unlock(&update_mutex);
	return SLURM_SUCCESS;
}
示例#2
0
/*
 * Must have FED write lock prior to entering
 */
static void _leave_federation()
{
	if (!fed_mgr_fed_rec)
		return;

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR)
		info("Leaving federation %s", fed_mgr_fed_rec->name);

	_close_sibling_conns();
	_destroy_ping_thread();
	slurmdb_destroy_federation_rec(fed_mgr_fed_rec);
	fed_mgr_fed_rec = NULL;
	fed_mgr_cluster_rec = NULL;
}
示例#3
0
/*
 * Must have FED unlocked prior to entering
 */
static void _fed_mgr_ptr_init(slurmdb_federation_rec_t *db_fed,
			      slurmdb_cluster_rec_t *cluster)
{
	ListIterator c_itr;
	slurmdb_cluster_rec_t *tmp_cluster, *db_cluster;
	slurmctld_lock_t fed_write_lock = {
		NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, WRITE_LOCK };

	xassert(cluster);

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR)
		info("Joining federation %s", db_fed->name);

	lock_slurmctld(fed_write_lock);
	if (fed_mgr_fed_rec) {
		/* we are already part of a federation, preserve existing
		 * conenctions */
		c_itr = list_iterator_create(db_fed->cluster_list);
		while ((db_cluster = list_next(c_itr))) {
			if (!xstrcmp(db_cluster->name,
				     slurmctld_cluster_name)) {
				fed_mgr_cluster_rec = db_cluster;
				continue;
			}
			if (!(tmp_cluster =
			      list_find_first(fed_mgr_fed_rec->cluster_list,
					      slurmdb_find_cluster_in_list,
					      db_cluster->name))) {
				/* don't worry about destroying the connection
				 * here.  It will happen below when we free
				 * fed_mgr_fed_rec (automagically).
				 */
				continue;
			}
			slurm_mutex_lock(&tmp_cluster->lock);
			/* transfer over the connections we already have */
			db_cluster->fed.send = tmp_cluster->fed.send;
			tmp_cluster->fed.send = NULL;
			db_cluster->fed.recv = tmp_cluster->fed.recv;
			tmp_cluster->fed.recv = NULL;
			slurm_mutex_unlock(&tmp_cluster->lock);
		}
		list_iterator_destroy(c_itr);
		slurmdb_destroy_federation_rec(fed_mgr_fed_rec);
	} else
		fed_mgr_cluster_rec = cluster;

	fed_mgr_fed_rec = db_fed;
	unlock_slurmctld(fed_write_lock);
}
示例#4
0
extern int sacctmgr_modify_federation(int argc, char **argv)
{
	int rc = SLURM_SUCCESS;
	int i=0;
	int cond_set = 0, prev_set = 0, rec_set = 0, set = 0;
	List ret_list = NULL;
	slurmdb_federation_cond_t *federation_cond =
		xmalloc(sizeof(slurmdb_federation_cond_t));
	slurmdb_federation_rec_t *federation =
		xmalloc(sizeof(slurmdb_federation_rec_t));

	slurmdb_init_federation_cond(federation_cond, 0);
	slurmdb_init_federation_rec(federation, 0);

	for (i=0; i<argc; i++) {
		int command_len = strlen(argv[i]);
		if (!strncasecmp(argv[i], "Where", MAX(command_len, 5))) {
			i++;
			prev_set = _set_cond(&i, argc, argv,
					     federation_cond, NULL);
			cond_set |= prev_set;
		} else if (!strncasecmp(argv[i], "Set", MAX(command_len, 3))) {
			i++;
			prev_set = _set_rec(&i, argc, argv, NULL, federation);
			rec_set |= prev_set;
		} else {
			prev_set = _set_cond(&i, argc, argv,
					     federation_cond, NULL);
			cond_set |= prev_set;
		}
	}

	if (exit_code) {
		rc = SLURM_ERROR;
		goto end_it;
	} else if (!rec_set) {
		exit_code=1;
		fprintf(stderr, " You didn't give me anything to set\n");
		rc = SLURM_ERROR;
		goto end_it;
	} else if (!cond_set) {
		if (!commit_check("You didn't set any conditions "
				  "with 'WHERE'.\n"
				  "Are you sure you want to continue?")) {
			printf("Aborted\n");
			rc = SLURM_SUCCESS;
			goto end_it;
		}
	} else if (verify_federations_exist(
					federation_cond->federation_list)) {
		rc = SLURM_ERROR;
		goto end_it;
	}

	if (federation->cluster_list) {
		bool existing_feds = false;
		char *mod_fed = NULL;
		slurmdb_cluster_rec_t *tmp_c = NULL;
		List cluster_list = federation->cluster_list;

		if (federation_cond->federation_list &&
		    (list_count(federation_cond->federation_list) > 1)) {
			fprintf(stderr, " Can't assign clusters to "
					"multiple federations.\n");
			rc = SLURM_ERROR;
			goto end_it;
		}

		/* Add all clusters that need to be removed if clearing all
		 * clusters or add clusters that will be removed if setting
		 * clusters to specific set. */
		mod_fed = list_peek(federation_cond->federation_list);
		if ((!list_count(cluster_list) ||
		     ((tmp_c = list_peek(cluster_list)) &&
		      *tmp_c->name != '-' && *tmp_c->name != '+')) &&
		    ((rc = _add_clusters_to_remove(cluster_list, mod_fed)) ||
		     (rc = _change_assigns_to_adds(cluster_list)))) {
			goto end_it;
		} else if ((rc = verify_fed_clusters(cluster_list, mod_fed,
						     &existing_feds))) {
			goto end_it;
		} else if (!list_count(cluster_list)) {
			printf("Nothing to change\n");
			rc = SLURM_ERROR;
			goto end_it;
		} else if (existing_feds) {
			char *warning = "\nAre you sure you want to continue?";
			if (!commit_check(warning)) {
				rc = SLURM_ERROR;
				goto end_it;
			}
		}
	}

	printf(" Setting\n");
	sacctmgr_print_federation(federation);

	notice_thread_init();
	ret_list = acct_storage_g_modify_federations(db_conn, my_uid,
						     federation_cond,
						     federation);

	if (ret_list && list_count(ret_list)) {
		char *object = NULL;
		ListIterator itr = list_iterator_create(ret_list);
		printf(" Modified federation...\n");
		while((object = list_next(itr))) {
			printf("  %s\n", object);
		}
		list_iterator_destroy(itr);
		set = 1;
	} else if (ret_list) {
		printf(" Nothing modified\n");
		rc = SLURM_ERROR;
	} else {
		exit_code=1;
		fprintf(stderr, " Error with request: %s\n",
			slurm_strerror(errno));
		rc = SLURM_ERROR;
	}

	FREE_NULL_LIST(ret_list);

	notice_thread_fini();

	if (set) {
		if (commit_check("Would you like to commit changes?"))
			acct_storage_g_commit(db_conn, 1);
		else {
			printf(" Changes Discarded\n");
			acct_storage_g_commit(db_conn, 0);
		}
	}
end_it:
	slurmdb_destroy_federation_cond(federation_cond);
	slurmdb_destroy_federation_rec(federation);

	return rc;
}
示例#5
0
extern int sacctmgr_add_federation(int argc, char **argv)
{
	int rc = SLURM_SUCCESS;
	int i = 0, limit_set = 0;
	slurmdb_federation_rec_t *start_fed =
		xmalloc(sizeof(slurmdb_federation_rec_t));
	List name_list = list_create(slurm_destroy_char);
	List federation_list;
	ListIterator itr = NULL;
	char *name = NULL;

	slurmdb_init_federation_rec(start_fed, 0);

	for (i=0; i<argc; i++) {
		int command_len = strlen(argv[i]);
		if (!strncasecmp(argv[i], "Where", MAX(command_len, 5))
		    || !strncasecmp(argv[i], "Set", MAX(command_len, 3)))
			i++;
		limit_set += _set_rec(&i, argc, argv, name_list, start_fed);
	}
	if (exit_code) {
		FREE_NULL_LIST(name_list);
		slurmdb_destroy_federation_rec(start_fed);
		return SLURM_ERROR;
	} else if (!list_count(name_list)) {
		slurmdb_destroy_federation_rec(start_fed);
		FREE_NULL_LIST(name_list);
		fprintf(stderr, " Need name of federation to add.\n");
		return SLURM_ERROR;
	} else if (_remove_existing_feds(name_list)) {
		FREE_NULL_LIST(name_list);
		slurmdb_destroy_federation_rec(start_fed);
		return SLURM_ERROR;
	}

	if ((list_count(name_list) > 1) &&
	    start_fed && start_fed->cluster_list &&
	    list_count(start_fed->cluster_list)) {
		slurmdb_destroy_federation_rec(start_fed);
		FREE_NULL_LIST(name_list);
		fprintf(stderr, " Can't assign clusters to multiple "
				"federations.\n");
		return SLURM_ERROR;
	}
	if (start_fed && start_fed->cluster_list &&
	    list_count(start_fed->cluster_list)) {
		bool existing_feds = false;

		if (list_count(name_list) > 1){
			slurmdb_destroy_federation_rec(start_fed);
			FREE_NULL_LIST(name_list);
			fprintf(stderr, " Can't assign clusters to "
				"multiple federations.\n");
			return SLURM_ERROR;
		}

		/* ensure that clusters exist in db */
		/* and if the clusters are already assigned to another fed. */
		if (verify_fed_clusters(start_fed->cluster_list, NULL,
					&existing_feds)) {
			FREE_NULL_LIST(name_list);
			slurmdb_destroy_federation_rec(start_fed);
			return SLURM_ERROR;
		} else if (existing_feds) {
			char *warning = "\nAre you sure you want to continue?";
			if (!commit_check(warning)) {
				FREE_NULL_LIST(name_list);
				slurmdb_destroy_federation_rec(start_fed);
				return SLURM_ERROR;
			}
		}
	}

	printf(" Adding Federation(s)\n");
	federation_list = list_create(slurmdb_destroy_federation_rec);
	itr = list_iterator_create(name_list);
	while((name = list_next(itr))) {
		slurmdb_federation_rec_t *fed = NULL;
		if (!name[0]) {
			fprintf(stderr, " Skipping blank fed name.\n");
			continue;
		}
		fed = xmalloc(sizeof(slurmdb_federation_rec_t));
		slurmdb_init_federation_rec(fed, 0);
		list_append(federation_list, fed);
		slurmdb_copy_federation_rec(fed, start_fed);
		fed->name = xstrdup(name);
		printf("  %s\n", fed->name);
	}
	list_iterator_destroy(itr);
	FREE_NULL_LIST(name_list);

	if (limit_set) {
		printf(" Settings\n");
		sacctmgr_print_federation(start_fed);
	}
	slurmdb_destroy_federation_rec(start_fed);

	if (!list_count(federation_list)) {
		printf(" Nothing new added.\n");
		rc = SLURM_ERROR;
		goto end_it;
	}

	notice_thread_init();
	rc = acct_storage_g_add_federations(db_conn, my_uid,
					    federation_list);
	notice_thread_fini();

	if (rc == SLURM_SUCCESS) {
		if (commit_check("Would you like to commit changes?")) {
			acct_storage_g_commit(db_conn, 1);
		} else {
			printf(" Changes Discarded\n");
			acct_storage_g_commit(db_conn, 0);
		}
	} else {
		exit_code = 1;
		fprintf(stderr, " Problem adding federation(s): %s\n",
			slurm_strerror(rc));
		rc = SLURM_ERROR;
	}

end_it:
	FREE_NULL_LIST(federation_list);

	return rc;
}
示例#6
0
/*
 * slurm_destroy_federation_rec - Release memory allocated by
 *				  slurm_load_federation()
 */
extern void slurm_destroy_federation_rec(void *fed)
{
	slurmdb_destroy_federation_rec(fed);
}
示例#7
0
extern slurmdb_federation_rec_t *fed_mgr_state_load(char *state_save_location)
{
	Buf buffer = NULL;
	char *data = NULL, *state_file;
	time_t buf_time;
	uint16_t ver = 0;
	uint32_t data_size = 0;
	int state_fd;
	int data_allocated, data_read = 0, error_code = SLURM_SUCCESS;
	slurmdb_federation_rec_t *ret_fed = NULL;

	state_file = xstrdup_printf("%s/%s", state_save_location,
				    FED_MGR_STATE_FILE);
	state_fd = open(state_file, O_RDONLY);
	if (state_fd < 0) {
		error("No fed_mgr state file (%s) to recover", state_file);
		xfree(state_file);
		return NULL;
	} else {
		data_allocated = BUF_SIZE;
		data = xmalloc(data_allocated);
		while (1) {
			data_read = read(state_fd, &data[data_size],
					 BUF_SIZE);
			if (data_read < 0) {
				if (errno == EINTR)
					continue;
				else {
					error("Read error on %s: %m",
					      state_file);
					break;
				}
			} else if (data_read == 0)	/* eof */
				break;
			data_size      += data_read;
			data_allocated += data_read;
			xrealloc(data, data_allocated);
		}
		close(state_fd);
	}
	xfree(state_file);

	buffer = create_buf(data, data_size);

	safe_unpack16(&ver, buffer);

	debug3("Version in fed_mgr_state header is %u", ver);
	if (ver > SLURM_PROTOCOL_VERSION || ver < SLURM_MIN_PROTOCOL_VERSION) {
		error("***********************************************");
		error("Can not recover fed_mgr state, incompatible version, "
		      "got %u need > %u <= %u", ver,
		      SLURM_MIN_PROTOCOL_VERSION, SLURM_PROTOCOL_VERSION);
		error("***********************************************");
		free_buf(buffer);
		return NULL;
	}

	safe_unpack_time(&buf_time, buffer);

	error_code = slurmdb_unpack_federation_rec((void **)&ret_fed, ver,
						   buffer);
	if (error_code != SLURM_SUCCESS)
		goto unpack_error;
	else if (!ret_fed || !ret_fed->name ||
		 !list_count(ret_fed->cluster_list)) {
		slurmdb_destroy_federation_rec(ret_fed);
		ret_fed = NULL;
		error("No feds retrieved");
	} else {
		/* We want to free the connections here since they don't exist
		 * anymore, but they were packed when state was saved. */
		slurmdb_cluster_rec_t *cluster;
		ListIterator itr = list_iterator_create(
			ret_fed->cluster_list);
		while ((cluster = list_next(itr))) {
			slurm_persist_conn_destroy(cluster->fed.recv);
			cluster->fed.recv = NULL;
			slurm_persist_conn_destroy(cluster->fed.send);
			cluster->fed.send = NULL;
		}
		list_iterator_destroy(itr);
	}

	free_buf(buffer);

	return ret_fed;

unpack_error:
	free_buf(buffer);

	return NULL;
}