/*
 * fed_mgr_update_feds - process a list of federation records delivered in
 *	an update object.
 *
 * Records are popped off update->objects one at a time.  The first record
 * whose cluster_list contains slurmctld_cluster_name is handed to
 * _join_federation() and the scan stops; every record popped before it is
 * destroyed.  If no record contains this cluster, the current federation (if
 * any) is left under the FED write lock.
 *
 * IN update - update object; update->objects is consumed as described above.
 * RET SLURM_SUCCESS in all cases.
 */
extern int fed_mgr_update_feds(slurmdb_update_object_t *update)
{
	slurmctld_lock_t fed_write_lock = {
		NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, WRITE_LOCK };
	slurmdb_federation_rec_t *fed_rec = NULL;
	slurmdb_cluster_rec_t *my_cluster = NULL;
	List fed_list;
	bool mgr_started;

	if (!update->objects)
		return SLURM_SUCCESS;

	/*
	 * If the fed mgr has not been started yet it can't be started from
	 * here; everything gets set up later, so just ignore the update.
	 */
	slurm_mutex_lock(&init_mutex);
	mgr_started = inited;
	slurm_mutex_unlock(&init_mutex);
	if (!mgr_started)
		return SLURM_SUCCESS;

	/* Only one update may be processed at a time. */
	slurm_mutex_lock(&update_mutex);

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR)
		info("Got a federation update");

	fed_list = update->objects;

	/*
	 * Locate the federation this cluster belongs to.  Joining it takes
	 * care of refreshing stored information and of the connections to the
	 * other clusters in the federation.
	 *
	 * Open question carried over from the original code: a remote cluster
	 * being removed from the federation has to be detected so that its
	 * connection can be closed.
	 */
	while ((fed_rec = list_pop(fed_list))) {
		my_cluster = NULL;
		if (fed_rec->cluster_list)
			my_cluster = list_find_first(
				fed_rec->cluster_list,
				slurmdb_find_cluster_in_list,
				slurmctld_cluster_name);

		if (my_cluster) {
			_join_federation(fed_rec, my_cluster, true);
			break;
		}

		/* Not our federation, nothing else needs this record. */
		slurmdb_destroy_federation_rec(fed_rec);
	}

	/* The list ran dry without a match: we are in no federation. */
	if (!fed_rec) {
		if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR)
			info("Not part of any federation");
		lock_slurmctld(fed_write_lock);
		_leave_federation();
		unlock_slurmctld(fed_write_lock);
	}

	slurm_mutex_unlock(&update_mutex);

	return SLURM_SUCCESS;
}
/*
 * _leave_federation - forget the federation this cluster currently belongs
 *	to.  A no-op when fed_mgr_fed_rec is not set.
 *
 * Closes the sibling connections, destroys the ping thread, destroys the
 * stored federation record and clears both fed_mgr_fed_rec and
 * fed_mgr_cluster_rec.
 *
 * NOTE: the caller must hold the FED write lock.
 */
static void _leave_federation()
{
	if (fed_mgr_fed_rec) {
		if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR)
			info("Leaving federation %s", fed_mgr_fed_rec->name);

		_close_sibling_conns();
		_destroy_ping_thread();

		slurmdb_destroy_federation_rec(fed_mgr_fed_rec);
		fed_mgr_cluster_rec = NULL;
		fed_mgr_fed_rec = NULL;
	}
}
/*
 * _fed_mgr_ptr_init - install db_fed as the active federation record.
 *
 * When a federation record is already installed, the send/recv connections
 * of every cluster present in both the old and the new record are moved over
 * to the new record before the old record is destroyed.
 *
 * IN db_fed  - new federation record; stored in fed_mgr_fed_rec.
 * IN cluster - this cluster's record; stored in fed_mgr_cluster_rec when no
 *		federation was installed before.
 *
 * NOTE: the FED lock must NOT be held on entry, the write lock is taken here.
 */
static void _fed_mgr_ptr_init(slurmdb_federation_rec_t *db_fed,
			      slurmdb_cluster_rec_t *cluster)
{
	slurmctld_lock_t fed_write_lock = {
		NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, WRITE_LOCK };
	slurmdb_cluster_rec_t *new_cluster, *old_cluster;
	ListIterator itr;

	xassert(cluster);

	if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR)
		info("Joining federation %s", db_fed->name);

	lock_slurmctld(fed_write_lock);

	if (!fed_mgr_fed_rec) {
		/* First federation we join, nothing to carry over. */
		fed_mgr_cluster_rec = cluster;
	} else {
		/*
		 * Already part of a federation: preserve the existing
		 * connections.
		 */
		itr = list_iterator_create(db_fed->cluster_list);
		while ((new_cluster = list_next(itr))) {
			/* Our own entry only needs to be remembered. */
			if (!xstrcmp(new_cluster->name,
				     slurmctld_cluster_name)) {
				fed_mgr_cluster_rec = new_cluster;
				continue;
			}

			old_cluster = list_find_first(
				fed_mgr_fed_rec->cluster_list,
				slurmdb_find_cluster_in_list,
				new_cluster->name);
			if (old_cluster) {
				/* Hand over the connections we already have. */
				slurm_mutex_lock(&old_cluster->lock);
				new_cluster->fed.send = old_cluster->fed.send;
				old_cluster->fed.send = NULL;
				new_cluster->fed.recv = old_cluster->fed.recv;
				old_cluster->fed.recv = NULL;
				slurm_mutex_unlock(&old_cluster->lock);
			}
			/*
			 * Clusters found only in the new record get no
			 * connection here.  Connections left behind in the
			 * old record are not closed explicitly; per the
			 * original note that happens when the old record is
			 * destroyed below.
			 */
		}
		list_iterator_destroy(itr);

		slurmdb_destroy_federation_rec(fed_mgr_fed_rec);
	}

	fed_mgr_fed_rec = db_fed;

	unlock_slurmctld(fed_write_lock);
}
extern int sacctmgr_modify_federation(int argc, char **argv) { int rc = SLURM_SUCCESS; int i=0; int cond_set = 0, prev_set = 0, rec_set = 0, set = 0; List ret_list = NULL; slurmdb_federation_cond_t *federation_cond = xmalloc(sizeof(slurmdb_federation_cond_t)); slurmdb_federation_rec_t *federation = xmalloc(sizeof(slurmdb_federation_rec_t)); slurmdb_init_federation_cond(federation_cond, 0); slurmdb_init_federation_rec(federation, 0); for (i=0; i<argc; i++) { int command_len = strlen(argv[i]); if (!strncasecmp(argv[i], "Where", MAX(command_len, 5))) { i++; prev_set = _set_cond(&i, argc, argv, federation_cond, NULL); cond_set |= prev_set; } else if (!strncasecmp(argv[i], "Set", MAX(command_len, 3))) { i++; prev_set = _set_rec(&i, argc, argv, NULL, federation); rec_set |= prev_set; } else { prev_set = _set_cond(&i, argc, argv, federation_cond, NULL); cond_set |= prev_set; } } if (exit_code) { rc = SLURM_ERROR; goto end_it; } else if (!rec_set) { exit_code=1; fprintf(stderr, " You didn't give me anything to set\n"); rc = SLURM_ERROR; goto end_it; } else if (!cond_set) { if (!commit_check("You didn't set any conditions " "with 'WHERE'.\n" "Are you sure you want to continue?")) { printf("Aborted\n"); rc = SLURM_SUCCESS; goto end_it; } } else if (verify_federations_exist( federation_cond->federation_list)) { rc = SLURM_ERROR; goto end_it; } if (federation->cluster_list) { bool existing_feds = false; char *mod_fed = NULL; slurmdb_cluster_rec_t *tmp_c = NULL; List cluster_list = federation->cluster_list; if (federation_cond->federation_list && (list_count(federation_cond->federation_list) > 1)) { fprintf(stderr, " Can't assign clusters to " "multiple federations.\n"); rc = SLURM_ERROR; goto end_it; } /* Add all clusters that need to be removed if clearing all * clusters or add clusters that will be removed if setting * clusters to specific set. 
*/ mod_fed = list_peek(federation_cond->federation_list); if ((!list_count(cluster_list) || ((tmp_c = list_peek(cluster_list)) && *tmp_c->name != '-' && *tmp_c->name != '+')) && ((rc = _add_clusters_to_remove(cluster_list, mod_fed)) || (rc = _change_assigns_to_adds(cluster_list)))) { goto end_it; } else if ((rc = verify_fed_clusters(cluster_list, mod_fed, &existing_feds))) { goto end_it; } else if (!list_count(cluster_list)) { printf("Nothing to change\n"); rc = SLURM_ERROR; goto end_it; } else if (existing_feds) { char *warning = "\nAre you sure you want to continue?"; if (!commit_check(warning)) { rc = SLURM_ERROR; goto end_it; } } } printf(" Setting\n"); sacctmgr_print_federation(federation); notice_thread_init(); ret_list = acct_storage_g_modify_federations(db_conn, my_uid, federation_cond, federation); if (ret_list && list_count(ret_list)) { char *object = NULL; ListIterator itr = list_iterator_create(ret_list); printf(" Modified federation...\n"); while((object = list_next(itr))) { printf(" %s\n", object); } list_iterator_destroy(itr); set = 1; } else if (ret_list) { printf(" Nothing modified\n"); rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", slurm_strerror(errno)); rc = SLURM_ERROR; } FREE_NULL_LIST(ret_list); notice_thread_fini(); if (set) { if (commit_check("Would you like to commit changes?")) acct_storage_g_commit(db_conn, 1); else { printf(" Changes Discarded\n"); acct_storage_g_commit(db_conn, 0); } } end_it: slurmdb_destroy_federation_cond(federation_cond); slurmdb_destroy_federation_rec(federation); return rc; }
extern int sacctmgr_add_federation(int argc, char **argv) { int rc = SLURM_SUCCESS; int i = 0, limit_set = 0; slurmdb_federation_rec_t *start_fed = xmalloc(sizeof(slurmdb_federation_rec_t)); List name_list = list_create(slurm_destroy_char); List federation_list; ListIterator itr = NULL; char *name = NULL; slurmdb_init_federation_rec(start_fed, 0); for (i=0; i<argc; i++) { int command_len = strlen(argv[i]); if (!strncasecmp(argv[i], "Where", MAX(command_len, 5)) || !strncasecmp(argv[i], "Set", MAX(command_len, 3))) i++; limit_set += _set_rec(&i, argc, argv, name_list, start_fed); } if (exit_code) { FREE_NULL_LIST(name_list); slurmdb_destroy_federation_rec(start_fed); return SLURM_ERROR; } else if (!list_count(name_list)) { slurmdb_destroy_federation_rec(start_fed); FREE_NULL_LIST(name_list); fprintf(stderr, " Need name of federation to add.\n"); return SLURM_ERROR; } else if (_remove_existing_feds(name_list)) { FREE_NULL_LIST(name_list); slurmdb_destroy_federation_rec(start_fed); return SLURM_ERROR; } if ((list_count(name_list) > 1) && start_fed && start_fed->cluster_list && list_count(start_fed->cluster_list)) { slurmdb_destroy_federation_rec(start_fed); FREE_NULL_LIST(name_list); fprintf(stderr, " Can't assign clusters to multiple " "federations.\n"); return SLURM_ERROR; } if (start_fed && start_fed->cluster_list && list_count(start_fed->cluster_list)) { bool existing_feds = false; if (list_count(name_list) > 1){ slurmdb_destroy_federation_rec(start_fed); FREE_NULL_LIST(name_list); fprintf(stderr, " Can't assign clusters to " "multiple federations.\n"); return SLURM_ERROR; } /* ensure that clusters exist in db */ /* and if the clusters are already assigned to another fed. 
*/ if (verify_fed_clusters(start_fed->cluster_list, NULL, &existing_feds)) { FREE_NULL_LIST(name_list); slurmdb_destroy_federation_rec(start_fed); return SLURM_ERROR; } else if (existing_feds) { char *warning = "\nAre you sure you want to continue?"; if (!commit_check(warning)) { FREE_NULL_LIST(name_list); slurmdb_destroy_federation_rec(start_fed); return SLURM_ERROR; } } } printf(" Adding Federation(s)\n"); federation_list = list_create(slurmdb_destroy_federation_rec); itr = list_iterator_create(name_list); while((name = list_next(itr))) { slurmdb_federation_rec_t *fed = NULL; if (!name[0]) { fprintf(stderr, " Skipping blank fed name.\n"); continue; } fed = xmalloc(sizeof(slurmdb_federation_rec_t)); slurmdb_init_federation_rec(fed, 0); list_append(federation_list, fed); slurmdb_copy_federation_rec(fed, start_fed); fed->name = xstrdup(name); printf(" %s\n", fed->name); } list_iterator_destroy(itr); FREE_NULL_LIST(name_list); if (limit_set) { printf(" Settings\n"); sacctmgr_print_federation(start_fed); } slurmdb_destroy_federation_rec(start_fed); if (!list_count(federation_list)) { printf(" Nothing new added.\n"); rc = SLURM_ERROR; goto end_it; } notice_thread_init(); rc = acct_storage_g_add_federations(db_conn, my_uid, federation_list); notice_thread_fini(); if (rc == SLURM_SUCCESS) { if (commit_check("Would you like to commit changes?")) { acct_storage_g_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); acct_storage_g_commit(db_conn, 0); } } else { exit_code = 1; fprintf(stderr, " Problem adding federation(s): %s\n", slurm_strerror(rc)); rc = SLURM_ERROR; } end_it: FREE_NULL_LIST(federation_list); return rc; }
/*
 * slurm_destroy_federation_rec - free a federation record obtained from
 *	slurm_load_federation().
 *
 * IN fed - record to free; passed unchanged to
 *	    slurmdb_destroy_federation_rec().
 */
extern void slurm_destroy_federation_rec(void *fed)
{
	slurmdb_destroy_federation_rec(fed);
}
extern slurmdb_federation_rec_t *fed_mgr_state_load(char *state_save_location) { Buf buffer = NULL; char *data = NULL, *state_file; time_t buf_time; uint16_t ver = 0; uint32_t data_size = 0; int state_fd; int data_allocated, data_read = 0, error_code = SLURM_SUCCESS; slurmdb_federation_rec_t *ret_fed = NULL; state_file = xstrdup_printf("%s/%s", state_save_location, FED_MGR_STATE_FILE); state_fd = open(state_file, O_RDONLY); if (state_fd < 0) { error("No fed_mgr state file (%s) to recover", state_file); xfree(state_file); return NULL; } else { data_allocated = BUF_SIZE; data = xmalloc(data_allocated); while (1) { data_read = read(state_fd, &data[data_size], BUF_SIZE); if (data_read < 0) { if (errno == EINTR) continue; else { error("Read error on %s: %m", state_file); break; } } else if (data_read == 0) /* eof */ break; data_size += data_read; data_allocated += data_read; xrealloc(data, data_allocated); } close(state_fd); } xfree(state_file); buffer = create_buf(data, data_size); safe_unpack16(&ver, buffer); debug3("Version in fed_mgr_state header is %u", ver); if (ver > SLURM_PROTOCOL_VERSION || ver < SLURM_MIN_PROTOCOL_VERSION) { error("***********************************************"); error("Can not recover fed_mgr state, incompatible version, " "got %u need > %u <= %u", ver, SLURM_MIN_PROTOCOL_VERSION, SLURM_PROTOCOL_VERSION); error("***********************************************"); free_buf(buffer); return NULL; } safe_unpack_time(&buf_time, buffer); error_code = slurmdb_unpack_federation_rec((void **)&ret_fed, ver, buffer); if (error_code != SLURM_SUCCESS) goto unpack_error; else if (!ret_fed || !ret_fed->name || !list_count(ret_fed->cluster_list)) { slurmdb_destroy_federation_rec(ret_fed); ret_fed = NULL; error("No feds retrieved"); } else { /* We want to free the connections here since they don't exist * anymore, but they were packed when state was saved. 
*/ slurmdb_cluster_rec_t *cluster; ListIterator itr = list_iterator_create( ret_fed->cluster_list); while ((cluster = list_next(itr))) { slurm_persist_conn_destroy(cluster->fed.recv); cluster->fed.recv = NULL; slurm_persist_conn_destroy(cluster->fed.send); cluster->fed.send = NULL; } list_iterator_destroy(itr); } free_buf(buffer); return ret_fed; unpack_error: free_buf(buffer); return NULL; }