extern void *crypto_read_public_key(const char *path)
{
	munge_ctx_t ctx;
	char *socket;
	int auth_ttl, rc;

	/*
	 * Get slurm user id once. We use it later to verify credentials.
	 */
	slurm_user = slurm_get_slurm_user_id();

	ctx = munge_ctx_create();

	socket = _auth_opts_to_socket();
	if (socket) {
		rc = munge_ctx_set(ctx, MUNGE_OPT_SOCKET, socket);
		xfree(socket);
		if (rc != EMUNGE_SUCCESS) {
			error("munge_ctx_set failure");
			munge_ctx_destroy(ctx);
			return NULL;
		}
	}

	auth_ttl = slurm_get_auth_ttl();
	if (auth_ttl)
		(void) munge_ctx_set(ctx, MUNGE_OPT_TTL, auth_ttl);

	return (void *) ctx;
}
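/*
 * Usage sketch (not from the source): the handle returned above is an
 * opaque munge_ctx_t, so the caller's matching teardown would simply
 * cast it back and destroy it. The helper name _example_destroy_key is
 * hypothetical.
 */
static void _example_destroy_key(void *key)
{
	if (key)
		munge_ctx_destroy((munge_ctx_t) key);
}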
/* process RPC from slurmctld
 * IN msg: message received
 * OUT resp: resource allocation response message
 * RET 1 if resp is filled in, 0 otherwise */
static int _handle_msg(slurm_msg_t *msg,
		       resource_allocation_response_msg_t **resp)
{
	uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, NULL);
	uid_t uid = getuid();
	uid_t slurm_uid = (uid_t) slurm_get_slurm_user_id();
	int rc = 0;

	if ((req_uid != slurm_uid) && (req_uid != 0) && (req_uid != uid)) {
		error("Security violation, slurm message from uid %u",
		      (unsigned int) req_uid);
		return 0;
	}

	switch (msg->msg_type) {
	case RESPONSE_RESOURCE_ALLOCATION:
		debug2("resource allocation response received");
		slurm_send_rc_msg(msg, SLURM_SUCCESS);
		*resp = msg->data;
		rc = 1;
		break;
	case SRUN_JOB_COMPLETE:
		info("Job has been cancelled");
		break;
	default:
		error("received spurious message type: %d", msg->msg_type);
	}

	return rc;
}
/* process RPC from slurmctld
 * IN msg: message received
 * OUT resp: resource allocation response message or List of them
 * RET 1 if resp is filled in, 0 otherwise */
static int _handle_msg(slurm_msg_t *msg, uint16_t msg_type, void **resp)
{
	char *auth_info = slurm_get_auth_info();
	uid_t req_uid;
	uid_t uid = getuid();
	uid_t slurm_uid = (uid_t) slurm_get_slurm_user_id();
	int rc = 0;

	req_uid = g_slurm_auth_get_uid(msg->auth_cred, auth_info);
	xfree(auth_info);

	if ((req_uid != slurm_uid) && (req_uid != 0) && (req_uid != uid)) {
		error("Security violation, slurm message from uid %u",
		      (unsigned int) req_uid);
		return 0;
	}

	if (msg->msg_type == msg_type) {
		debug2("resource allocation response received");
		slurm_send_rc_msg(msg, SLURM_SUCCESS);
		*resp = msg->data;	/* transfer payload to response */
		msg->data = NULL;
		rc = 1;
	} else if (msg->msg_type == SRUN_JOB_COMPLETE) {
		info("Job has been cancelled");
	} else {
		error("%s: received spurious message type: %u",
		      __func__, msg->msg_type);
	}

	return rc;
}
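/*
 * Hedged caller sketch (not from the source): a message thread would
 * typically receive one RPC on an open slurm fd and pass it to the
 * generalized _handle_msg() above, taking ownership of the payload.
 * slurm_msg_t_init() and slurm_receive_msg() are assumed from the SLURM
 * messaging API; the fd/timeout handling is illustrative only.
 */
static void *_example_recv_alloc_resp(int fd)
{
	slurm_msg_t msg;
	void *resp = NULL;

	slurm_msg_t_init(&msg);
	if (slurm_receive_msg(fd, &msg, 0) != SLURM_SUCCESS)
		return NULL;
	(void) _handle_msg(&msg, RESPONSE_RESOURCE_ALLOCATION, &resp);
	return resp;	/* caller owns the payload; msg.data was cleared */
}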
/*
 * init() is called when the plugin is loaded, before any other functions
 * are called. Put global initialization here.
 */
extern int init(void)
{
	static int first = 1;
	char *log_file = NULL;
	int rc = SLURM_SUCCESS;
	mode_t prot = 0600;
	struct stat statbuf;

	if (slurmdbd_conf) {
		fatal("The filetxt plugin should not be run from the "
		      "slurmdbd. Please use a database plugin");
	}

	/* This check of the slurm user id is a quick and dirty way to
	 * see whether the controller is calling this; since we open the
	 * file in append mode, writes could fail if the file isn't
	 * world writable. */
	if (first && (getuid() == slurm_get_slurm_user_id())) {
		debug2("slurmdb_init() called");
		log_file = slurm_get_accounting_storage_loc();
		if (!log_file)
			log_file = xstrdup(DEFAULT_STORAGE_LOC);
		slurm_mutex_lock(&logfile_lock);
		if (LOGFILE)
			fclose(LOGFILE);
		if (*log_file != '/')
			fatal("AccountingStorageLoc must specify an "
			      "absolute pathname");
		if (stat(log_file, &statbuf) == 0)
			/* preserve current file mode */
			prot = statbuf.st_mode;
		LOGFILE = fopen(log_file, "a");
		if (LOGFILE == NULL) {
			error("open %s: %m", log_file);
			storage_init = 0;
			xfree(log_file);
			slurm_mutex_unlock(&logfile_lock);
			return SLURM_ERROR;
		} else
			chmod(log_file, prot);
		xfree(log_file);
		if (setvbuf(LOGFILE, NULL, _IOLBF, 0))
			error("setvbuf() failed");
		LOGFILE_FD = fileno(LOGFILE);
		slurm_mutex_unlock(&logfile_lock);
		storage_init = 1;
		/* since this can be loaded from many different places
		   only tell us once. */
		verbose("%s loaded", plugin_name);
		first = 0;
	} else {
		debug4("%s loaded", plugin_name);
	}

	return rc;
}
static void _handle_msg(slurm_msg_t *msg)
{
	static uint32_t slurm_uid = NO_VAL;
	char *auth_info = slurm_get_auth_info();
	uid_t req_uid = g_slurm_auth_get_uid(msg->auth_cred, auth_info);
	uid_t uid = getuid();
	job_step_kill_msg_t *ss;
	srun_user_msg_t *um;

	xfree(auth_info);	/* slurm_get_auth_info() returns an
				 * xmalloc'd string; free it here to
				 * avoid a leak */
	if (slurm_uid == NO_VAL)
		slurm_uid = slurm_get_slurm_user_id();
	if ((req_uid != slurm_uid) && (req_uid != 0) && (req_uid != uid)) {
		error("Security violation, slurm message from uid %u",
		      (unsigned int) req_uid);
		return;
	}

	switch (msg->msg_type) {
	case SRUN_PING:
		debug3("slurmctld ping received");
		slurm_send_rc_msg(msg, SLURM_SUCCESS);
		slurm_free_srun_ping_msg(msg->data);
		break;
	case SRUN_JOB_COMPLETE:
		debug("received job step complete message");
		runjob_signal(SIGKILL);
		slurm_free_srun_job_complete_msg(msg->data);
		break;
	case SRUN_USER_MSG:
		um = msg->data;
		info("%s", um->msg);
		slurm_free_srun_user_msg(msg->data);
		break;
	case SRUN_TIMEOUT:
		debug("received job step timeout message");
		_handle_timeout(msg->data);
		slurm_free_srun_timeout_msg(msg->data);
		break;
	case SRUN_STEP_SIGNAL:
		ss = msg->data;
		debug("received step signal %u RPC", ss->signal);
		if (ss->signal)
			runjob_signal(ss->signal);
		slurm_free_job_step_kill_msg(msg->data);
		break;
	default:
		debug("received spurious message type: %u", msg->msg_type);
		break;
	}
}
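/*
 * The same three-way authentication check (SlurmUser, root, or the
 * invoking user) appears in each of the handlers above. A hypothetical
 * helper consolidating it (not in the source) would read:
 */
static int _valid_uid(uid_t req_uid)
{
	return (req_uid == (uid_t) slurm_get_slurm_user_id()) ||
	       (req_uid == 0) || (req_uid == getuid());
}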
static void _load_slurm_config(void)
{
	acct_storage_backup_host = slurm_get_accounting_storage_backup_host();
	acct_storage_host = slurm_get_accounting_storage_host();
	acct_storage_loc = slurm_get_accounting_storage_loc();
	acct_storage_pass = slurm_get_accounting_storage_pass();
	acct_storage_port = slurm_get_accounting_storage_port();
	acct_storage_type = slurm_get_accounting_storage_type();
	acct_storage_user = slurm_get_accounting_storage_user();
	auth_type = slurm_get_auth_type();
	msg_timeout = slurm_get_msg_timeout();
	plugin_dir = slurm_get_plugin_dir();
	private_data = slurm_get_private_data();
	slurm_user_id = slurm_get_slurm_user_id();
	track_wckey = slurm_get_track_wckey();
}
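/*
 * A matching teardown (sketch): the string getters above return
 * xstrdup'd copies (the xfree calls in _layout_conf_dbd below show the
 * same pattern), so releasing the cached configuration would free each
 * string; the scalar fields need no cleanup. The helper name
 * _free_slurm_config is assumed, not taken from the source.
 */
static void _free_slurm_config(void)
{
	xfree(acct_storage_backup_host);
	xfree(acct_storage_host);
	xfree(acct_storage_loc);
	xfree(acct_storage_pass);
	xfree(acct_storage_type);
	xfree(acct_storage_user);
	xfree(auth_type);
	xfree(plugin_dir);
}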
extern int clusteracct_storage_g_node_up(void *db_conn,
					 struct node_record *node_ptr,
					 time_t event_time)
{
	if (slurm_acct_storage_init(NULL) < 0)
		return SLURM_ERROR;

	/* On some systems we need to make sure we don't say something
	   is completely up if there are cpus in an error state. */
	if (node_ptr->select_nodeinfo) {
		uint16_t err_cpus = 0;

		select_g_select_nodeinfo_get(node_ptr->select_nodeinfo,
					     SELECT_NODEDATA_SUBCNT,
					     NODE_STATE_ERROR,
					     &err_cpus);
		if (err_cpus) {
			char *reason = "Setting partial node down.";
			struct node_record send_node;
			struct config_record config_rec;
			uint16_t cpu_cnt = 0;

			select_g_alter_node_cnt(SELECT_GET_NODE_CPU_CNT,
						&cpu_cnt);
			err_cpus *= cpu_cnt;

			memset(&send_node, 0, sizeof(struct node_record));
			memset(&config_rec, 0, sizeof(struct config_record));
			send_node.name = node_ptr->name;
			send_node.config_ptr = &config_rec;
			send_node.cpus = err_cpus;
			config_rec.cpus = err_cpus;
			send_node.node_state = NODE_STATE_ERROR;

			return (*(g_acct_storage_context->ops.node_down))
				(db_conn, &send_node, event_time,
				 reason, slurm_get_slurm_user_id());
		}
	}

	return (*(g_acct_storage_context->ops.node_up))
		(db_conn, node_ptr, event_time);
}
/*
 * set_front_end_down - make the specified front end node's state DOWN and
 *	kill jobs as needed
 * IN front_end_ptr - pointer to the front end node
 * IN reason - why the node is DOWN
 */
extern void set_front_end_down(front_end_record_t *front_end_ptr,
			       char *reason)
{
#ifdef HAVE_FRONT_END
	time_t now = time(NULL);
	uint16_t state_flags = front_end_ptr->node_state & NODE_STATE_FLAGS;

	state_flags &= (~NODE_STATE_COMPLETING);
	front_end_ptr->node_state = NODE_STATE_DOWN | state_flags;
	trigger_front_end_down(front_end_ptr);
	(void) kill_job_by_front_end_name(front_end_ptr->name);
	if ((front_end_ptr->reason == NULL) ||
	    (strncmp(front_end_ptr->reason, "Not responding", 14) == 0)) {
		xfree(front_end_ptr->reason);
		front_end_ptr->reason = xstrdup(reason);
		front_end_ptr->reason_time = now;
		front_end_ptr->reason_uid = slurm_get_slurm_user_id();
	}
	last_front_end_update = now;
#endif
}
static void _layout_conf_dbd(GtkTreeStore *treestore)
{
	ListIterator itr = NULL;
	GtkTreeIter iter;
	config_key_pair_t *key_pair;
	int update = 0;
	time_t now = time(NULL);
	char tmp_str[128], *user_name = NULL;
	List dbd_config_list = NULL;

	/* first load accounting parms from slurm.conf */
	char *acct_storage_backup_host =
		slurm_get_accounting_storage_backup_host();
	char *acct_storage_host = slurm_get_accounting_storage_host();
	char *acct_storage_loc = slurm_get_accounting_storage_loc();
	char *acct_storage_pass = slurm_get_accounting_storage_pass();
	uint32_t acct_storage_port = slurm_get_accounting_storage_port();
	char *acct_storage_type = slurm_get_accounting_storage_type();
	char *acct_storage_user = slurm_get_accounting_storage_user();
	char *auth_type = slurm_get_auth_type();
	uint16_t msg_timeout = slurm_get_msg_timeout();
	char *plugin_dir = slurm_get_plugin_dir();
	uint16_t private_data = slurm_get_private_data();
	uint32_t slurm_user_id = slurm_get_slurm_user_id();
	uint16_t track_wckey = slurm_get_track_wckey();

	slurm_make_time_str(&now, tmp_str, sizeof(tmp_str));
	add_display_treestore_line_with_font(
		update, treestore, &iter,
		"SLURM Configuration data as of", tmp_str, "bold");

	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStorageBackupHost",
				   acct_storage_backup_host);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStorageHost", acct_storage_host);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStorageLoc", acct_storage_loc);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStoragePass", acct_storage_pass);
	sprintf(tmp_str, "%u", acct_storage_port);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStoragePort", tmp_str);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStorageType", acct_storage_type);
	add_display_treestore_line(update, treestore, &iter,
				   "AccountingStorageUser", acct_storage_user);
	add_display_treestore_line(update, treestore, &iter,
				   "AuthType", auth_type);
	sprintf(tmp_str, "%u sec", msg_timeout);
	add_display_treestore_line(update, treestore, &iter,
				   "MessageTimeout", tmp_str);
	add_display_treestore_line(update, treestore, &iter,
				   "PluginDir", plugin_dir);
	private_data_string(private_data, tmp_str, sizeof(tmp_str));
	add_display_treestore_line(update, treestore, &iter,
				   "PrivateData", tmp_str);
	user_name = uid_to_string(slurm_user_id);
	sprintf(tmp_str, "%s(%u)", user_name, slurm_user_id);
	xfree(user_name);
	add_display_treestore_line(update, treestore, &iter,
				   "SlurmUserId", tmp_str);
	add_display_treestore_line(update, treestore, &iter,
				   "SLURM_CONF", default_slurm_config_file);
	add_display_treestore_line(update, treestore, &iter,
				   "SLURM_VERSION", SLURM_VERSION_STRING);
	sprintf(tmp_str, "%u", track_wckey);
	add_display_treestore_line(update, treestore, &iter,
				   "TrackWCKey", tmp_str);

	xfree(acct_storage_backup_host);
	xfree(acct_storage_host);
	xfree(acct_storage_loc);
	xfree(acct_storage_pass);
	xfree(acct_storage_type);
	xfree(acct_storage_user);
	xfree(auth_type);
	xfree(plugin_dir);

	/* second, load parms from slurmdbd.conf */
	if (!(dbd_config_list = slurmdb_config_get(NULL)))
		return;

	add_display_treestore_line_with_font(
		update, treestore, &iter,
		"\nSlurmDBD Configuration:", NULL, "bold");
	itr = list_iterator_create(dbd_config_list);
	while ((key_pair = list_next(itr))) {
		add_display_treestore_line(update, treestore, &iter,
					   key_pair->name, key_pair->value);
	}
	list_iterator_destroy(itr);
}
extern int sacctmgr_dump_cluster(int argc, char **argv)
{
	slurmdb_user_cond_t user_cond;
	slurmdb_user_rec_t *user = NULL;
	slurmdb_hierarchical_rec_t *slurmdb_hierarchical_rec = NULL;
	slurmdb_assoc_rec_t *assoc = NULL;
	slurmdb_assoc_cond_t assoc_cond;
	List assoc_list = NULL;
	List acct_list = NULL;
	List user_list = NULL;
	List slurmdb_hierarchical_rec_list = NULL;
	char *cluster_name = NULL;
	char *file_name = NULL;
	char *user_name = NULL;
	char *line = NULL;
	int i, command_len = 0;
	FILE *fd = NULL;
	char *class_str = NULL;

	for (i = 0; i < argc; i++) {
		int end = parse_option_end(argv[i]);

		if (!end)
			command_len = strlen(argv[i]);
		else {
			command_len = end - 1;
			if (argv[i][end] == '=')
				end++;
		}
		if (!end || !strncasecmp(argv[i], "Cluster",
					 MAX(command_len, 1))) {
			if (cluster_name) {
				exit_code = 1;
				fprintf(stderr,
					" Can only do one cluster at a time. "
					"Already doing %s\n", cluster_name);
				continue;
			}
			cluster_name = xstrdup(argv[i]+end);
		} else if (!strncasecmp(argv[i], "File",
					MAX(command_len, 1))) {
			if (file_name) {
				exit_code = 1;
				fprintf(stderr,
					" File name already set to %s\n",
					file_name);
				continue;
			}
			file_name = xstrdup(argv[i]+end);
		} else {
			exit_code = 1;
			fprintf(stderr, " Unknown option: %s\n", argv[i]);
		}
	}

	if (!cluster_name) {
		exit_code = 1;
		fprintf(stderr, " We need a cluster to dump.\n");
		xfree(file_name);
		return SLURM_ERROR;
	} else {
		List temp_list = NULL;
		slurmdb_cluster_cond_t cluster_cond;
		slurmdb_cluster_rec_t *cluster_rec = NULL;

		slurmdb_init_cluster_cond(&cluster_cond, 0);
		cluster_cond.cluster_list = list_create(NULL);
		list_push(cluster_cond.cluster_list, cluster_name);

		temp_list = acct_storage_g_get_clusters(db_conn, my_uid,
							&cluster_cond);
		FREE_NULL_LIST(cluster_cond.cluster_list);
		if (!temp_list) {
			exit_code = 1;
			fprintf(stderr,
				" Problem getting clusters from database. "
				"Contact your admin.\n");
			xfree(cluster_name);
			xfree(file_name);
			return SLURM_ERROR;
		}

		cluster_rec = list_peek(temp_list);
		if (!cluster_rec) {
			exit_code = 1;
			fprintf(stderr, " Cluster %s doesn't exist.\n",
				cluster_name);
			xfree(cluster_name);
			xfree(file_name);
			FREE_NULL_LIST(temp_list);
			return SLURM_ERROR;
		}
		class_str = get_classification_str(
			cluster_rec->classification);
		FREE_NULL_LIST(temp_list);
	}

	if (!file_name) {
		file_name = xstrdup_printf("./%s.cfg", cluster_name);
		printf(" No filename given, using %s.\n", file_name);
	}

	memset(&user_cond, 0, sizeof(slurmdb_user_cond_t));
	user_cond.with_coords = 1;
	user_cond.with_wckeys = 1;
	user_cond.with_assocs = 1;

	memset(&assoc_cond, 0, sizeof(slurmdb_assoc_cond_t));
	assoc_cond.without_parent_limits = 1;
	assoc_cond.with_raw_qos = 1;
	assoc_cond.cluster_list = list_create(NULL);
	list_append(assoc_cond.cluster_list, cluster_name);
	/* this is needed for getting the correct wckeys */
	user_cond.assoc_cond = &assoc_cond;

	user_list = acct_storage_g_get_users(db_conn, my_uid, &user_cond);
	/* If not running with the DBD, assoc_cond.user_list can be set,
	 * which will mess other things up. */
	if (assoc_cond.user_list) {
		FREE_NULL_LIST(assoc_cond.user_list);
		assoc_cond.user_list = NULL;
	}

	/* make sure the person running this is an admin */
	user_name = uid_to_string_cached(my_uid);
	if (!(user = sacctmgr_find_user_from_list(user_list, user_name))) {
		exit_code = 1;
		fprintf(stderr, " Your uid (%u) is not in the "
			"accounting system, can't dump cluster.\n", my_uid);
		FREE_NULL_LIST(assoc_cond.cluster_list);
		xfree(cluster_name);
		xfree(file_name);
		FREE_NULL_LIST(user_list);
		return SLURM_ERROR;
	} else {
		if (my_uid != slurm_get_slurm_user_id() && my_uid != 0 &&
		    user->admin_level < SLURMDB_ADMIN_SUPER_USER) {
			exit_code = 1;
			fprintf(stderr, " Your user does not have sufficient "
				"privileges to dump clusters.\n");
			FREE_NULL_LIST(assoc_cond.cluster_list);
			xfree(cluster_name);
			xfree(file_name);
			FREE_NULL_LIST(user_list);
			return SLURM_ERROR;
		}
	}
	xfree(user_name);

	/* assoc_cond is set up above */
	assoc_list = acct_storage_g_get_assocs(db_conn, my_uid, &assoc_cond);
	FREE_NULL_LIST(assoc_cond.cluster_list);
	if (!assoc_list) {
		exit_code = 1;
		fprintf(stderr, " Problem with query.\n");
		xfree(cluster_name);
		xfree(file_name);
		return SLURM_ERROR;
	} else if (!list_count(assoc_list)) {
		exit_code = 1;
		fprintf(stderr, " Cluster %s returned nothing.\n",
			cluster_name);
		FREE_NULL_LIST(assoc_list);
		xfree(cluster_name);
		xfree(file_name);
		return SLURM_ERROR;
	}

	slurmdb_hierarchical_rec_list =
		slurmdb_get_acct_hierarchical_rec_list(assoc_list);

	acct_list = acct_storage_g_get_accounts(db_conn, my_uid, NULL);

	if ((fd = fopen(file_name, "w")) == NULL) {
		fprintf(stderr, "Can't open file %s, %s\n", file_name,
			slurm_strerror(errno));
		FREE_NULL_LIST(acct_list);
		FREE_NULL_LIST(assoc_list);
		xfree(cluster_name);
		xfree(file_name);
		FREE_NULL_LIST(slurmdb_hierarchical_rec_list);
		return SLURM_ERROR;
	}

	/* Add header */
	if (fprintf(fd,
		    "# To edit this file start with a cluster line "
		    "for the new cluster\n"
		    "# Cluster - 'cluster_name':MaxNodesPerJob=50\n"
		    "# Followed by Accounts you want in this fashion "
		    "(root is created by default)...\n"
		    "# Parent - 'root'\n"
		    "# Account - 'cs':MaxNodesPerJob=5:MaxJobs=4:"
		    "MaxTRESMins=cpu=20:FairShare=399:"
		    "MaxWallDuration=40:Description='Computer Science':"
		    "Organization='LC'\n"
		    "# Any of the options after a ':' can be left out and "
		    "they can be in any order.\n"
		    "# If you want to add any sub accounts just list the "
		    "Parent THAT HAS ALREADY \n"
		    "# BEEN CREATED before the account line in this "
		    "fashion...\n"
		    "# Parent - 'cs'\n"
		    "# Account - 'test':MaxNodesPerJob=1:MaxJobs=1:"
		    "MaxTRESMins=cpu=1:FairShare=1:"
		    "MaxWallDuration=1:"
		    "Description='Test Account':Organization='Test'\n"
		    "# To add users to an account add a line like this "
		    "after a Parent - 'line'\n"
		    "# User - 'lipari':MaxNodesPerJob=2:MaxJobs=3:"
		    "MaxTRESMins=cpu=4:FairShare=1:"
		    "MaxWallDurationPerJob=1\n") < 0) {
		exit_code = 1;
		fprintf(stderr, "Can't write to file");
		FREE_NULL_LIST(acct_list);
		FREE_NULL_LIST(assoc_list);
		xfree(cluster_name);
		xfree(file_name);
		FREE_NULL_LIST(slurmdb_hierarchical_rec_list);
		return SLURM_ERROR;
	}

	line = xstrdup_printf("Cluster - '%s'", cluster_name);
	if (class_str)
		xstrfmtcat(line, ":Classification='%s'", class_str);

	slurmdb_hierarchical_rec = list_peek(slurmdb_hierarchical_rec_list);
	assoc = slurmdb_hierarchical_rec->assoc;
	if (xstrcmp(assoc->acct, "root")) {
		fprintf(stderr, "Root association not on the top, it was %s\n",
			assoc->acct);
	} else
		print_file_add_limits_to_line(&line, assoc);

	if (fprintf(fd, "%s\n", line) < 0) {
		exit_code = 1;
		fprintf(stderr, " Can't write to file");
		FREE_NULL_LIST(acct_list);
		FREE_NULL_LIST(assoc_list);
		xfree(cluster_name);
		xfree(file_name);
		xfree(line);
		FREE_NULL_LIST(slurmdb_hierarchical_rec_list);
		return SLURM_ERROR;
	}
	info("%s", line);
	xfree(line);

	print_file_slurmdb_hierarchical_rec_list(
		fd, slurmdb_hierarchical_rec_list, user_list, acct_list);

	FREE_NULL_LIST(acct_list);
	FREE_NULL_LIST(assoc_list);
	xfree(cluster_name);
	xfree(file_name);
	FREE_NULL_LIST(slurmdb_hierarchical_rec_list);
	fclose(fd);

	return SLURM_SUCCESS;
}
/**
 * basil_geometry - Check node attributes, resolve (X,Y,Z) coordinates.
 *
 * Checks both SDB database and ALPS inventory for consistency. The inventory
 * part is identical to basil_inventory(), with the difference of being called
 * before valid bitmaps exist, from select_g_node_init().
 * Its dependencies are:
 * - it needs reset_job_bitmaps() in order to rebuild node_bitmap fields,
 * - it relies on _sync_nodes_to_jobs() to
 *   o kill active jobs on nodes now marked DOWN,
 *   o reset node state to ALLOCATED if it has been marked IDLE here (which
 *     is an error case, since there is no longer an ALPS reservation for the
 *     job; this is caught by the subsequent basil_inventory()).
 */
extern int basil_geometry(struct node_record *node_ptr_array, int node_cnt)
{
	struct node_record *node_ptr, *end = node_ptr_array + node_cnt;
	enum basil_version version = get_basil_version();
	struct basil_inventory *inv;

	/* General mySQL */
	MYSQL *handle;
	MYSQL_STMT *stmt = NULL;
	/* Input parameters */
	unsigned int node_id;
	/*
	 * Use a left outer join here since the attributes table may not be
	 * populated for a given nodeid (e.g. when the node has been disabled
	 * on the SMW via 'xtcli disable').
	 * The processor table has more authoritative information; if a nodeid
	 * is not listed there, it does not exist.
	 */
	const char query[] =	"SELECT x_coord, y_coord, z_coord,"
				" cab_position, cab_row, cage, slot, cpu,"
				" LOG2(coremask+1), availmem, "
				" processor_type "
				"FROM processor LEFT JOIN attributes "
				"ON processor_id = nodeid "
				"WHERE processor_id = ? ";
	const int PARAM_COUNT = 1;	/* node id */
	MYSQL_BIND params[PARAM_COUNT];

	int x_coord, y_coord, z_coord;
	int cab, row, cage, slot, cpu;
	unsigned int node_cpus, node_mem;
	char proc_type[BASIL_STRING_SHORT];
	MYSQL_BIND bind_cols[COLUMN_COUNT];
	my_bool is_null[COLUMN_COUNT];
	my_bool is_error[COLUMN_COUNT];
	int is_gemini, i;
	time_t now = time(NULL);

	memset(params, 0, sizeof(params));
	params[0].buffer_type = MYSQL_TYPE_LONG;
	params[0].is_unsigned = true;
	params[0].is_null = (my_bool *)0;
	params[0].buffer = (char *)&node_id;

	memset(bind_cols, 0, sizeof(bind_cols));
	for (i = 0; i < COLUMN_COUNT; i++) {
		bind_cols[i].is_null = &is_null[i];
		bind_cols[i].error = &is_error[i];

		if (i == COL_TYPE) {
			bind_cols[i].buffer_type = MYSQL_TYPE_STRING;
			bind_cols[i].buffer_length = sizeof(proc_type);
			bind_cols[i].buffer = proc_type;
		} else {
			bind_cols[i].buffer_type = MYSQL_TYPE_LONG;
			bind_cols[i].is_unsigned = (i >= COL_CORES);
		}
	}
	bind_cols[COL_X].buffer = (char *)&x_coord;
	bind_cols[COL_Y].buffer = (char *)&y_coord;
	bind_cols[COL_Z].buffer = (char *)&z_coord;
	bind_cols[COL_CAB].buffer = (char *)&cab;
	bind_cols[COL_ROW].buffer = (char *)&row;
	bind_cols[COL_CAGE].buffer = (char *)&cage;
	bind_cols[COL_SLOT].buffer = (char *)&slot;
	bind_cols[COL_CPU].buffer = (char *)&cpu;
	bind_cols[COL_CORES].buffer = (char *)&node_cpus;
	bind_cols[COL_MEMORY].buffer = (char *)&node_mem;

	inv = get_full_inventory(version);
	if (inv == NULL)
		fatal("failed to get initial BASIL inventory");

	info("BASIL %s initial INVENTORY: %d/%d batch nodes available",
	     bv_names_long[version], inv->batch_avail, inv->batch_total);

	handle = cray_connect_sdb();
	if (handle == NULL)
		fatal("can not connect to XTAdmin database on the SDB");

	is_gemini = cray_is_gemini_system(handle);
	if (is_gemini < 0)
		fatal("can not determine Cray XT/XE system type");

	stmt = prepare_stmt(handle, query, params, PARAM_COUNT,
			    bind_cols, COLUMN_COUNT);
	if (stmt == NULL)
		fatal("can not prepare statement to resolve Cray coordinates");

	for (node_ptr = node_record_table_ptr; node_ptr < end; node_ptr++) {
		struct basil_node *node;
		char *reason = NULL;

		if ((node_ptr->name == NULL) ||
		    (sscanf(node_ptr->name, "nid%05u", &node_id) != 1)) {
			error("can not read basil_node_id from %s",
			      node_ptr->name);
			continue;
		}

		if (exec_stmt(stmt, query, bind_cols, COLUMN_COUNT) < 0)
			fatal("can not resolve %s coordinates",
			      node_ptr->name);

		if (fetch_stmt(stmt) == 0) {
#if _DEBUG
			info("proc_type:%s cpus:%u memory:%u",
			     proc_type, node_cpus, node_mem);
			info("row:%u cage:%u slot:%u cpu:%u xyz:%u:%u:%u",
			     row, cage, slot, cpu, x_coord, y_coord, z_coord);
#endif
			if (strcmp(proc_type, "compute") != 0) {
				/*
				 * Switching a compute node to be a service
				 * node can not happen at runtime: it requires
				 * a reboot.
				 */
				fatal("Node '%s' is a %s node. Only compute "
				      "nodes can appear in slurm.conf.",
				      node_ptr->name, proc_type);
			} else if (is_null[COL_CORES] ||
				   is_null[COL_MEMORY]) {
				/*
				 * This can happen if a node has been disabled
				 * on the SMW (using 'xtcli disable <nid>').
				 * The node will still be listed in the
				 * 'processor' table, but have no 'attributes'
				 * entry (NULL values for CPUs/memory). Also,
				 * the node will be invisible to ALPS, which
				 * is why we need to set it down here already.
				 */
				node_cpus = node_mem = 0;
				reason = "node data unknown - "
					 "disabled on SMW?";
			} else if (is_null[COL_X] || is_null[COL_Y] ||
				   is_null[COL_Z]) {
				/*
				 * Similar case to the one above, observed
				 * when a blade has been removed. Node will
				 * not likely show up in ALPS.
				 */
				x_coord = y_coord = z_coord = 0;
				reason = "unknown coordinates - "
					 "hardware failure?";
			} else if (node_cpus < node_ptr->config_ptr->cpus) {
				/*
				 * FIXME: Might reconsider this policy.
				 *
				 * FastSchedule is ignored here; it requires
				 * the slurm.conf to be consistent with
				 * hardware.
				 *
				 * Assumption is that CPU/Memory do not change
				 * at runtime (Cray has no hot-swappable
				 * parts).
				 *
				 * Hence checking it in basil_inventory()
				 * would mean a lot of runtime overhead.
				 */
				fatal("slurm.conf: node %s has only Procs=%d",
				      node_ptr->name, node_cpus);
			} else if (node_mem <
				   node_ptr->config_ptr->real_memory) {
				fatal("slurm.conf: node %s has RealMemory=%d",
				      node_ptr->name, node_mem);
			}
		} else if (is_gemini) {
			fatal("Non-existing Gemini node '%s' in slurm.conf",
			      node_ptr->name);
		} else {
			fatal("Non-existing SeaStar node '%s' in slurm.conf",
			      node_ptr->name);
		}

		if (!is_gemini) {
			/*
			 * SeaStar: each node has unique coordinates
			 */
			if (node_ptr->arch == NULL)
				node_ptr->arch = xstrdup("XT");
		} else {
			/*
			 * Gemini: each 2 nodes share the same network
			 * interface (i.e., nodes 0/1 and 2/3 each have
			 * the same coordinates).
			 */
			if (node_ptr->arch == NULL)
				node_ptr->arch = xstrdup("XE");
		}

		xfree(node_ptr->node_hostname);
		xfree(node_ptr->comm_name);
		/*
		 * Convention: since we are using SLURM in frontend-mode,
		 * we use Node{Addr,HostName} as follows.
		 *
		 * NodeAddr:     <X><Y><Z> coordinates in base-36 encoding
		 *
		 * NodeHostName: c#-#c#s#n# using the NID convention
		 *               <cabinet>-<row><chassis><slot><node>
		 * - each cabinet can accommodate 3 chassis (c1..c3)
		 * - each chassis has 8 slots (s0..s7)
		 * - each slot contains 2 or 4 nodes (n0..n3)
		 *   o either 2 service nodes (n0/n3)
		 *   o or 4 compute nodes (n0..n3)
		 *   o or 2 gemini chips (g0/g1 serving n0..n3)
		 *
		 * Example: c0-0c1s0n1
		 *          - c0- = cabinet 0
		 *          -   0 = row 0
		 *          -  c1 = chassis 1
		 *          -  s0 = slot 0
		 *          -  n1 = node 1
		 */
		node_ptr->node_hostname = xstrdup_printf("c%u-%uc%us%un%u",
							 cab, row, cage,
							 slot, cpu);
		node_ptr->comm_name = xstrdup_printf("%c%c%c",
						     _enc_coord(x_coord),
						     _enc_coord(y_coord),
						     _enc_coord(z_coord));
		dim_size[0] = MAX(dim_size[0], (x_coord - 1));
		dim_size[1] = MAX(dim_size[1], (y_coord - 1));
		dim_size[2] = MAX(dim_size[2], (z_coord - 1));
#if _DEBUG
		info("%s %s %s cpus=%u, mem=%u reason=%s", node_ptr->name,
		     node_ptr->node_hostname, node_ptr->comm_name,
		     node_cpus, node_mem, reason);
#endif
		/*
		 * Check the current state reported by ALPS inventory, unless
		 * it is already evident that the node has some other problem.
		 */
		if (reason == NULL) {
			for (node = inv->f->node_head; node;
			     node = node->next)
				if (node->node_id == node_id)
					break;
			if (node == NULL) {
				reason = "not visible to ALPS - "
					 "check hardware";
			} else if (node->state == BNS_DOWN) {
				reason = "ALPS marked it DOWN";
			} else if (node->state == BNS_UNAVAIL) {
				reason = "node is UNAVAILABLE";
			} else if (node->state == BNS_ROUTE) {
				reason = "node does ROUTING";
			} else if (node->state == BNS_SUSPECT) {
				reason = "entered SUSPECT mode";
			} else if (node->state == BNS_ADMINDOWN) {
				reason = "node is ADMINDOWN";
			} else if (node->state != BNS_UP) {
				reason = "state not UP";
			} else if (node->role != BNR_BATCH) {
				reason = "mode not BATCH";
			} else if (node->arch != BNA_XT) {
				reason = "arch not XT/XE";
			}
		}

		/* Base state entirely derives from ALPS.
		 * NOTE: The node bitmaps are not defined when this code is
		 *       initially executed. */
		node_ptr->node_state &= NODE_STATE_FLAGS;
		if (reason) {
			if (node_ptr->down_time == 0)
				node_ptr->down_time = now;
			if (IS_NODE_DOWN(node_ptr)) {
				/* node still down */
				debug("Initial DOWN node %s - %s",
				      node_ptr->name, node_ptr->reason);
			} else if (slurmctld_conf.slurmd_timeout &&
				   ((now - node_ptr->down_time) <
				    slurmctld_conf.slurmd_timeout)) {
				node_ptr->node_state |= NODE_STATE_NO_RESPOND;
			} else {
				info("Initial DOWN node %s - %s",
				     node_ptr->name, reason);
				node_ptr->reason = xstrdup(reason);
				/* Node state flags preserved above */
				node_ptr->node_state |= NODE_STATE_DOWN;
				clusteracct_storage_g_node_down(
					acct_db_conn, node_ptr, now, NULL,
					slurm_get_slurm_user_id());
			}
		} else {
			bool node_up_flag = IS_NODE_DOWN(node_ptr) &&
					    !IS_NODE_DRAIN(node_ptr) &&
					    !IS_NODE_FAIL(node_ptr);

			node_ptr->down_time = 0;
			if (node_is_allocated(node))
				node_ptr->node_state |= NODE_STATE_ALLOCATED;
			else
				node_ptr->node_state |= NODE_STATE_IDLE;
			node_ptr->node_state &= (~NODE_STATE_NO_RESPOND);
			xfree(node_ptr->reason);
			if (node_up_flag) {
				info("ALPS returned node %s to service",
				     node_ptr->name);
				clusteracct_storage_g_node_up(
					acct_db_conn, node_ptr, now);
			}
		}

		free_stmt_result(stmt);
	}

	if (stmt_close(stmt))
		error("error closing statement: %s", mysql_stmt_error(stmt));
	cray_close_sdb(handle);
	free_inv(inv);

	return SLURM_SUCCESS;
}
static uint32_t _get_wckeyid(mysql_conn_t *mysql_conn, char **name,
			     uid_t uid, char *cluster, uint32_t associd)
{
	uint32_t wckeyid = 0;

	if (slurm_get_track_wckey()) {
		/* Here we are looking for the wckeyid; if it doesn't
		 * exist we will create one. We don't need to check
		 * whether it is good or not. Right now this is the only
		 * place things are created. We do this only on a job
		 * start, not on a job submit, since we don't want to
		 * slow down getting the db_index back to the
		 * controller.
		 */
		slurmdb_wckey_rec_t wckey_rec;
		char *user = NULL;

		/* Since we are unable to rely on uids here (someone may
		   not have their uid in the system yet), we must first
		   get the user name from the associd. */
		if (!(user = _get_user_from_associd(
			      mysql_conn, cluster, associd))) {
			error("No user for associd %u", associd);
			goto no_wckeyid;
		}

		/* get the default key */
		if (!*name) {
			slurmdb_user_rec_t user_rec;
			memset(&user_rec, 0, sizeof(slurmdb_user_rec_t));
			user_rec.uid = NO_VAL;
			user_rec.name = user;
			if (assoc_mgr_fill_in_user(mysql_conn, &user_rec,
						   1, NULL)
			    != SLURM_SUCCESS) {
				error("No user by name of %s assoc %u",
				      user, associd);
				xfree(user);
				goto no_wckeyid;
			}

			if (user_rec.default_wckey)
				*name = xstrdup_printf(
					"*%s", user_rec.default_wckey);
			else
				*name = xstrdup_printf("*");
		}

		memset(&wckey_rec, 0, sizeof(slurmdb_wckey_rec_t));
		wckey_rec.name = (*name);
		wckey_rec.uid = NO_VAL;
		wckey_rec.user = user;
		wckey_rec.cluster = cluster;
		if (assoc_mgr_fill_in_wckey(mysql_conn, &wckey_rec,
					    ACCOUNTING_ENFORCE_WCKEYS,
					    NULL) != SLURM_SUCCESS) {
			List wckey_list = NULL;
			slurmdb_wckey_rec_t *wckey_ptr = NULL;

			wckey_list = list_create(slurmdb_destroy_wckey_rec);

			wckey_ptr = xmalloc(sizeof(slurmdb_wckey_rec_t));
			wckey_ptr->name = xstrdup((*name));
			wckey_ptr->user = xstrdup(user);
			wckey_ptr->cluster = xstrdup(cluster);
			list_append(wckey_list, wckey_ptr);
			/* info("adding wckey '%s' '%s' '%s'", */
			/*	wckey_ptr->name, wckey_ptr->user, */
			/*	wckey_ptr->cluster); */

			/* we have already checked to make sure this was
			   the slurm user before calling this */
			if (as_mysql_add_wckeys(mysql_conn,
						slurm_get_slurm_user_id(),
						wckey_list)
			    == SLURM_SUCCESS)
				acct_storage_p_commit(mysql_conn, 1);
			/* If that worked, let's get it */
			assoc_mgr_fill_in_wckey(mysql_conn, &wckey_rec,
						ACCOUNTING_ENFORCE_WCKEYS,
						NULL);

			list_destroy(wckey_list);
		}
		xfree(user);
		/* info("got wckeyid of %d", wckey_rec.id); */
		wckeyid = wckey_rec.id;
	}
no_wckeyid:
	return wckeyid;
}
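/*
 * Worked example of the default-key naming above (hypothetical values):
 * when TrackWCKey is set and the caller supplies no wckey name, the key
 * is derived from the user record, with a leading '*' marking it as a
 * default assignment rather than an explicit one:
 *
 *   user_rec.default_wckey = "physics"  ->  *name = "*physics"
 *   user_rec.default_wckey = NULL       ->  *name = "*"
 */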
/* Find the specified BlueGene node ID and drain it from SLURM */
static void _configure_node_down(rm_bp_id_t bp_id, my_bluegene_t *my_bg)
{
	int bp_num, i, rc;
	rm_bp_id_t bpid;
	rm_BP_t *my_bp;
	rm_location_t bp_loc;
	rm_BP_state_t bp_state;
	char bg_down_node[128];

	if ((rc = bridge_get_data(my_bg, RM_BPNum, &bp_num))
	    != SLURM_SUCCESS) {
		error("bridge_get_data(RM_BPNum): %s", bg_err_str(rc));
		bp_num = 0;
	}

	for (i = 0; i < bp_num; i++) {
		if (i) {
			if ((rc = bridge_get_data(my_bg, RM_NextBP, &my_bp))
			    != SLURM_SUCCESS) {
				error("bridge_get_data(RM_NextBP): %s",
				      bg_err_str(rc));
				continue;
			}
		} else {
			if ((rc = bridge_get_data(my_bg, RM_FirstBP, &my_bp))
			    != SLURM_SUCCESS) {
				error("bridge_get_data(RM_FirstBP): %s",
				      bg_err_str(rc));
				continue;
			}
		}

		if ((rc = bridge_get_data(my_bp, RM_BPID, &bpid))
		    != SLURM_SUCCESS) {
			error("bridge_get_data(RM_BPID): %s", bg_err_str(rc));
			continue;
		}
		if (!bpid) {
			error("No BPID was returned from database");
			continue;
		}
		if (strcmp(bp_id, bpid) != 0) {	/* different midplane */
			free(bpid);
			continue;
		}
		free(bpid);

		if ((rc = bridge_get_data(my_bp, RM_BPState, &bp_state))
		    != SLURM_SUCCESS) {
			error("bridge_get_data(RM_BPState): %s",
			      bg_err_str(rc));
			continue;
		}
		if (bp_state != RM_BP_UP)	/* already down */
			continue;

		if ((rc = bridge_get_data(my_bp, RM_BPLoc, &bp_loc))
		    != SLURM_SUCCESS) {
			error("bridge_get_data(RM_BPLoc): %s",
			      bg_err_str(rc));
			continue;
		}

		/* make sure we have this midplane in the system */
		if (bp_loc.X >= DIM_SIZE[X] || bp_loc.Y >= DIM_SIZE[Y] ||
		    bp_loc.Z >= DIM_SIZE[Z]) {
			debug4("node %s%c%c%c isn't configured",
			       bg_conf->slurm_node_prefix,
			       alpha_num[bp_loc.X], alpha_num[bp_loc.Y],
			       alpha_num[bp_loc.Z]);
			continue;
		}

		snprintf(bg_down_node, sizeof(bg_down_node), "%s%c%c%c",
			 bg_conf->slurm_node_prefix,
			 alpha_num[bp_loc.X], alpha_num[bp_loc.Y],
			 alpha_num[bp_loc.Z]);

		if (node_already_down(bg_down_node))
			break;

		error("switch for node %s is bad", bg_down_node);
		slurm_drain_nodes(bg_down_node,
				  "select_bluegene: MMCS switch not UP",
				  slurm_get_slurm_user_id());
		break;
	}
}
/*
 * This could potentially lock the node lock in the slurmctld with
 * slurm_drain_node, or slurm_fail_job, so if slurmctld_locked is set we
 * will call the functions without locking the locks again.
 */
extern int down_nodecard(char *mp_name, bitoff_t io_start,
			 bool slurmctld_locked)
{
	List requests = NULL;
	List delete_list = NULL;
	ListIterator itr = NULL;
	bg_record_t *bg_record = NULL, *found_record = NULL, tmp_record;
	bg_record_t *smallest_bg_record = NULL;
	struct node_record *node_ptr = NULL;
	int mp_bit = 0;
	static int io_cnt = NO_VAL;
	static int create_size = NO_VAL;
	static select_ba_request_t blockreq;
	int rc = SLURM_SUCCESS;
	char *reason = "select_bluegene: nodecard down";

	xassert(mp_name);

	if (io_cnt == NO_VAL) {
		io_cnt = 1;
		/* Translate 1 nodecard count to ionode count */
		if ((io_cnt *= bg_conf->io_ratio))
			io_cnt--;

		/* make sure we create something that is able to be
		   created */
		if (bg_conf->smallest_block < bg_conf->nodecard_cnode_cnt)
			create_size = bg_conf->nodecard_cnode_cnt;
		else
			create_size = bg_conf->smallest_block;
	}

	node_ptr = find_node_record(mp_name);
	if (!node_ptr) {
		error("down_nodecard: invalid node specified '%s'", mp_name);
		return EINVAL;
	}

	/* This is here as a sanity check to make sure we don't core on
	   these bits when we set them below. */
	if (io_start >= bg_conf->ionodes_per_mp ||
	    (io_start + io_cnt) >= bg_conf->ionodes_per_mp) {
		debug("io %d-%d not configured on this "
		      "system, only %d ionodes per midplane",
		      io_start, io_start + io_cnt,
		      bg_conf->ionodes_per_mp);
		return EINVAL;
	}
	mp_bit = (node_ptr - node_record_table_ptr);

	memset(&blockreq, 0, sizeof(select_ba_request_t));

	blockreq.conn_type[0] = SELECT_SMALL;
	blockreq.save_name = mp_name;

	debug3("here setting node %d of %d and ionodes %d-%d of %d",
	       mp_bit, node_record_count, io_start,
	       io_start + io_cnt, bg_conf->ionodes_per_mp);

	memset(&tmp_record, 0, sizeof(bg_record_t));
	tmp_record.mp_count = 1;
	tmp_record.cnode_cnt = bg_conf->nodecard_cnode_cnt;
	tmp_record.mp_bitmap = bit_alloc(node_record_count);
	bit_set(tmp_record.mp_bitmap, mp_bit);

	tmp_record.ionode_bitmap = bit_alloc(bg_conf->ionodes_per_mp);
	bit_nset(tmp_record.ionode_bitmap, io_start, io_start + io_cnt);

	slurm_mutex_lock(&block_state_mutex);
	itr = list_iterator_create(bg_lists->main);
	while ((bg_record = list_next(itr))) {
		if (!bit_test(bg_record->mp_bitmap, mp_bit))
			continue;

		if (!blocks_overlap(bg_record, &tmp_record))
			continue;

		if (bg_record->job_running > NO_JOB_RUNNING) {
			if (slurmctld_locked)
				job_fail(bg_record->job_running);
			else
				slurm_fail_job(bg_record->job_running);
		}
		/* If running in Dynamic mode and the block is smaller
		   than the create size, just continue on. */
		if ((bg_conf->layout_mode == LAYOUT_DYNAMIC) &&
		    (bg_record->cnode_cnt < create_size)) {
			if (!delete_list)
				delete_list = list_create(NULL);
			list_append(delete_list, bg_record);
			continue;
		}

		/* keep track of the smallest size that is at least
		   the size of create_size. */
		if (!smallest_bg_record ||
		    (smallest_bg_record->cnode_cnt > bg_record->cnode_cnt))
			smallest_bg_record = bg_record;
	}
	list_iterator_destroy(itr);
	slurm_mutex_unlock(&block_state_mutex);

	if (bg_conf->layout_mode != LAYOUT_DYNAMIC) {
		debug3("running non-dynamic mode");

		/* This should never happen, but just in case... */
		if (delete_list)
			list_destroy(delete_list);

		/* If we found a block that is smaller or equal to a
		   midplane we will just mark it in an error state as
		   opposed to draining the node. */
		if (smallest_bg_record &&
		    (smallest_bg_record->cnode_cnt <
		     bg_conf->mp_cnode_cnt)) {
			if (smallest_bg_record->state
			    & BG_BLOCK_ERROR_FLAG) {
				rc = SLURM_NO_CHANGE_IN_DATA;
				goto cleanup;
			}

			rc = put_block_in_error_state(smallest_bg_record,
						      reason);
			goto cleanup;
		}

		debug("No block under 1 midplane available for this "
		      "nodecard. Draining the whole node.");
		if (!node_already_down(mp_name)) {
			if (slurmctld_locked)
				drain_nodes(mp_name, reason,
					    slurm_get_slurm_user_id());
			else
				slurm_drain_nodes(
					mp_name, reason,
					slurm_get_slurm_user_id());
		}
		rc = SLURM_SUCCESS;
		goto cleanup;
	}

	/* below is only for Dynamic mode */

	if (delete_list) {
		int cnt_set = 0;
		bitstr_t *iobitmap = bit_alloc(bg_conf->ionodes_per_mp);
		/* don't lock here since it is handled inside
		   put_block_in_error_state */
		itr = list_iterator_create(delete_list);
		while ((bg_record = list_next(itr))) {
			debug2("combining smaller than nodecard "
			       "dynamic block %s",
			       bg_record->bg_block_id);
			while (bg_record->job_running > NO_JOB_RUNNING)
				sleep(1);

			bit_or(iobitmap, bg_record->ionode_bitmap);
			cnt_set++;
		}
		list_iterator_destroy(itr);
		list_destroy(delete_list);
		if (!cnt_set) {
			FREE_NULL_BITMAP(iobitmap);
			rc = SLURM_ERROR;
			goto cleanup;
		}
		/* Set the start to be the same as the start of the
		   ionode_bitmap. If no ionodes are set (not a small
		   block) set io_start = 0. */
		if ((io_start = bit_ffs(iobitmap)) == -1) {
			io_start = 0;
			if (create_size > bg_conf->nodecard_cnode_cnt)
				blockreq.small128 = 4;
			else
				blockreq.small32 = 16;
		} else if (create_size <= bg_conf->nodecard_cnode_cnt)
			blockreq.small32 = 1;
		else
			/* this should never happen */
			blockreq.small128 = 1;

		FREE_NULL_BITMAP(iobitmap);
	} else if (smallest_bg_record) {
		debug2("smallest dynamic block is %s",
		       smallest_bg_record->bg_block_id);
		if (smallest_bg_record->state & BG_BLOCK_ERROR_FLAG) {
			rc = SLURM_NO_CHANGE_IN_DATA;
			goto cleanup;
		}

		while (smallest_bg_record->job_running > NO_JOB_RUNNING)
			sleep(1);

		if (smallest_bg_record->cnode_cnt == create_size) {
			rc = put_block_in_error_state(smallest_bg_record,
						      reason);
			goto cleanup;
		}

		if (create_size > smallest_bg_record->cnode_cnt) {
			/* We should never get here. This means we have
			 * a create_size that is bigger than a block
			 * that is already made. */
			rc = put_block_in_error_state(smallest_bg_record,
						      reason);
			goto cleanup;
		}
		debug3("node count is %d", smallest_bg_record->cnode_cnt);
		switch (smallest_bg_record->cnode_cnt) {
#ifndef HAVE_BGL
		case 64:
			blockreq.small32 = 2;
			break;
		case 256:
			blockreq.small32 = 8;
			break;
#endif
		case 128:
			blockreq.small32 = 4;
			break;
		case 512:
		default:
			blockreq.small32 = 16;
			break;
		}

		if (create_size != bg_conf->nodecard_cnode_cnt) {
			blockreq.small128 = blockreq.small32 / 4;
			blockreq.small32 = 0;
			io_start = 0;
		} else if ((io_start =
			    bit_ffs(smallest_bg_record->ionode_bitmap))
			   == -1)
			/* Set the start to be the same as the start of
			   the ionode_bitmap. If no ionodes are set (not
			   a small block) set io_start = 0. */
			io_start = 0;
	} else {
		switch (create_size) {
#ifndef HAVE_BGL
		case 64:
			blockreq.small64 = 8;
			break;
		case 256:
			blockreq.small256 = 2;
			break;	/* break was missing here; without it we
				 * fell through and also requested
				 * sixteen 32-cnode blocks */
#endif
		case 32:
			blockreq.small32 = 16;
			break;
		case 128:
			blockreq.small128 = 4;
			break;
		case 512:
			if (!node_already_down(mp_name)) {
				char *reason = "select_bluegene: "
					       "nodecard down";
				if (slurmctld_locked)
					drain_nodes(
						mp_name, reason,
						slurm_get_slurm_user_id());
				else
					slurm_drain_nodes(
						mp_name, reason,
						slurm_get_slurm_user_id());
			}
			rc = SLURM_SUCCESS;
			goto cleanup;
		default:
			error("Unknown create size of %d", create_size);
			break;
		}
		/* Since we don't have a block in this midplane,
		   we need to start at the beginning. */
		io_start = 0;
		/* We also need a bg_block to pretend to be the
		   smallest block that takes up the entire midplane. */
	}

	/* Here we need to add blocks that take up nodecards on this
	   midplane. Since Slurm only keeps track of midplanes natively,
	   this is the only way to handle this case. */
	requests = list_create(destroy_bg_record);
	add_bg_record(requests, NULL, &blockreq, 1, io_start);

	slurm_mutex_lock(&block_state_mutex);
	delete_list = list_create(NULL);
	while ((bg_record = list_pop(requests))) {
		itr = list_iterator_create(bg_lists->main);
		while ((found_record = list_next(itr))) {
			if (!blocks_overlap(bg_record, found_record))
				continue;
			list_push(delete_list, found_record);
			list_remove(itr);
		}
		list_iterator_destroy(itr);

		/* we need to add this record since it doesn't exist */
		if (bridge_block_create(bg_record) == SLURM_ERROR) {
			destroy_bg_record(bg_record);
			error("down_nodecard: "
			      "unable to configure block in api");
			continue;
		}

		debug("adding block %s to fill in small blocks "
		      "around bad nodecards",
		      bg_record->bg_block_id);
		print_bg_record(bg_record);
		list_append(bg_lists->main, bg_record);
		if (bit_overlap(bg_record->ionode_bitmap,
				tmp_record.ionode_bitmap)) {
			/* here we know the error block doesn't exist
			   so just set the state here */
			slurm_mutex_unlock(&block_state_mutex);
			rc = put_block_in_error_state(bg_record, reason);
			slurm_mutex_lock(&block_state_mutex);
		}
	}
	list_destroy(requests);

	if (delete_list) {
		slurm_mutex_unlock(&block_state_mutex);
		free_block_list(NO_VAL, delete_list, 0, 0);
		list_destroy(delete_list);
	}
	slurm_mutex_lock(&block_state_mutex);
	sort_bg_record_inc_size(bg_lists->main);
	slurm_mutex_unlock(&block_state_mutex);
	last_bg_update = time(NULL);

cleanup:
	FREE_NULL_BITMAP(tmp_record.mp_bitmap);
	FREE_NULL_BITMAP(tmp_record.ionode_bitmap);

	return rc;
}
/* block_state_mutex should be locked before calling */
static int _check_all_blocks_error(int node_inx, time_t event_time,
				   char *reason)
{
	bg_record_t *bg_record = NULL;
	ListIterator itr = NULL;
	struct node_record send_node, *node_ptr;
	struct config_record config_rec;
	int total_cpus = 0;
	int rc = SLURM_SUCCESS;

	xassert(node_inx < node_record_count);	/* index must be in range
						 * before we dereference the
						 * table below */
	node_ptr = &node_record_table_ptr[node_inx];

	/* only do this if the node isn't in the DRAINED state.
	   DRAINING is ok */
	if (IS_NODE_DRAINED(node_ptr))
		return rc;

	memset(&send_node, 0, sizeof(struct node_record));
	memset(&config_rec, 0, sizeof(struct config_record));
	send_node.name = xstrdup(node_ptr->name);
	send_node.config_ptr = &config_rec;

	/* here we need to check if there are any other blocks on this
	   midplane and adjust things correctly */
	itr = list_iterator_create(bg_lists->main);
	while ((bg_record = list_next(itr))) {
		/* only look at blocks in an error state */
		if (!(bg_record->state & BG_BLOCK_ERROR_FLAG))
			continue;

		if (!bit_test(bg_record->mp_bitmap, node_inx))
			continue;

		if (bg_record->cpu_cnt >= bg_conf->cpus_per_mp) {
			total_cpus = bg_conf->cpus_per_mp;
			break;
		} else
			total_cpus += bg_record->cpu_cnt;
	}
	list_iterator_destroy(itr);

	send_node.cpus = total_cpus;
	config_rec.cpus = total_cpus;

	if (send_node.cpus) {
		if (!reason)
			reason = "update block: setting partial node down.";
		if (!node_ptr->reason)
			node_ptr->reason = xstrdup(reason);
		node_ptr->reason_time = event_time;
		node_ptr->reason_uid = slurm_get_slurm_user_id();

		send_node.node_state = NODE_STATE_ERROR;
		rc = clusteracct_storage_g_node_down(acct_db_conn,
						     &send_node, event_time,
						     reason,
						     node_ptr->reason_uid);
	} else {
		if (node_ptr->reason)
			xfree(node_ptr->reason);
		node_ptr->reason_time = 0;
		node_ptr->reason_uid = NO_VAL;

		send_node.node_state = NODE_STATE_IDLE;
		rc = clusteracct_storage_g_node_up(acct_db_conn,
						   &send_node, event_time);
	}

	xfree(send_node.name);

	return rc;
}