/* Translate a job's feature specification to node boot options * RET node boot options, must be xfreed */ extern char *node_features_g_job_xlate(char *job_features) { DEF_TIMERS; char *node_features = NULL, *tmp_str; int i; START_TIMER; (void) node_features_g_init(); slurm_mutex_lock(&g_context_lock); for (i = 0; i < g_context_cnt; i++) { tmp_str = (*(ops[i].job_xlate))(job_features); if (tmp_str) { if (node_features) { xstrfmtcat(node_features, ",%s", tmp_str); xfree(tmp_str); } else { node_features = tmp_str; } } } slurm_mutex_unlock(&g_context_lock); END_TIMER2("node_features_g_job_xlate"); return node_features; }
/* Translate a node's feature specification by replacing any features associated * with this plugin in the original value with the new values, preserving any * features that are not associated with this plugin * RET node's new merged features, must be xfreed */ extern char *node_features_g_node_xlate(char *new_features, char *orig_features) { DEF_TIMERS; char *new_value = NULL, *tmp_str; int i; START_TIMER; (void) node_features_g_init(); slurm_mutex_lock(&g_context_lock); for (i = 0; i < g_context_cnt; i++) { if (new_value) tmp_str = new_value; else if (orig_features) tmp_str = xstrdup(orig_features); else tmp_str = NULL; new_value = (*(ops[i].node_xlate))(new_features, tmp_str); xfree(tmp_str); } slurm_mutex_unlock(&g_context_lock); END_TIMER2("node_features_g_node_xlate"); return new_value; }
/* * Execute the job_submit() function in each job submit plugin. * If any plugin function returns anything other than SLURM_SUCCESS * then stop and forward it's return value. * IN job_desc - Job request specification * IN submit_uid - User issuing job submit request * OUT err_msg - Custom error message to the user, caller to xfree results */ extern int job_submit_plugin_submit(struct job_descriptor *job_desc, uint32_t submit_uid, char **err_msg) { DEF_TIMERS; int i, rc; xassert(verify_lock(CONF_LOCK, READ_LOCK)); xassert(verify_lock(JOB_LOCK, READ_LOCK)); xassert(verify_lock(NODE_LOCK, READ_LOCK)); xassert(verify_lock(PART_LOCK, READ_LOCK)); START_TIMER; rc = job_submit_plugin_init(); slurm_mutex_lock(&g_context_lock); /* NOTE: On function entry read locks are set on config, job, node and * partition structures. Do not attempt to unlock them and then * lock again (say with a write lock) since doing so will trigger * a deadlock with the g_context_lock above. */ for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++) rc = (*(ops[i].submit))(job_desc, submit_uid, err_msg); slurm_mutex_unlock(&g_context_lock); END_TIMER2("job_submit_plugin_submit"); return rc; }
/* * load_part_uid_allow_list - reload the allow_uid list of partitions * if required (updated group file or force set) * IN force - if set then always reload the allow_uid list */ void load_part_uid_allow_list(int force) { static time_t last_update_time; time_t temp_time; ListIterator part_iterator; struct part_record *part_ptr; DEF_TIMERS; START_TIMER; temp_time = _get_group_tlm(); if ((force == 0) && (temp_time == last_update_time)) return; debug("Updating partition uid access list"); last_update_time = temp_time; last_part_update = time(NULL); part_iterator = list_iterator_create(part_list); while ((part_ptr = (struct part_record *) list_next(part_iterator))) { xfree(part_ptr->allow_uids); part_ptr->allow_uids = _get_groups_members(part_ptr->allow_groups); } clear_group_cache(); list_iterator_destroy(part_iterator); END_TIMER2("load_part_uid_allow_list"); }
/* Get node features plugin configuration */ extern List node_features_g_get_config(void) { DEF_TIMERS; int i, rc; List conf_list = NULL; config_plugin_params_t *p; START_TIMER; rc = node_features_g_init(); if (g_context_cnt > 0) conf_list = list_create(destroy_config_plugin_params); slurm_mutex_lock(&g_context_lock); for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++) { p = xmalloc(sizeof(config_plugin_params_t)); p->key_pairs = list_create(destroy_config_key_pair); (*(ops[i].get_config))(p); if (!p->name) destroy_config_plugin_params(p); else list_append(conf_list, p); } slurm_mutex_unlock(&g_context_lock); END_TIMER2("node_features_g_get_config"); return conf_list; }
/*
 * Get this node's current and available MCDRAM and NUMA settings from BIOS.
 * Each loaded plugin is called in turn with the same two pointers.
 * avail_modes IN/OUT - available modes, must be xfreed
 * current_mode IN/OUT - current modes, must be xfreed
 */
extern void node_features_g_node_state(char **avail_modes, char **current_mode)
{
	DEF_TIMERS;
	int inx = 0;

	START_TIMER;
	(void) node_features_g_init();
	slurm_mutex_lock(&g_context_lock);
	while (inx < g_context_cnt) {
		(*(ops[inx].node_state))(avail_modes, current_mode);
		inx++;
	}
	slurm_mutex_unlock(&g_context_lock);
	END_TIMER2("node_features_g_node_state");
}
/*
 * Perform set up for step launch.
 * Nothing is done when plugin initialization fails.
 * mem_sort IN - Trigger sort of memory pages (KNL zonesort)
 * numa_bitmap IN - NUMA nodes allocated to this job
 */
extern void node_features_g_step_config(bool mem_sort, bitstr_t *numa_bitmap)
{
	DEF_TIMERS;
	int inx;

	START_TIMER;
	if (node_features_g_init() == SLURM_SUCCESS) {
		slurm_mutex_lock(&g_context_lock);
		inx = 0;
		while (inx < g_context_cnt) {
			(*(ops[inx].step_config))(mem_sort, numa_bitmap);
			inx++;
		}
		slurm_mutex_unlock(&g_context_lock);
		END_TIMER2("node_features_g_step_config");
	}
}
/*
 * Test if a job's feature specification is valid.
 * RET SLURM_SUCCESS if initialization and every plugin succeed, otherwise
 *     the first non-success code encountered
 */
extern int node_features_g_job_valid(char *job_features)
{
	DEF_TIMERS;
	int inx, rc;

	START_TIMER;
	rc = node_features_g_init();
	slurm_mutex_lock(&g_context_lock);
	for (inx = 0; inx < g_context_cnt; inx++) {
		if (rc != SLURM_SUCCESS)
			break;
		rc = (*(ops[inx].job_valid))(job_features);
	}
	slurm_mutex_unlock(&g_context_lock);
	END_TIMER2("node_features_g_job_valid");

	return rc;
}
/*
 * Update active and available features on specified nodes, sets features on
 * all nodes if node_list is NULL.
 * RET SLURM_SUCCESS or the first non-success code returned by
 *     initialization or a plugin
 */
extern int node_features_g_get_node(char *node_list)
{
	DEF_TIMERS;
	int inx = 0, rc;

	START_TIMER;
	rc = node_features_g_init();
	slurm_mutex_lock(&g_context_lock);
	while ((inx < g_context_cnt) && (rc == SLURM_SUCCESS)) {
		rc = (*(ops[inx].get_node))(node_list);
		inx++;
	}
	slurm_mutex_unlock(&g_context_lock);
	END_TIMER2("node_features_g_get_node");

	return rc;
}
/*
 * Reset plugin configuration information.
 * RET SLURM_SUCCESS or the first non-success code returned by
 *     initialization or a plugin
 */
extern int node_features_g_reconfig(void)
{
	DEF_TIMERS;
	int inx, rc;

	START_TIMER;
	rc = node_features_g_init();
	slurm_mutex_lock(&g_context_lock);
	for (inx = 0; inx < g_context_cnt; inx++) {
		if (rc != SLURM_SUCCESS)
			break;
		rc = (*(ops[inx].reconfig))();
	}
	slurm_mutex_unlock(&g_context_lock);
	END_TIMER2("node_features_g_reconfig");

	return rc;
}
/*
 * Sets the node's active features based upon job constraints.
 * NOTE: Executed by the slurmd daemon.
 * IN active_features - New active features
 * RET error code (the result of plugin initialization is ignored)
 */
extern int node_features_g_node_set(char *active_features)
{
	DEF_TIMERS;
	int inx = 0;
	int rc = SLURM_SUCCESS;

	START_TIMER;
	(void) node_features_g_init();
	slurm_mutex_lock(&g_context_lock);
	while ((inx < g_context_cnt) && (rc == SLURM_SUCCESS)) {
		rc = (*(ops[inx].node_set))(active_features);
		inx++;
	}
	slurm_mutex_unlock(&g_context_lock);
	END_TIMER2("node_features_g_node_set");

	return rc;
}
/*
 * Return TRUE if this (one) feature name is under a plugin's control.
 * The plugins are queried in order and the search stops at the first one
 * that claims the feature.
 * IN feature - single feature name to test
 * RET true if any loaded plugin reports the feature as changeable
 */
extern bool node_features_g_changeable_feature(char *feature)
{
	DEF_TIMERS;
	int i;
	bool changeable = false;

	START_TIMER;
	(void) node_features_g_init();
	slurm_mutex_lock(&g_context_lock);
	for (i = 0; ((i < g_context_cnt) && !changeable); i++)
		changeable = (*(ops[i].changeable_feature))(feature);
	slurm_mutex_unlock(&g_context_lock);
	/* Timer label must name this function so slow-call reports are right */
	END_TIMER2("node_features_g_changeable_feature");

	return changeable;
}
/*
 * Determine if the specified user can modify the currently available node
 * features. Every plugin must agree; the first refusal ends the search.
 * RET true if no plugin refuses (including when no plugin is loaded)
 */
extern bool node_features_g_user_update(uid_t uid)
{
	DEF_TIMERS;
	bool allowed = true;
	int inx;

	START_TIMER;
	(void) node_features_g_init();
	slurm_mutex_lock(&g_context_lock);
	for (inx = 0; inx < g_context_cnt; inx++) {
		allowed = (*(ops[inx].user_update))(uid);
		if (allowed != true)
			break;
	}
	slurm_mutex_unlock(&g_context_lock);
	END_TIMER2("node_features_g_user_update");

	return allowed;
}
/*
 * Return estimated reboot time, in seconds.
 * RET the largest value reported by any loaded plugin, 0 if none is loaded
 */
extern uint32_t node_features_g_boot_time(void)
{
	DEF_TIMERS;
	uint32_t boot_time = 0;
	int i;

	START_TIMER;
	(void) node_features_g_init();
	slurm_mutex_lock(&g_context_lock);
	for (i = 0; i < g_context_cnt; i++) {
		/*
		 * Call the plugin exactly once; passing the call directly to
		 * a MAX() macro may evaluate it twice.
		 */
		uint32_t plugin_time = (*(ops[i].boot_time))();

		if (plugin_time > boot_time)
			boot_time = plugin_time;
	}
	slurm_mutex_unlock(&g_context_lock);
	/* Timer label must name this function so slow-call reports are right */
	END_TIMER2("node_features_g_boot_time");

	return boot_time;
}
/* * Execute the job_modify() function in each job submit plugin. * If any plugin function returns anything other than SLURM_SUCCESS * then stop and forward it's return value. */ extern int job_submit_plugin_modify(struct job_descriptor *job_desc, struct job_record *job_ptr, uint32_t submit_uid) { DEF_TIMERS; int i, rc; START_TIMER; rc = job_submit_plugin_init(); slurm_mutex_lock(&g_context_lock); for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++) rc = (*(ops[i].modify))(job_desc, job_ptr, submit_uid); slurm_mutex_unlock(&g_context_lock); END_TIMER2("job_submit_plugin_modify"); return rc; }
/*
 * Return true if the plugin requires RebootProgram for booting nodes.
 * The search stops at the first plugin that answers true.
 */
extern bool node_features_g_node_reboot(void)
{
	DEF_TIMERS;
	bool need_reboot = false;
	int inx;

	START_TIMER;
	(void) node_features_g_init();
	slurm_mutex_lock(&g_context_lock);
	for (inx = 0; (inx < g_context_cnt) && !need_reboot; inx++)
		need_reboot = (*(ops[inx].node_reboot))();
	slurm_mutex_unlock(&g_context_lock);
	END_TIMER2("node_features_g_node_reboot");

	return need_reboot;
}
/*
 * Return bitmap of KNL nodes, NULL if none identified.
 * The first non-NULL bitmap returned by a plugin is used.
 */
extern bitstr_t *node_features_g_get_node_bitmap(void)
{
	DEF_TIMERS;
	bitstr_t *bitmap = NULL;
	int inx;

	START_TIMER;
	(void) node_features_g_init();
	slurm_mutex_lock(&g_context_lock);
	for (inx = 0; (inx < g_context_cnt) && !bitmap; inx++)
		bitmap = (*(ops[inx].get_node_bitmap))();
	slurm_mutex_unlock(&g_context_lock);
	END_TIMER2("node_features_g_get_node_bitmap");

	return bitmap;
}
/*
 * Return TRUE if the specified node update request is valid with respect
 * to features changes (i.e. don't permit a non-KNL node to set KNL features).
 * The first plugin that rejects the request ends the search.
 *
 * node_ptr IN - Pointer to struct node_record record
 * update_node_msg IN - Pointer to update request
 */
extern bool node_features_g_node_update_valid(void *node_ptr,
					update_node_msg_t *update_node_msg)
{
	DEF_TIMERS;
	bool is_valid = true;
	int inx;

	START_TIMER;
	(void) node_features_g_init();
	slurm_mutex_lock(&g_context_lock);
	for (inx = 0; (inx < g_context_cnt) && is_valid; inx++) {
		is_valid = (*(ops[inx].node_update_valid))(node_ptr,
							   update_node_msg);
	}
	slurm_mutex_unlock(&g_context_lock);
	END_TIMER2("node_features_g_node_update_valid");

	return is_valid;
}
/* * event_notify - Notify Moab of some event * event_code IN - message code to send Moab * 1234 - job state change * 1235 - partition state change * desc IN - event description * RET 0 on success, -1 on failure */ extern int event_notify(int event_code, char *desc) { time_t now = time(NULL); int rc = 0, retry = 2; char *event_msg; DEF_TIMERS; START_TIMER; if (e_port == 0) { /* Event notification disabled */ return 0; } if (event_code == 1234) { /* job change */ if (job_aggregation_time && (difftime(now, last_notify_time) < job_aggregation_time)) { debug("wiki event notification already sent recently"); return 0; } event_msg = "1234"; } else if (event_code == 1235) { /* configuration change */ event_msg = "1235"; } else { error("event_notify: invalid event code: %d", event_code); return -1; } slurm_mutex_lock(&event_mutex); while (retry) { if ((event_fd == -1) && ((rc = _open_fd(now)) == -1)) { /* Can't even open socket. * Don't retry again for a while (2 mins) * to avoid long delays from ETIMEDOUT */ last_notify_time = now + 120; break; } if (write(event_fd, event_msg, (strlen(event_msg) + 1)) > 0) { verbose("wiki event_notification sent: %s", desc); last_notify_time = now; rc = 0; /* Dave Jackson says to leave the connection * open, but Moab isn't. Without the _close_fd() * here, the next write() generates a broken pipe * error. Just remove the _close_fd() and this * comment when Moab maintains the connection. */ _close_fd(); break; /* success */ } error("wiki event notification failure: %m"); rc = -1; retry--; if ((errno == EAGAIN) || (errno == EINTR)) continue; _close_fd(); if (errno == EPIPE) { /* If Moab closed the socket we get an EPIPE, * retry once */ continue; } else { break; } } slurm_mutex_unlock(&event_mutex); END_TIMER2("event_notify"); return rc; }
/*
 * Rollup usage for one cluster: run the hourly, daily and monthly rollups
 * for every period boundary between the start time and the end time.
 *
 * pg_conn IN - database connection
 * cluster IN - cluster name, used as the schema prefix in the queries
 * sent_start IN - start of the window; if 0 the last rollup times are read
 *	from the cluster's last_ran_table (or created with init_last_ran)
 * sent_end IN - end of the window; if 0 the current time is used. When it
 *	is set the last_ran_table is NOT updated.
 * archive_data IN - passed through to the daily and monthly rollups
 * RET SLURM_SUCCESS or an error code
 *
 * NOTE(review): "query" and "result" are presumably declared by DEF_VARS and
 * DEF_QUERY_RET presumably releases "query" -- confirm against the macros.
 */
static int _cluster_rollup_usage(pgsql_conn_t *pg_conn, char *cluster,
				 time_t sent_start, time_t sent_end,
				 uint16_t archive_data)
{
	DEF_VARS;
	int rc = SLURM_SUCCESS;
	time_t last_hour = sent_start;
	time_t last_day = sent_start;
	time_t last_month = sent_start;
	time_t start_time = 0;
	time_t end_time = 0;
	time_t my_time = sent_end;
	struct tm start_tm, end_tm;
	DEF_TIMERS;
	char *ru_fields = "hourly_rollup, daily_rollup, monthly_rollup";
	enum {
		F_HOUR,
		F_DAY,
		F_MONTH,
		F_COUNT
	};

	if (!sent_start) {
		query = xstrdup_printf("SELECT %s FROM %s.%s LIMIT 1",
				       ru_fields, cluster, last_ran_table);
		result = DEF_QUERY_RET;
		if (!result)
			return SLURM_ERROR;

		if (PQntuples(result)) {
			last_hour = atoi(PG_VAL(F_HOUR));
			last_day = atoi(PG_VAL(F_DAY));
			last_month = atoi(PG_VAL(F_MONTH));
			PQclear(result);
		} else {
			time_t now = time(NULL);
			PQclear(result);
			query = xstrdup_printf("SELECT %s.init_last_ran(%ld);",
					       cluster, (long)now);
			result = DEF_QUERY_RET;
			if (!result)
				return SLURM_ERROR;
			last_hour = last_day = last_month = atoi(PG_VAL(0));
			PQclear(result);
			if (last_hour < 0) {
				debug("cluster %s not registered, "
				      "not doing rollup", cluster);
				return SLURM_SUCCESS;
			}
		}
	}

	if (!my_time)
		my_time = time(NULL);

	if (!localtime_r(&last_hour, &start_tm)) {
		error("Couldn't get localtime from hour start %ld",
		      (long)last_hour);
		return SLURM_ERROR;
	}
	if (!localtime_r(&my_time, &end_tm)) {
		error("Couldn't get localtime from hour end %ld",
		      (long)my_time);
		return SLURM_ERROR;
	}

	/* below and anywhere in a rollup plugin when dealing with
	 * epoch times we need to set the tm_isdst = -1 so we don't
	 * have to worry about the time changes. Not setting it to -1
	 * will cause problems in the day and month with the date change.
	 */

	/* align to hour boundary */
	start_tm.tm_sec = 0;
	start_tm.tm_min = 0;
	start_tm.tm_isdst = -1;
	start_time = mktime(&start_tm);
	end_tm.tm_sec = 0;
	end_tm.tm_min = 0;
	end_tm.tm_isdst = -1;
	end_time = mktime(&end_tm);

	//slurm_mutex_lock(&rollup_lock);
	global_last_rollup = end_time;
	//slurm_mutex_unlock(&rollup_lock);

	if (end_time-start_time > 0) {
		START_TIMER;
		if ((rc = pgsql_hourly_rollup(pg_conn, cluster, start_time,
					      end_time)) != SLURM_SUCCESS)
			return rc;
		END_TIMER3("hourly_rollup", 5000000);
		/* If we have a sent_end do not update the last_run_table */
		if (!sent_end)
			query = xstrdup_printf(
				"UPDATE %s.%s SET hourly_rollup=%ld",
				cluster, last_ran_table, (long)end_time);
	} else {
		debug2("no need to run this hour %ld <= %ld",
		       (long)end_time, (long)start_time);
	}

	/*
	 * From here on "query" may hold the pending UPDATE statement, so it
	 * is released on every error return.
	 */
	if (!localtime_r(&last_day, &start_tm)) {
		error("Couldn't get localtime from day %ld", (long)last_day);
		xfree(query);
		return SLURM_ERROR;
	}

	/* align to day boundary */
	start_tm.tm_sec = 0;
	start_tm.tm_min = 0;
	start_tm.tm_hour = 0;
	start_tm.tm_isdst = -1;
	start_time = mktime(&start_tm);
	end_tm.tm_hour = 0;
	end_tm.tm_isdst = -1;
	end_time = mktime(&end_tm);

	if (end_time-start_time > 0) {
		START_TIMER;
		if ((rc = pgsql_daily_rollup(pg_conn, cluster, start_time,
					     end_time, archive_data))
		    != SLURM_SUCCESS) {
			xfree(query);
			return rc;
		}
		END_TIMER2("daily_rollup");
		if (query && !sent_end)
			xstrfmtcat(query, ", daily_rollup=%ld",
				   (long)end_time);
		else if (!sent_end)
			query = xstrdup_printf(
				"UPDATE %s.%s SET daily_rollup=%ld",
				cluster, last_ran_table, (long)end_time);
	} else {
		debug2("no need to run this day %ld <= %ld",
		       (long)end_time, (long)start_time);
	}

	if (!localtime_r(&last_month, &start_tm)) {
		error("Couldn't get localtime from month %ld",
		      (long)last_month);
		xfree(query);
		return SLURM_ERROR;
	}

	/* align to month boundary */
	start_tm.tm_sec = 0;
	start_tm.tm_min = 0;
	start_tm.tm_hour = 0;
	start_tm.tm_mday = 1;
	start_tm.tm_isdst = -1;
	start_time = mktime(&start_tm);
	end_tm.tm_sec = 0;
	end_tm.tm_min = 0;
	end_tm.tm_hour = 0;
	end_tm.tm_mday = 1;
	end_tm.tm_isdst = -1;
	end_time = mktime(&end_tm);

	if (end_time-start_time > 0) {
		START_TIMER;
		if ((rc = pgsql_monthly_rollup(pg_conn, cluster, start_time,
					       end_time, archive_data))
		    != SLURM_SUCCESS) {
			xfree(query);
			return rc;
		}
		END_TIMER2("monthly_rollup");

		if (query && !sent_end)
			xstrfmtcat(query, ", monthly_rollup=%ld",
				   (long)end_time);
		else if (!sent_end)
			query = xstrdup_printf(
				"UPDATE %s.%s SET monthly_rollup=%ld",
				cluster, last_ran_table, (long)end_time);
	} else {
		debug2("no need to run this month %ld <= %ld",
		       (long)end_time, (long)start_time);
	}

	/* Record the new "last ran" times, if any were collected */
	if (query) {
		rc = DEF_QUERY_RET_RC;
	}
	return rc;
}
/*****************************************************************************\
 * Parse, process and respond to a request
 *
 * new_fd IN - descriptor the reply is written to; nothing is done if < 0
 * msg IN - raw request text
 *
 * The status and reply text are carried in err_code and err_msg.
\*****************************************************************************/
static void _proc_msg(slurm_fd_t new_fd, char *msg)
{
/* True when the command text begins with the keyword literal kw */
#define CMD_IS(kw) (strncmp(cmd, kw, sizeof(kw) - 1) == 0)
	DEF_TIMERS;
	char *request, *cmd, *timer_name = NULL;
	char reply[128];

	if (new_fd < 0)
		return;

	START_TIMER;
	if (msg == NULL) {
		err_code = -300;
		err_msg = "NULL request message";
		error("wiki: NULL request message");
		goto send_status;
	}

	if (_parse_msg(msg, &request) != 0)
		goto send_status;

	cmd = strstr(request, "CMD=");
	if (!cmd) {
		err_code = -300;
		err_msg = "request lacks CMD";
		error("wiki: request lacks CMD");
		goto send_status;
	}
	cmd += 4;	/* step over "CMD=" */
	err_code = 0;

	if (CMD_IS("GETJOBS")) {
		timer_name = "wiki:GETJOBS";
		if (!get_jobs(cmd, &err_code, &err_msg))
			goto send_preformatted;
	} else if (CMD_IS("GETNODES")) {
		timer_name = "wiki:GETNODES";
		if (!get_nodes(cmd, &err_code, &err_msg))
			goto send_preformatted;
	} else if (CMD_IS("STARTJOB")) {
		timer_name = "wiki:STARTJOB";
		start_job(cmd, &err_code, &err_msg);
	} else if (CMD_IS("CANCELJOB")) {
		timer_name = "wiki:CANCELJOB";
		cancel_job(cmd, &err_code, &err_msg);
	} else if (CMD_IS("SUSPENDJOB")) {
		timer_name = "wiki:SUSPENDJOB";
		suspend_job(cmd, &err_code, &err_msg);
	} else if (CMD_IS("RESUMEJOB")) {
		timer_name = "wiki:RESUMEJOB";
		resume_job(cmd, &err_code, &err_msg);
	} else if (CMD_IS("MODIFYJOB")) {
		timer_name = "wiki:MODIFYJOB";
		job_modify_wiki(cmd, &err_code, &err_msg);
	} else {
		err_code = -300;
		err_msg = "unsupported request type";
		error("wiki: unrecognized request type: %s", request);
	}
	END_TIMER2(timer_name);

send_status:
	/* Plain status reply built from err_code and err_msg */
	snprintf(reply, sizeof(reply), "SC=%d RESPONSE=%s", err_code, err_msg);
	_send_reply(new_fd, reply);
	return;

send_preformatted:
	/* Message is pre-formatted by get_jobs and get_nodes
	 * ONLY if no error. Send message and xfree the buffer.
	 */
	_send_reply(new_fd, err_msg);
	xfree(err_msg);
	return;
#undef CMD_IS
}
/*
 * Bring an existing table in line with the given definition: compare the
 * table's current columns and keys with "fields" and "ending", build one
 * "alter ignore table" statement, and run it when something changed or when
 * the definition stored in table_defs_table differs.
 *
 * mysql_conn IN - database connection
 * table_name IN - table to update
 * fields IN - wanted columns, terminated by an entry with a NULL name
 * ending IN - table definition tail, searched for "primary key (",
 *	"unique index (" and ", key " clauses
 * RET SLURM_SUCCESS or SLURM_ERROR
 *
 * NOTE: Insure that mysql_conn->lock is NOT set on function entry
 */
static int _mysql_make_table_current(mysql_conn_t *mysql_conn, char *table_name,
				     storage_field_t *fields, char *ending)
{
	char *query = NULL;
	char *correct_query = NULL;
	MYSQL_RES *result = NULL;
	MYSQL_ROW row;
	int i = 0;
	List columns = NULL;
	ListIterator itr = NULL;
	char *col = NULL;
	int adding = 0;
	int run_update = 0;
	char *primary_key = NULL;
	char *unique_index = NULL;
	int old_primary = 0;
	char *old_index = NULL;
	char *temp = NULL, *temp2 = NULL;
	List keys_list = NULL;
	db_key_t *db_key = NULL;
	DEF_TIMERS;

	/* figure out the unique keys in the table */
	query = xstrdup_printf("show index from %s where non_unique=0",
			       table_name);
	if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) {
		xfree(query);
		return SLURM_ERROR;
	}
	xfree(query);
	while ((row = mysql_fetch_row(result))) {
		// row[2] is the key name
		if (!strcasecmp(row[2], "PRIMARY"))
			old_primary = 1;
		else if (!old_index)
			old_index = xstrdup(row[2]);
	}
	mysql_free_result(result);

	/* figure out the non-unique keys in the table */
	query = xstrdup_printf("show index from %s where non_unique=1",
			       table_name);
	if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) {
		xfree(query);
		xfree(old_index);	/* may have been set above */
		return SLURM_ERROR;
	}
	xfree(query);

	itr = NULL;
	keys_list = list_create(_destroy_db_key);
	while ((row = mysql_fetch_row(result))) {
		if (!itr)
			itr = list_iterator_create(keys_list);
		else
			list_iterator_reset(itr);
		while ((db_key = list_next(itr))) {
			if (!strcmp(db_key->name, row[2]))
				break;
		}

		if (db_key) {
			/* another column of a key already seen */
			xstrfmtcat(db_key->columns, ", %s", row[4]);
		} else {
			db_key = xmalloc(sizeof(db_key_t));
			db_key->name = xstrdup(row[2]); // name
			db_key->columns = xstrdup(row[4]); // column name
			list_append(keys_list, db_key); // don't use list_push
		}
	}
	mysql_free_result(result);

	if (itr) {
		list_iterator_destroy(itr);
		itr = NULL;
	}

	/* figure out the existing columns in the table */
	query = xstrdup_printf("show columns from %s", table_name);
	if (!(result = mysql_db_query_ret(mysql_conn, query, 0))) {
		xfree(query);
		xfree(old_index);
		FREE_NULL_LIST(keys_list);
		return SLURM_ERROR;
	}
	xfree(query);
	columns = list_create(slurm_destroy_char);
	while ((row = mysql_fetch_row(result))) {
		col = xstrdup(row[0]); //Field
		list_append(columns, col);
	}
	mysql_free_result(result);

	/*
	 * "query" is the statement that is executed; "correct_query" is the
	 * form stored in table_defs_table (always "modify", never "add").
	 */
	itr = list_iterator_create(columns);
	query = xstrdup_printf("alter ignore table %s", table_name);
	correct_query = xstrdup_printf("alter ignore table %s", table_name);
	START_TIMER;
	while (fields[i].name) {
		int found = 0;

		list_iterator_reset(itr);
		while ((col = list_next(itr))) {
			if (!strcmp(col, fields[i].name)) {
				xstrfmtcat(query, " modify `%s` %s,",
					   fields[i].name,
					   fields[i].options);
				xstrfmtcat(correct_query, " modify `%s` %s,",
					   fields[i].name,
					   fields[i].options);
				/* matched columns are removed so that only
				 * obsolete ones remain in the list */
				list_delete_item(itr);
				found = 1;
				break;
			}
		}
		if (!found) {
			if (i) {
				info("adding column %s after %s in table %s",
				     fields[i].name,
				     fields[i-1].name,
				     table_name);
				xstrfmtcat(query, " add `%s` %s after %s,",
					   fields[i].name,
					   fields[i].options,
					   fields[i-1].name);
				xstrfmtcat(correct_query, " modify `%s` %s,",
					   fields[i].name,
					   fields[i].options);
			} else {
				info("adding column %s at the beginning "
				     "of table %s",
				     fields[i].name,
				     table_name);
				xstrfmtcat(query, " add `%s` %s first,",
					   fields[i].name,
					   fields[i].options);
				xstrfmtcat(correct_query, " modify `%s` %s,",
					   fields[i].name,
					   fields[i].options);
			}
			adding = 1;
		}

		i++;
	}

	list_iterator_reset(itr);
	while ((col = list_next(itr))) {
		adding = 1;
		info("dropping column %s from table %s", col, table_name);
		xstrfmtcat(query, " drop %s,", col);
	}

	list_iterator_destroy(itr);
	list_destroy(columns);

	if ((temp = strstr(ending, "primary key ("))) {
		int open = 0, close = 0;
		int end = 0;
		int balanced = 0;

		/* scan to the parenthesis closing the column list */
		while (temp[end++]) {
			if (temp[end] == '(')
				open++;
			else if (temp[end] == ')')
				close++;
			else
				continue;
			if (open == close) {
				balanced = 1;
				break;
			}
		}
		/* An unbalanced clause is ignored; "end" is then past the
		 * terminating NUL and must not be used as an index. */
		if (balanced) {
			end++;
			primary_key = xstrndup(temp, end);
			if (old_primary) {
				xstrcat(query, " drop primary key,");
				xstrcat(correct_query, " drop primary key,");
			}
			xstrfmtcat(query, " add %s,", primary_key);
			xstrfmtcat(correct_query, " add %s,", primary_key);

			xfree(primary_key);
		}
	}

	if ((temp = strstr(ending, "unique index ("))) {
		int open = 0, close = 0;
		int end = 0;
		int balanced = 0;

		while (temp[end++]) {
			if (temp[end] == '(')
				open++;
			else if (temp[end] == ')')
				close++;
			else
				continue;
			if (open == close) {
				balanced = 1;
				break;
			}
		}
		if (balanced) {
			end++;
			unique_index = xstrndup(temp, end);
			if (old_index) {
				xstrfmtcat(query, " drop index %s,",
					   old_index);
				xstrfmtcat(correct_query, " drop index %s,",
					   old_index);
			}
			xstrfmtcat(query, " add %s,", unique_index);
			xstrfmtcat(correct_query, " add %s,", unique_index);
			xfree(unique_index);
		}
	}
	xfree(old_index);

	temp2 = ending;
	itr = list_iterator_create(keys_list);
	while ((temp = strstr(temp2, ", key "))) {
		int open = 0, close = 0, name_end = 0;
		int end = 5;
		int balanced = 0;
		char *new_key_name = NULL, *new_key = NULL;

		while (temp[end++]) {
			if (!name_end && (temp[end] == ' ')) {
				name_end = end;
				continue;
			} else if (temp[end] == '(') {
				open++;
				if (!name_end)
					name_end = end;
			} else if (temp[end] == ')')
				close++;
			else
				continue;
			if (open == close) {
				balanced = 1;
				break;
			}
		}
		if (!balanced) {
			/* Unterminated clause: "end" is past the NUL, so
			 * there is nothing valid left to scan. */
			break;
		}

		end++;
		new_key_name = xstrndup(temp+6, name_end-6);
		new_key = xstrndup(temp+2, end-2); // skip ', '
		while ((db_key = list_next(itr))) {
			if (!strcmp(db_key->name, new_key_name)) {
				list_remove(itr);
				break;
			}
		}
		list_iterator_reset(itr);
		if (db_key) {
			xstrfmtcat(query, " drop key %s,", db_key->name);
			xstrfmtcat(correct_query, " drop key %s,",
				   db_key->name);
			_destroy_db_key(db_key);
		} else
			info("adding %s to table %s", new_key, table_name);

		xstrfmtcat(query, " add %s,", new_key);
		xstrfmtcat(correct_query, " add %s,", new_key);

		xfree(new_key);
		xfree(new_key_name);
		temp2 = temp + end;
	}

	/* flush extra (old) keys */
	while ((db_key = list_next(itr))) {
		info("dropping key %s from table %s", db_key->name, table_name);
		xstrfmtcat(query, " drop key %s,", db_key->name);
	}
	list_iterator_destroy(itr);

	list_destroy(keys_list);

	/* replace the trailing ',' */
	query[strlen(query)-1] = ';';
	correct_query[strlen(correct_query)-1] = ';';
	//info("%d query\n%s", __LINE__, query);

	/* see if we have already done this definition */
	if (!adding) {
		char *quoted = slurm_add_slash_to_quotes(query);
		char *query2 = xstrdup_printf("select table_name from "
					      "%s where definition='%s'",
					      table_defs_table, quoted);

		xfree(quoted);
		run_update = 1;
		if ((result = mysql_db_query_ret(mysql_conn, query2, 0))) {
			if ((row = mysql_fetch_row(result)))
				run_update = 0;
			mysql_free_result(result);
		}
		xfree(query2);
		if (run_update) {
			/* 2 = no stored definition for this table at all,
			 * 1 = a stored definition exists but differs */
			run_update = 2;
			query2 = xstrdup_printf("select table_name from "
						"%s where table_name='%s'",
						table_defs_table, table_name);
			if ((result = mysql_db_query_ret(
				     mysql_conn, query2, 0))) {
				if ((row = mysql_fetch_row(result)))
					run_update = 1;
				mysql_free_result(result);
			}
			xfree(query2);
		}
	}

	/* if something has changed run the alter line */
	if (run_update || adding) {
		time_t now = time(NULL);
		char *query2 = NULL;
		char *quoted = NULL;

		if (run_update == 2)
			debug4("Table %s doesn't exist, adding", table_name);
		else
			debug("Table %s has changed. Updating...", table_name);
		if (mysql_db_query(mysql_conn, query)) {
			xfree(query);
			xfree(correct_query);
			return SLURM_ERROR;
		}
		quoted = slurm_add_slash_to_quotes(correct_query);
		query2 = xstrdup_printf("insert into %s (creation_time, "
					"mod_time, table_name, definition) "
					"values (%ld, %ld, '%s', '%s') "
					"on duplicate key update "
					"definition='%s', mod_time=%ld;",
					table_defs_table, (long)now, (long)now,
					table_name, quoted,
					quoted, (long)now);
		xfree(quoted);
		if (mysql_db_query(mysql_conn, query2)) {
			xfree(query2);
			xfree(query);
			xfree(correct_query);
			return SLURM_ERROR;
		}
		xfree(query2);
	}

	xfree(query);
	xfree(correct_query);
	/* the timer label is built from the table name */
	query = xstrdup_printf("make table current %s", table_name);
	END_TIMER2(query);
	xfree(query);
	return SLURM_SUCCESS;
}
/*****************************************************************************\
 * Parse, process and respond to a request
 *
 * new_fd IN - descriptor the reply is written to; nothing is done if < 0
 * msg IN - raw request text
 *
 * The status and reply text are carried in err_code and err_msg.
\*****************************************************************************/
static void _proc_msg(slurm_fd_t new_fd, char *msg)
{
/* True when the command text begins with the keyword literal kw */
#define CMD_IS(kw) (strncmp(cmd, kw, sizeof(kw) - 1) == 0)
	DEF_TIMERS;
	char *request, *cmd, *timer_name = NULL;
	char reply[128];

	if (new_fd < 0)
		return;

	START_TIMER;
	if (msg == NULL) {
		err_code = -300;
		err_msg = "NULL request message";
		error("wiki: NULL request message");
		goto send_status;
	}

	if (_parse_msg(msg, &request) != 0)
		goto send_status;

	cmd = strstr(request, "CMD=");
	if (!cmd) {
		err_code = -300;
		err_msg = "request lacks CMD";
		error("wiki: request lacks CMD");
		goto send_status;
	}
	cmd += 4;	/* step over "CMD=" */
	err_code = 0;

	if (CMD_IS("GETJOBS")) {
		timer_name = "wiki:GETJOBS";
		if (!get_jobs(cmd, &err_code, &err_msg))
			goto send_preformatted;
	} else if (CMD_IS("GETNODES")) {
		timer_name = "wiki:GETNODES";
		if (!get_nodes(cmd, &err_code, &err_msg))
			goto send_preformatted;
	} else if (CMD_IS("STARTJOB")) {
		timer_name = "wiki:STARTJOB";
		start_job(cmd, &err_code, &err_msg);
	} else if (CMD_IS("CANCELJOB")) {
		timer_name = "wiki:CANCELJOB";
		cancel_job(cmd, &err_code, &err_msg);
	} else if (CMD_IS("REQUEUEJOB")) {
		timer_name = "wiki:REQUEUEJOB";
		job_requeue_wiki(cmd, &err_code, &err_msg);
	} else if (CMD_IS("SUSPENDJOB")) {
		timer_name = "wiki:SUSPENDJOB";
		suspend_job(cmd, &err_code, &err_msg);
	} else if (CMD_IS("RESUMEJOB")) {
		timer_name = "wiki:RESUMEJOB";
		resume_job(cmd, &err_code, &err_msg);
	} else if (CMD_IS("JOBADDTASK")) {
		timer_name = "wiki:JOBADDTASK";
		job_add_task(cmd, &err_code, &err_msg);
	} else if (CMD_IS("JOBRELEASETASK")) {
		timer_name = "wiki:JOBRELEASETASK";
		job_release_task(cmd, &err_code, &err_msg);
	} else if (CMD_IS("JOBWILLRUN")) {
		int sent_ok;

		timer_name = "wiki:JOBWILLRUN";
		if (strstr(cmd, "NODES=")) {
			/* Updated format input and output */
			sent_ok = job_will_run2(cmd, &err_code, &err_msg);
		} else {
			sent_ok = job_will_run(cmd, &err_code, &err_msg);
		}
		if (!sent_ok)
			goto send_preformatted;
	} else if (CMD_IS("MODIFYJOB")) {
		timer_name = "wiki:MODIFYJOB";
		job_modify_wiki(cmd, &err_code, &err_msg);
	} else if (CMD_IS("NOTIFYJOB")) {
		timer_name = "wiki:NOTIFYJOB";
		job_notify_wiki(cmd, &err_code, &err_msg);
	} else if (CMD_IS("SIGNALJOB")) {
		timer_name = "wiki:SIGNALJOB";
		job_signal_wiki(cmd, &err_code, &err_msg);
	} else if (CMD_IS("INITIALIZE")) {
		timer_name = "wiki:INITIALIZE";
		initialize_wiki(cmd, &err_code, &err_msg);
	} else {
		err_code = -300;
		err_msg = "unsupported request type";
		error("wiki: unrecognized request type: %s", request);
	}
	END_TIMER2(timer_name);

send_status:
	/* Plain status reply built from err_code and err_msg */
	snprintf(reply, sizeof(reply), "SC=%d RESPONSE=%s", err_code, err_msg);
	_send_reply(new_fd, reply);
	return;

send_preformatted:
	/* Message is pre-formatted by get_jobs and get_nodes
	 * ONLY if no error. Send message and xfree the buffer. */
	_send_reply(new_fd, err_msg);
	xfree(err_msg);
	return;
#undef CMD_IS
}
/* dump_all_front_end_state - save the state of all front_end nodes to file */ extern int dump_all_front_end_state(void) { #ifdef HAVE_FRONT_END /* Save high-water mark to avoid buffer growth with copies */ static int high_buffer_size = (1024 * 1024); int error_code = 0, i, log_fd; char *old_file, *new_file, *reg_file; front_end_record_t *front_end_ptr; /* Locks: Read config and node */ slurmctld_lock_t node_read_lock = { READ_LOCK, NO_LOCK, READ_LOCK, NO_LOCK }; Buf buffer = init_buf(high_buffer_size); DEF_TIMERS; START_TIMER; /* write header: version, time */ packstr(FRONT_END_STATE_VERSION, buffer); pack_time(time(NULL), buffer); /* write node records to buffer */ lock_slurmctld (node_read_lock); for (i = 0, front_end_ptr = front_end_nodes; i < front_end_node_cnt; i++, front_end_ptr++) { xassert(front_end_ptr->magic == FRONT_END_MAGIC); _dump_front_end_state(front_end_ptr, buffer); } old_file = xstrdup (slurmctld_conf.state_save_location); xstrcat (old_file, "/front_end_state.old"); reg_file = xstrdup (slurmctld_conf.state_save_location); xstrcat (reg_file, "/front_end_state"); new_file = xstrdup (slurmctld_conf.state_save_location); xstrcat (new_file, "/front_end_state.new"); unlock_slurmctld (node_read_lock); /* write the buffer to file */ lock_state_files(); log_fd = creat (new_file, 0600); if (log_fd < 0) { error ("Can't save state, error creating file %s %m", new_file); error_code = errno; } else { int pos = 0, nwrite = get_buf_offset(buffer), amount, rc; char *data = (char *)get_buf_data(buffer); high_buffer_size = MAX(nwrite, high_buffer_size); while (nwrite > 0) { amount = write(log_fd, &data[pos], nwrite); if ((amount < 0) && (errno != EINTR)) { error("Error writing file %s, %m", new_file); error_code = errno; break; } nwrite -= amount; pos += amount; } rc = fsync_and_close(log_fd, "front_end"); if (rc && !error_code) error_code = rc; } if (error_code) (void) unlink (new_file); else { /* file shuffle */ (void) unlink (old_file); if (link(reg_file, 
old_file)) debug4("unable to create link for %s -> %s: %m", reg_file, old_file); (void) unlink (reg_file); if (link(new_file, reg_file)) debug4("unable to create link for %s -> %s: %m", new_file, reg_file); (void) unlink (new_file); } xfree (old_file); xfree (reg_file); xfree (new_file); unlock_state_files (); free_buf (buffer); END_TIMER2("dump_all_front_end_state"); return error_code; #else return SLURM_SUCCESS; #endif }
/* dump_all_part_state - save the state of all partitions to file */ int dump_all_part_state(void) { /* Save high-water mark to avoid buffer growth with copies */ static int high_buffer_size = BUF_SIZE; ListIterator part_iterator; struct part_record *part_ptr; int error_code = 0, log_fd; char *old_file, *new_file, *reg_file; /* Locks: Read partition */ slurmctld_lock_t part_read_lock = { READ_LOCK, NO_LOCK, NO_LOCK, READ_LOCK }; Buf buffer = init_buf(high_buffer_size); DEF_TIMERS; START_TIMER; /* write header: time */ packstr(PART_STATE_VERSION, buffer); pack_time(time(NULL), buffer); /* write partition records to buffer */ lock_slurmctld(part_read_lock); part_iterator = list_iterator_create(part_list); while ((part_ptr = (struct part_record *) list_next(part_iterator))) { xassert (part_ptr->magic == PART_MAGIC); _dump_part_state(part_ptr, buffer); } list_iterator_destroy(part_iterator); old_file = xstrdup(slurmctld_conf.state_save_location); xstrcat(old_file, "/part_state.old"); reg_file = xstrdup(slurmctld_conf.state_save_location); xstrcat(reg_file, "/part_state"); new_file = xstrdup(slurmctld_conf.state_save_location); xstrcat(new_file, "/part_state.new"); unlock_slurmctld(part_read_lock); /* write the buffer to file */ lock_state_files(); log_fd = creat(new_file, 0600); if (log_fd < 0) { error("Can't save state, error creating file %s, %m", new_file); error_code = errno; } else { int pos = 0, nwrite = get_buf_offset(buffer), amount, rc; char *data = (char *)get_buf_data(buffer); high_buffer_size = MAX(nwrite, high_buffer_size); while (nwrite > 0) { amount = write(log_fd, &data[pos], nwrite); if ((amount < 0) && (errno != EINTR)) { error("Error writing file %s, %m", new_file); error_code = errno; break; } nwrite -= amount; pos += amount; } rc = fsync_and_close(log_fd, "partition"); if (rc && !error_code) error_code = rc; } if (error_code) (void) unlink(new_file); else { /* file shuffle */ (void) unlink(old_file); if (link(reg_file, old_file)) { debug4("unable 
to create link for %s -> %s: %m", reg_file, old_file); } (void) unlink(reg_file); if (link(new_file, reg_file)) { debug4("unable to create link for %s -> %s: %m", new_file, reg_file); } (void) unlink(new_file); } xfree(old_file); xfree(reg_file); xfree(new_file); unlock_state_files(); free_buf(buffer); END_TIMER2("dump_all_part_state"); return 0; }
extern int fed_mgr_state_save(char *state_save_location) { int error_code = 0, log_fd; char *old_file = NULL, *new_file = NULL, *reg_file = NULL; slurmctld_lock_t fed_read_lock = { NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, READ_LOCK }; Buf buffer = init_buf(0); DEF_TIMERS; START_TIMER; /* write header: version, time */ pack16(SLURM_PROTOCOL_VERSION, buffer); pack_time(time(NULL), buffer); lock_slurmctld(fed_read_lock); slurmdb_pack_federation_rec(fed_mgr_fed_rec, SLURM_PROTOCOL_VERSION, buffer); unlock_slurmctld(fed_read_lock); /* write the buffer to file */ reg_file = xstrdup_printf("%s/%s", state_save_location, FED_MGR_STATE_FILE); old_file = xstrdup_printf("%s.old", reg_file); new_file = xstrdup_printf("%s.new", reg_file); log_fd = creat(new_file, 0600); if (log_fd < 0) { error("Can't save state, create file %s error %m", new_file); error_code = errno; } else { int pos = 0, nwrite = get_buf_offset(buffer), amount; char *data = (char *)get_buf_data(buffer); while (nwrite > 0) { amount = write(log_fd, &data[pos], nwrite); if ((amount < 0) && (errno != EINTR)) { error("Error writing file %s, %m", new_file); error_code = errno; break; } nwrite -= amount; pos += amount; } fsync(log_fd); close(log_fd); } if (error_code) (void) unlink(new_file); else { /* file shuffle */ (void) unlink(old_file); if (link(reg_file, old_file)) debug4("unable to create link for %s -> %s: %m", reg_file, old_file); (void) unlink(reg_file); if (link(new_file, reg_file)) debug4("unable to create link for %s -> %s: %m", new_file, reg_file); (void) unlink(new_file); } xfree(old_file); xfree(reg_file); xfree(new_file); free_buf(buffer); END_TIMER2("fed_mgr_state_save"); return error_code; }
/*
 * pgsql_db_make_table_current - bring an existing table in line with a
 *	field list using a single "alter table" statement
 *
 * The table's current column names are read from information_schema.columns.
 * For each entry in fields, a column that is missing is added; a column that
 * already exists has its type, default and not-null setting re-applied.
 * Entries whose options string is exactly "serial" are skipped.
 *
 * The options string is split on spaces: the first token is the type, and
 * "not null" / "default <value>" are recognized among the remaining tokens.
 *
 * IN pgsql_db - database connection
 * IN schema - schema holding the table
 * IN table_name - table to alter
 * IN fields - array of field definitions, terminated by an entry whose
 *	name is NULL
 * RET SLURM_SUCCESS, or SLURM_ERROR if either query fails
 */
extern int pgsql_db_make_table_current(PGconn *pgsql_db, char *schema,
				       char *table_name,
				       storage_field_t *fields)
{
	char *query = NULL;
	char *type = NULL, *default_str = NULL;
	int not_null = 0;
	int have_clause = 0;	/* set once anything is appended to query */
	int i = 0;
	PGresult *result = NULL;
	List columns = NULL;
	ListIterator itr = NULL;
	char *col = NULL;
	DEF_TIMERS;

	query = xstrdup_printf("select column_name from "
			       "information_schema.columns where "
			       "table_name='%s' and table_schema='%s' ",
			       table_name, schema);
	if (!(result = pgsql_db_query_ret(pgsql_db, query))) {
		xfree(query);
		return SLURM_ERROR;
	}
	xfree(query);

	/* Collect the names of the columns the table has right now */
	columns = list_create(slurm_destroy_char);
	for (i = 0; i < PQntuples(result); i++) {
		col = xstrdup(PQgetvalue(result, i, 0)); /* column_name */
		list_append(columns, col);
	}
	PQclear(result);
	itr = list_iterator_create(columns);

	query = xstrdup_printf("alter table %s.%s", schema, table_name);

	START_TIMER;
	for (i = 0; fields[i].name; i++) {
		char *opts_copy, *tok, *saveptr = NULL;
		char *clause = NULL;
		int found = 0;

		if (!strcasecmp("serial", fields[i].options))
			continue;

		not_null = 0;

		/* strtok_r() modifies its input, so tokenize a private copy */
		opts_copy = xstrdup(fields[i].options);
		tok = strtok_r(opts_copy, " ", &saveptr);
		if (tok) {
			/* XXX: only one identifier supported */
			type = xstrdup(tok);
			while ((tok = strtok_r(NULL, " ", &saveptr))) {
				if (!strcasecmp("not", tok)) {
					tok = strtok_r(NULL, " ", &saveptr);
					if (!tok)	/* dangling "not" */
						break;
					if (!strcasecmp("null", tok))
						not_null = 1;
				} else if (!strcasecmp("default", tok)) {
					tok = strtok_r(NULL, " ", &saveptr);
					if (!tok)	/* dangling "default" */
						break;
					/* don't leak an earlier default value */
					xfree(default_str);
					default_str = xstrdup(tok);
				}
			}
		} else {
			type = xstrdup(fields[i].options);
		}
		xfree(opts_copy);

		/* Is the column already present? If so remove it from the list */
		list_iterator_reset(itr);
		while ((col = list_next(itr))) {
			if (!strcmp(col, fields[i].name)) {
				list_delete_item(itr);
				found = 1;
				break;
			}
		}

		if (!found) {
			info("adding column %s", fields[i].name);
			if (default_str)
				xstrfmtcat(clause, " default %s", default_str);
			if (not_null)
				xstrcat(clause, " not null");
			xstrfmtcat(query, " add %s %s", fields[i].name, type);
			if (clause)
				xstrcat(query, clause);
			xstrcat(query, ",");
		} else {
			if (default_str)
				xstrfmtcat(clause,
					   " alter %s set default %s,",
					   fields[i].name, default_str);
			else
				xstrfmtcat(clause, " alter %s drop default,",
					   fields[i].name);
			if (not_null)
				xstrfmtcat(clause, " alter %s set not null,",
					   fields[i].name);
			else
				xstrfmtcat(clause, " alter %s drop not null,",
					   fields[i].name);
			xstrfmtcat(query, " alter %s type %s,%s",
				   fields[i].name, type, clause);
		}
		have_clause = 1;

		xfree(clause);
		xfree(default_str);
		xfree(type);
	}

	list_iterator_destroy(itr);
	list_destroy(columns);

	/*
	 * Every appended clause ends in ',', which becomes the terminating
	 * ';'. With no clauses there is no trailing comma to replace (doing
	 * so would overwrite the end of the table name) and nothing to run.
	 */
	if (have_clause) {
		query[strlen(query) - 1] = ';';
		if (pgsql_db_query(pgsql_db, query)) {
			xfree(query);
			return SLURM_ERROR;
		}
	}
	xfree(query);

	END_TIMER2("make table current");
	return SLURM_SUCCESS;
}