extern int sacctmgr_archive_dump(int argc, char *argv[]) { int rc = SLURM_SUCCESS; slurmdb_archive_cond_t *arch_cond = xmalloc(sizeof(slurmdb_archive_cond_t)); int i=0; struct stat st; for (i=0; i<argc; i++) { int command_len = strlen(argv[i]); if (!strncasecmp (argv[i], "Where", MAX(command_len, 5)) || !strncasecmp (argv[i], "Set", MAX(command_len, 3))) i++; _set_cond(&i, argc, argv, arch_cond); } if (!arch_cond->purge_event) arch_cond->purge_event = NO_VAL; if (!arch_cond->purge_job) arch_cond->purge_job = NO_VAL; if (!arch_cond->purge_resv) arch_cond->purge_resv = NO_VAL; if (!arch_cond->purge_step) arch_cond->purge_step = NO_VAL; if (!arch_cond->purge_suspend) arch_cond->purge_suspend = NO_VAL; if (exit_code) { slurmdb_destroy_archive_cond(arch_cond); return SLURM_ERROR; } if (arch_cond->archive_dir) { if (stat(arch_cond->archive_dir, &st) < 0) { exit_code = errno; fprintf(stderr, " dump: Failed to stat %s: %m\n " "Note: For archive dump, " "the directory must be on " "the calling host.\n", arch_cond->archive_dir); return SLURM_ERROR; } if (!(st.st_mode & S_IFDIR)) { errno = EACCES; fprintf(stderr, " dump: " "archive dir %s isn't a directory\n", arch_cond->archive_dir); return SLURM_ERROR; } if (access(arch_cond->archive_dir, W_OK) < 0) { errno = EACCES; fprintf(stderr, " dump: " "archive dir %s is not writable\n", arch_cond->archive_dir); return SLURM_ERROR; } } if (arch_cond->archive_script) { if (stat(arch_cond->archive_script, &st) < 0) { exit_code = errno; fprintf(stderr, " dump: Failed to stat %s: %m\n " "Note: For archive dump, the script must be on " "the calling host.\n", arch_cond->archive_script); return SLURM_ERROR; } if (!(st.st_mode & S_IFREG)) { errno = EACCES; fprintf(stderr, " dump: " "archive script %s isn't a regular file\n", arch_cond->archive_script); return SLURM_ERROR; } if (access(arch_cond->archive_script, X_OK) < 0) { errno = EACCES; fprintf(stderr, " dump: " "archive script %s is not executable\n", arch_cond->archive_script); return 
SLURM_ERROR; } } rc = jobacct_storage_g_archive(db_conn, arch_cond); if (rc == SLURM_SUCCESS) { if (commit_check("Would you like to commit changes?")) { acct_storage_g_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); acct_storage_g_commit(db_conn, 0); } } else { exit_code=1; fprintf(stderr, " Problem dumping archive: %s\n", slurm_strerror(rc)); rc = SLURM_ERROR; } slurmdb_destroy_archive_cond(arch_cond); return rc; }
/*
 * specific_info_resv - populate a popup window with reservation data.
 *
 * Refreshes the cached reservation message through get_new_info_resv(),
 * builds the reservation list with _create_resv_info_list() and then either
 * shows the full information page (spec_info->type == INFO_PAGE) or a tree
 * view limited to the reservations matching the popup's search information.
 * Matching reservations also get their node ranges coloured in the popup's
 * grid button list.
 *
 * IN/OUT popup_win - popup to fill; its spec_info->display_widget and
 *                    spec_info->view are updated, and toggled/force_refresh
 *                    are cleared when the function leaves through end_it.
 */
extern void specific_info_resv(popup_info_t *popup_win)
{
	int resv_error_code = SLURM_SUCCESS;
	static reserve_info_msg_t *resv_info_ptr = NULL;
	static reserve_info_t *resv_ptr = NULL;
	specific_info_t *spec_info = popup_win->spec_info;
	sview_search_info_t *search_info = spec_info->search_info;
	char error_char[100];
	GtkWidget *label = NULL;
	GtkTreeView *tree_view = NULL;
	List resv_list = NULL;
	List send_resv_list = NULL;
	sview_resv_info_t *sview_resv_info_ptr = NULL;
	int j = 0;
	hostset_t hostset = NULL;
	ListIterator itr = NULL;

	if (!spec_info->display_widget) {
		setup_popup_info(popup_win, display_data_resv, SORTID_CNT);
	}

	if (spec_info->display_widget && popup_win->toggled) {
		gtk_widget_destroy(spec_info->display_widget);
		spec_info->display_widget = NULL;
		goto display_it;
	}

	if ((resv_error_code =
	     get_new_info_resv(&resv_info_ptr, popup_win->force_refresh))
	    == SLURM_NO_CHANGE_IN_DATA) {
		if (!spec_info->display_widget
		    || spec_info->view == ERROR_VIEW)
			goto display_it;
	} else if (resv_error_code != SLURM_SUCCESS) {
		/* The error label is already displayed, nothing to redo. */
		if (spec_info->view == ERROR_VIEW)
			goto end_it;
		spec_info->view = ERROR_VIEW;
		if (spec_info->display_widget)
			gtk_widget_destroy(spec_info->display_widget);
		/*
		 * Bounded formatting: the error text has no guaranteed
		 * length, so never write past the end of error_char.
		 */
		snprintf(error_char, sizeof(error_char),
			 "get_new_info_resv: %s",
			 slurm_strerror(slurm_get_errno()));
		label = gtk_label_new(error_char);
		gtk_table_attach_defaults(popup_win->table,
					  label,
					  0, 1, 0, 1);
		gtk_widget_show(label);
		spec_info->display_widget = gtk_widget_ref(label);
		goto end_it;
	}

display_it:
	resv_list = _create_resv_info_list(resv_info_ptr);
	if (!resv_list)
		return;

	/* Drop a stale error label before building the real view. */
	if (spec_info->view == ERROR_VIEW && spec_info->display_widget) {
		gtk_widget_destroy(spec_info->display_widget);
		spec_info->display_widget = NULL;
	}
	if (spec_info->type != INFO_PAGE && !spec_info->display_widget) {
		tree_view = create_treeview(local_display_data,
					    &popup_win->grid_button_list);
		gtk_tree_selection_set_mode(
			gtk_tree_view_get_selection(tree_view),
			GTK_SELECTION_MULTIPLE);
		spec_info->display_widget =
			gtk_widget_ref(GTK_WIDGET(tree_view));
		gtk_table_attach_defaults(popup_win->table,
					  GTK_WIDGET(tree_view),
					  0, 1, 0, 1);
		/* since this function sets the model of the tree_view
		   to the treestore we don't really care about
		   the return value */
		create_treestore(tree_view, popup_win->display_data,
				 SORTID_CNT, SORTID_TIME_START, SORTID_COLOR);
	}

	setup_popup_grid_list(popup_win);

	spec_info->view = INFO_VIEW;
	if (spec_info->type == INFO_PAGE) {
		_display_info_resv(resv_list, popup_win);
		goto end_it;
	}

	/* just linking to another list, don't free the inside, just
	   the list */
	send_resv_list = list_create(NULL);
	itr = list_iterator_create(resv_list);
	while ((sview_resv_info_ptr = list_next(itr))) {
		resv_ptr = sview_resv_info_ptr->resv_ptr;
		/* "continue" below skips a reservation that does not match. */
		switch (spec_info->type) {
		case PART_PAGE:
		case BLOCK_PAGE:
		case NODE_PAGE:
			if (!resv_ptr->node_list)
				continue;

			if (!(hostset = hostset_create(
				      search_info->gchar_data)))
				continue;
			if (!hostset_intersects(hostset,
						resv_ptr->node_list)) {
				hostset_destroy(hostset);
				continue;
			}
			hostset_destroy(hostset);
			break;
		case JOB_PAGE:
			if (strcmp(resv_ptr->name,
				   search_info->gchar_data))
				continue;
			break;
		case RESV_PAGE:
			switch (search_info->search_type) {
			case SEARCH_RESERVATION_NAME:
				if (!search_info->gchar_data)
					continue;

				if (strcmp(resv_ptr->name,
					   search_info->gchar_data))
					continue;
				break;
			default:
				continue;
			}
			break;
		default:
			g_print("Unknown type %d\n", spec_info->type);
			continue;
		}
		list_push(send_resv_list, sview_resv_info_ptr);
		/*
		 * node_inx is read as consecutive (first, last) pairs; a
		 * negative entry ends the walk.
		 */
		j = 0;
		while (resv_ptr->node_inx[j] >= 0) {
			change_grid_color(
				popup_win->grid_button_list,
				resv_ptr->node_inx[j],
				resv_ptr->node_inx[j+1],
				sview_resv_info_ptr->color_inx,
				true, 0);
			j += 2;
		}
	}
	list_iterator_destroy(itr);
	post_setup_popup_grid_list(popup_win);

	_update_info_resv(send_resv_list,
			  GTK_TREE_VIEW(spec_info->display_widget));
	list_destroy(send_resv_list);
end_it:
	popup_win->toggled = 0;
	popup_win->force_refresh = 0;

	return;
}
static void * _cancel_job_id (void *ci) { int error_code = SLURM_SUCCESS, i; job_cancel_info_t *cancel_info = (job_cancel_info_t *)ci; bool sig_set = true; uint16_t flags = 0; char *job_type = ""; DEF_TIMERS; if (cancel_info->sig == (uint16_t) NO_VAL) { cancel_info->sig = SIGKILL; sig_set = false; } if (opt.batch) { flags |= KILL_JOB_BATCH; job_type = "batch "; } if (opt.full) { flags |= KILL_FULL_JOB; job_type = "full "; } if (cancel_info->array_flag) flags |= KILL_JOB_ARRAY; if (!cancel_info->job_id_str) { if (cancel_info->array_job_id && (cancel_info->array_task_id == INFINITE)) { xstrfmtcat(cancel_info->job_id_str, "%u_*", cancel_info->array_job_id); } else if (cancel_info->array_job_id) { xstrfmtcat(cancel_info->job_id_str, "%u_%u", cancel_info->array_job_id, cancel_info->array_task_id); } else { xstrfmtcat(cancel_info->job_id_str, "%u", cancel_info->job_id); } } if (!sig_set) { verbose("Terminating %sjob %s", job_type, cancel_info->job_id_str); } else { verbose("Signal %u to %sjob %s", cancel_info->sig, job_type, cancel_info->job_id_str); } for (i = 0; i < MAX_CANCEL_RETRY; i++) { _add_delay(); START_TIMER; error_code = slurm_kill_job2(cancel_info->job_id_str, cancel_info->sig, flags); END_TIMER; slurm_mutex_lock(&max_delay_lock); max_resp_time = MAX(max_resp_time, DELTA_TIMER); slurm_mutex_unlock(&max_delay_lock); if ((error_code == 0) || (errno != ESLURM_TRANSITION_STATE_NO_UPDATE)) break; verbose("Job is in transistional state, retrying"); sleep(5 + i); } if (error_code) { error_code = slurm_get_errno(); if ((opt.verbose > 0) || ((error_code != ESLURM_ALREADY_DONE) && (error_code != ESLURM_INVALID_JOB_ID))) { error("Kill job error on job id %s: %s", cancel_info->job_id_str, slurm_strerror(slurm_get_errno())); } if (((error_code == ESLURM_ALREADY_DONE) || (error_code == ESLURM_INVALID_JOB_ID)) && (cancel_info->sig == SIGKILL)) { error_code = 0; /* Ignore error if job done */ } } /* Purposely free the struct passed in here, so the caller doesn't have * to 
keep track of it, but don't destroy the mutex and condition * variables contained. */ slurm_mutex_lock(cancel_info->num_active_threads_lock); *(cancel_info->rc) = MAX(*(cancel_info->rc), error_code); (*(cancel_info->num_active_threads))--; slurm_cond_signal(cancel_info->num_active_threads_cond); slurm_mutex_unlock(cancel_info->num_active_threads_lock); xfree(cancel_info->job_id_str); xfree(cancel_info); return NULL; }
extern int sacctmgr_modify_qos(int argc, char *argv[]) { int rc = SLURM_SUCCESS; slurmdb_qos_cond_t *qos_cond = xmalloc(sizeof(slurmdb_qos_cond_t)); slurmdb_qos_rec_t *qos = xmalloc(sizeof(slurmdb_qos_rec_t)); int i=0; int cond_set = 0, rec_set = 0, set = 0; List ret_list = NULL; slurmdb_init_qos_rec(qos, 0, NO_VAL); for (i=0; i<argc; i++) { int command_len = strlen(argv[i]); if (!strncasecmp(argv[i], "Where", MAX(command_len, 5))) { i++; cond_set += _set_cond(&i, argc, argv, qos_cond, NULL); } else if (!strncasecmp(argv[i], "Set", MAX(command_len, 3))) { i++; rec_set += _set_rec(&i, argc, argv, NULL, qos); } else { cond_set += _set_cond(&i, argc, argv, qos_cond, NULL); } } if (exit_code) { slurmdb_destroy_qos_cond(qos_cond); slurmdb_destroy_qos_rec(qos); return SLURM_ERROR; } else if (!rec_set) { exit_code=1; fprintf(stderr, " You didn't give me anything to set\n"); slurmdb_destroy_qos_cond(qos_cond); slurmdb_destroy_qos_rec(qos); return SLURM_ERROR; } else if (!cond_set) { if (!commit_check("You didn't set any conditions with 'WHERE'.\n" "Are you sure you want to continue?")) { printf("Aborted\n"); slurmdb_destroy_qos_cond(qos_cond); slurmdb_destroy_qos_rec(qos); return SLURM_SUCCESS; } } // Special case: reset raw usage only if (qos->usage) { rc = SLURM_ERROR; if (qos->usage->usage_raw == 0.0) rc = sacctmgr_remove_qos_usage(qos_cond); else error("Raw usage can only be set to 0 (zero)"); slurmdb_destroy_qos_cond(qos_cond); slurmdb_destroy_qos_rec(qos); return rc; } notice_thread_init(); ret_list = acct_storage_g_modify_qos(db_conn, my_uid, qos_cond, qos); if (ret_list && list_count(ret_list)) { char *object = NULL; ListIterator itr = list_iterator_create(ret_list); printf(" Modified qos...\n"); while((object = list_next(itr))) { printf(" %s\n", object); } list_iterator_destroy(itr); set = 1; } else if (ret_list) { printf(" Nothing modified\n"); rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", slurm_strerror(errno)); rc = 
SLURM_ERROR; } if (ret_list) list_destroy(ret_list); notice_thread_fini(); if (set) { if (commit_check("Would you like to commit changes?")) acct_storage_g_commit(db_conn, 1); else { printf(" Changes Discarded\n"); acct_storage_g_commit(db_conn, 0); } } slurmdb_destroy_qos_cond(qos_cond); slurmdb_destroy_qos_rec(qos); return rc; }
/* * slurm_allocate_resources_blocking * allocate resources for a job request. This call will block until * the allocation is granted, or the specified timeout limit is reached. * IN req - description of resource allocation request * IN timeout - amount of time, in seconds, to wait for a response before * giving up. * A timeout of zero will wait indefinitely. * IN pending_callback - If the allocation cannot be granted immediately, * the controller will put the job in the PENDING state. If * pending callback is not NULL, it will be called with the job_id * of the pending job as the sole parameter. * * RET allocation structure on success, NULL on error set errno to * indicate the error (errno will be ETIMEDOUT if the timeout is reached * with no allocation granted) * NOTE: free the response using slurm_free_resource_allocation_response_msg() */ resource_allocation_response_msg_t * slurm_allocate_resources_blocking (const job_desc_msg_t *user_req, time_t timeout, void(*pending_callback)(uint32_t job_id)) { int rc; slurm_msg_t req_msg; slurm_msg_t resp_msg; resource_allocation_response_msg_t *resp = NULL; char *hostname = NULL; uint32_t job_id; job_desc_msg_t *req; listen_t *listen = NULL; int errnum = SLURM_SUCCESS; slurm_msg_t_init(&req_msg); slurm_msg_t_init(&resp_msg); /* make a copy of the user's job description struct so that we * can make changes before contacting the controller */ req = (job_desc_msg_t *)xmalloc(sizeof(job_desc_msg_t)); if (req == NULL) return NULL; memcpy(req, user_req, sizeof(job_desc_msg_t)); /* * set Node and session id for this request */ if (req->alloc_sid == NO_VAL) req->alloc_sid = getsid(0); if (user_req->alloc_node != NULL) { req->alloc_node = xstrdup(user_req->alloc_node); } else if ((hostname = xshort_hostname()) != NULL) { req->alloc_node = hostname; } else { error("Could not get local hostname," " forcing immediate allocation mode."); req->immediate = 1; } if (!req->immediate) { listen = 
_create_allocation_response_socket(hostname); if (listen == NULL) { xfree(req); return NULL; } req->alloc_resp_port = listen->port; } req_msg.msg_type = REQUEST_RESOURCE_ALLOCATION; req_msg.data = req; rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg); if (rc == SLURM_SOCKET_ERROR) { int errnum = errno; destroy_forward(&req_msg.forward); destroy_forward(&resp_msg.forward); if (!req->immediate) _destroy_allocation_response_socket(listen); xfree(req); errno = errnum; return NULL; } switch (resp_msg.msg_type) { case RESPONSE_SLURM_RC: if (_handle_rc_msg(&resp_msg) < 0) { /* will reach this when the allocation fails */ errnum = errno; } else { /* shouldn't get here */ errnum = -1; } break; case RESPONSE_RESOURCE_ALLOCATION: /* Yay, the controller has acknowledged our request! But did we really get an allocation yet? */ resp = (resource_allocation_response_msg_t *) resp_msg.data; if (resp->node_cnt > 0) { /* yes, allocation has been granted */ errno = SLURM_PROTOCOL_SUCCESS; } else if (!req->immediate) { if (resp->error_code != SLURM_SUCCESS) info("%s", slurm_strerror(resp->error_code)); /* no, we need to wait for a response */ job_id = resp->job_id; slurm_free_resource_allocation_response_msg(resp); if (pending_callback != NULL) pending_callback(job_id); resp = _wait_for_allocation_response(job_id, listen, timeout); /* If NULL, we didn't get the allocation in the time desired, so just free the job id */ if ((resp == NULL) && (errno != ESLURM_ALREADY_DONE)) { errnum = errno; slurm_complete_job(job_id, -1); } } break; default: errnum = SLURM_UNEXPECTED_MSG_ERROR; resp = NULL; } destroy_forward(&req_msg.forward); destroy_forward(&resp_msg.forward); if (!req->immediate) _destroy_allocation_response_socket(listen); xfree(req); errno = errnum; return resp; }
extern int sacctmgr_add_account(int argc, char **argv) { int rc = SLURM_SUCCESS; int i=0; ListIterator itr = NULL, itr_c = NULL; slurmdb_account_rec_t *acct = NULL; slurmdb_assoc_rec_t *assoc = NULL; slurmdb_assoc_cond_t assoc_cond; List name_list = list_create(slurm_destroy_char); List cluster_list = list_create(slurm_destroy_char); char *cluster = NULL; char *name = NULL; List acct_list = NULL; List assoc_list = NULL; List local_assoc_list = NULL; List local_account_list = NULL; char *acct_str = NULL; char *assoc_str = NULL; int limit_set = 0; slurmdb_account_rec_t *start_acct = xmalloc(sizeof(slurmdb_account_rec_t)); slurmdb_assoc_rec_t *start_assoc = xmalloc(sizeof(slurmdb_assoc_rec_t)); slurmdb_init_assoc_rec(start_assoc, 0); for (i = 0; i < argc; i++) { int command_len = strlen(argv[i]); if (!xstrncasecmp(argv[i], "Where", MAX(command_len, 5)) || !xstrncasecmp(argv[i], "Set", MAX(command_len, 3))) i++; limit_set += _set_rec(&i, argc, argv, name_list, cluster_list, start_acct, start_assoc); } if (exit_code) { slurmdb_destroy_assoc_rec(start_assoc); slurmdb_destroy_account_rec(start_acct); return SLURM_ERROR; } if (!name_list || !list_count(name_list)) { FREE_NULL_LIST(name_list); FREE_NULL_LIST(cluster_list); slurmdb_destroy_assoc_rec(start_assoc); slurmdb_destroy_account_rec(start_acct); exit_code = 1; fprintf(stderr, " Need name of account to add.\n"); return SLURM_SUCCESS; } else { slurmdb_account_cond_t account_cond; memset(&account_cond, 0, sizeof(slurmdb_account_cond_t)); memset(&assoc_cond, 0, sizeof(slurmdb_assoc_cond_t)); assoc_cond.acct_list = name_list; account_cond.assoc_cond = &assoc_cond; local_account_list = slurmdb_accounts_get( db_conn, &account_cond); } if (!local_account_list) { exit_code = 1; fprintf(stderr, " Problem getting accounts from database. 
" "Contact your admin.\n"); FREE_NULL_LIST(name_list); FREE_NULL_LIST(cluster_list); slurmdb_destroy_assoc_rec(start_assoc); slurmdb_destroy_account_rec(start_acct); return SLURM_ERROR; } if (!start_assoc->parent_acct) start_assoc->parent_acct = xstrdup("root"); if (!cluster_list || !list_count(cluster_list)) { slurmdb_cluster_rec_t *cluster_rec = NULL; List tmp_list = slurmdb_clusters_get(db_conn, NULL); if (!tmp_list) { exit_code=1; fprintf(stderr, " Problem getting clusters from database. " "Contact your admin.\n"); FREE_NULL_LIST(name_list); FREE_NULL_LIST(cluster_list); slurmdb_destroy_assoc_rec(start_assoc); slurmdb_destroy_account_rec(start_acct); FREE_NULL_LIST(local_account_list); return SLURM_ERROR; } if (!list_count(tmp_list)) { exit_code=1; fprintf(stderr, " Can't add accounts, no cluster " "defined yet.\n" " Please contact your administrator.\n"); FREE_NULL_LIST(name_list); FREE_NULL_LIST(cluster_list); slurmdb_destroy_assoc_rec(start_assoc); slurmdb_destroy_account_rec(start_acct); FREE_NULL_LIST(local_account_list); return SLURM_ERROR; } if (!cluster_list) list_create(slurm_destroy_char); else list_flush(cluster_list); itr_c = list_iterator_create(tmp_list); while((cluster_rec = list_next(itr_c))) { list_append(cluster_list, xstrdup(cluster_rec->name)); } list_iterator_destroy(itr_c); FREE_NULL_LIST(tmp_list); } else if (sacctmgr_validate_cluster_list(cluster_list) != SLURM_SUCCESS) { slurmdb_destroy_assoc_rec(start_assoc); slurmdb_destroy_account_rec(start_acct); FREE_NULL_LIST(local_account_list); return SLURM_ERROR; } acct_list = list_create(slurmdb_destroy_account_rec); assoc_list = list_create(slurmdb_destroy_assoc_rec); memset(&assoc_cond, 0, sizeof(slurmdb_assoc_cond_t)); assoc_cond.acct_list = list_create(NULL); itr = list_iterator_create(name_list); while((name = list_next(itr))) list_append(assoc_cond.acct_list, name); list_iterator_destroy(itr); list_append(assoc_cond.acct_list, start_assoc->parent_acct); assoc_cond.cluster_list = 
cluster_list; local_assoc_list = slurmdb_associations_get( db_conn, &assoc_cond); FREE_NULL_LIST(assoc_cond.acct_list); if (!local_assoc_list) { exit_code=1; fprintf(stderr, " Problem getting associations from database. " "Contact your admin.\n"); FREE_NULL_LIST(name_list); FREE_NULL_LIST(cluster_list); slurmdb_destroy_assoc_rec(start_assoc); slurmdb_destroy_account_rec(start_acct); FREE_NULL_LIST(local_account_list); return SLURM_ERROR; } itr = list_iterator_create(name_list); while((name = list_next(itr))) { if (!name[0]) { exit_code=1; fprintf(stderr, " No blank names are " "allowed when adding.\n"); rc = SLURM_ERROR; continue; } acct = NULL; if (!sacctmgr_find_account_from_list(local_account_list, name)) { acct = xmalloc(sizeof(slurmdb_account_rec_t)); acct->assoc_list = list_create(slurmdb_destroy_assoc_rec); acct->name = xstrdup(name); if (start_acct->description) acct->description = xstrdup(start_acct->description); else acct->description = xstrdup(name); if (start_acct->organization) acct->organization = xstrdup(start_acct->organization); else if (xstrcmp(start_assoc->parent_acct, "root")) acct->organization = xstrdup(start_assoc->parent_acct); else acct->organization = xstrdup(name); xstrfmtcat(acct_str, " %s\n", name); list_append(acct_list, acct); } itr_c = list_iterator_create(cluster_list); while((cluster = list_next(itr_c))) { if (sacctmgr_find_account_base_assoc_from_list( local_assoc_list, name, cluster)) { //printf(" already have this assoc\n"); continue; } if (!sacctmgr_find_account_base_assoc_from_list( local_assoc_list, start_assoc->parent_acct, cluster)) { exit_code=1; fprintf(stderr, " Parent account '%s' " "doesn't exist on " "cluster %s\n" " Contact your admin " "to add this account.\n", start_assoc->parent_acct, cluster); continue; } assoc = xmalloc(sizeof(slurmdb_assoc_rec_t)); slurmdb_init_assoc_rec(assoc, 0); assoc->acct = xstrdup(name); assoc->cluster = xstrdup(cluster); assoc->def_qos_id = start_assoc->def_qos_id; assoc->parent_acct = 
xstrdup(start_assoc->parent_acct); assoc->shares_raw = start_assoc->shares_raw; slurmdb_copy_assoc_rec_limits(assoc, start_assoc); if (acct) list_append(acct->assoc_list, assoc); else list_append(assoc_list, assoc); xstrfmtcat(assoc_str, " A = %-10.10s" " C = %-10.10s\n", assoc->acct, assoc->cluster); } list_iterator_destroy(itr_c); } list_iterator_destroy(itr); FREE_NULL_LIST(local_account_list); FREE_NULL_LIST(local_assoc_list); if (!list_count(acct_list) && !list_count(assoc_list)) { printf(" Nothing new added.\n"); rc = SLURM_ERROR; goto end_it; } else if (!assoc_str) { exit_code=1; fprintf(stderr, " No associations created.\n"); goto end_it; } if (acct_str) { printf(" Adding Account(s)\n%s", acct_str); printf(" Settings\n"); if (start_acct->description) printf(" Description = %s\n", start_acct->description); else printf(" Description = %s\n", "Account Name"); if (start_acct->organization) printf(" Organization = %s\n", start_acct->organization); else printf(" Organization = %s\n", "Parent/Account Name"); xfree(acct_str); } if (assoc_str) { printf(" Associations\n%s", assoc_str); xfree(assoc_str); } if (limit_set) { printf(" Settings\n"); sacctmgr_print_assoc_limits(start_assoc); } notice_thread_init(); if (list_count(acct_list)) rc = slurmdb_accounts_add(db_conn, acct_list); if (rc == SLURM_SUCCESS) { if (list_count(assoc_list)) rc = slurmdb_associations_add(db_conn, assoc_list); } else { exit_code=1; fprintf(stderr, " Problem adding accounts: %s\n", slurm_strerror(rc)); rc = SLURM_ERROR; notice_thread_fini(); goto end_it; } notice_thread_fini(); if (rc == SLURM_SUCCESS) { if (commit_check("Would you like to commit changes?")) { slurmdb_connection_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); slurmdb_connection_commit(db_conn, 0); } } else { exit_code=1; fprintf(stderr, " error: Problem adding account associations: %s\n", slurm_strerror(rc)); rc = SLURM_ERROR; } end_it: FREE_NULL_LIST(name_list); FREE_NULL_LIST(cluster_list); 
FREE_NULL_LIST(acct_list); FREE_NULL_LIST(assoc_list); slurmdb_destroy_assoc_rec(start_assoc); slurmdb_destroy_account_rec(start_acct); return rc; }
extern int sacctmgr_delete_qos(int argc, char *argv[]) { int rc = SLURM_SUCCESS; slurmdb_qos_cond_t *qos_cond = xmalloc(sizeof(slurmdb_qos_cond_t)); int i=0; List ret_list = NULL; int set = 0; for (i=0; i<argc; i++) { int command_len = strlen(argv[i]); if (!strncasecmp (argv[i], "Where", MAX(command_len, 5)) || !strncasecmp (argv[i], "Set", MAX(command_len, 3))) i++; set += _set_cond(&i, argc, argv, qos_cond, NULL); } if (!set) { exit_code=1; fprintf(stderr, " No conditions given to remove, not executing.\n"); slurmdb_destroy_qos_cond(qos_cond); return SLURM_ERROR; } else if (set == -1) { slurmdb_destroy_qos_cond(qos_cond); return SLURM_ERROR; } if (!g_qos_list) g_qos_list = acct_storage_g_get_qos( db_conn, my_uid, NULL); notice_thread_init(); ret_list = acct_storage_g_remove_qos(db_conn, my_uid, qos_cond); notice_thread_fini(); slurmdb_destroy_qos_cond(qos_cond); if (ret_list && list_count(ret_list)) { char *object = NULL; ListIterator itr = NULL; /* Check to see if person is trying to remove a default * qos of an association. _isdefault only works with the * output from acct_storage_g_remove_qos, and * with a previously got g_qos_list. */ if (_isdefault(ret_list)) { exit_code=1; fprintf(stderr, " Please either remove the qos' listed " "above from list and resubmit,\n" " or change the default qos to " "remove the qos.\n" " Changes Discarded\n"); acct_storage_g_commit(db_conn, 0); goto end_it; } itr = list_iterator_create(ret_list); printf(" Deleting QOS(s)...\n"); while((object = list_next(itr))) { printf(" %s\n", object); } list_iterator_destroy(itr); if (commit_check("Would you like to commit changes?")) { acct_storage_g_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); acct_storage_g_commit(db_conn, 0); } } else if (ret_list) { printf(" Nothing deleted\n"); rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", slurm_strerror(errno)); rc = SLURM_ERROR; } end_it: if (ret_list) list_destroy(ret_list); return rc; }
/*
 * _admin_front_end - ask for confirmation, then change a front end node's
 * state.
 *
 * Shows a modal dialog asking whether the state of node_list should be set
 * to the state named by type ("Drain" or "Resume" prefix, case-insensitive).
 * A drain request also shows an entry for the mandatory reason.  When the
 * user confirms, the request is sent with slurm_update_front_end() and the
 * outcome is reported through display_edit_note().  If got_edit_signal is
 * set once the dialog is gone, the function calls itself again with that
 * value as the new type.
 *
 * IN model     - only passed on to the recursive call
 * IN iter      - only passed on to the recursive call
 * IN type      - requested action, also used as dialog title
 * IN node_list - name(s) of the front end node(s) to update
 */
static void _admin_front_end(GtkTreeModel *model, GtkTreeIter *iter,
			     char *type, char *node_list)
{
	uint16_t state = NO_VAL16;
	update_front_end_msg_t front_end_update_msg;
	/*
	 * Non-NULL defaults: an unrecognised type must not hand NULL to the
	 * "%s" conversions of the confirmation message below.
	 */
	char *new_type = "UNKNOWN", *reason = "";
	char tmp_char[100];
	char *lower;
	char *login;
	int rc;
	GtkWidget *label = NULL;
	GtkWidget *entry = NULL;
	GtkWidget *popup = NULL;

	if (cluster_flags & CLUSTER_FLAG_FED) {
		display_fed_disabled_popup(type);
		global_entry_changed = 0;
		return;
	}

	popup = gtk_dialog_new_with_buttons(
		type,
		GTK_WINDOW(main_window),
		GTK_DIALOG_MODAL | GTK_DIALOG_DESTROY_WITH_PARENT,
		NULL);
	gtk_window_set_type_hint(GTK_WINDOW(popup),
				 GDK_WINDOW_TYPE_HINT_NORMAL);

	gtk_window_set_transient_for(GTK_WINDOW(popup), NULL);

	label = gtk_dialog_add_button(GTK_DIALOG(popup),
				      GTK_STOCK_YES, GTK_RESPONSE_OK);
	gtk_window_set_default(GTK_WINDOW(popup), label);
	gtk_dialog_add_button(GTK_DIALOG(popup),
			      GTK_STOCK_CANCEL, GTK_RESPONSE_CANCEL);

	if (!xstrncasecmp("Drain", type, 5)) {
		new_type = "DRAIN";
		reason = "\n\nPlease enter reason.";
		state = NODE_STATE_DRAIN;
		entry = create_entry();
	} else if (!xstrncasecmp("Resume", type, 6)) {
		new_type = "RESUME";
		reason = "";
		state = NODE_RESUME;
	}
	snprintf(tmp_char, sizeof(tmp_char),
		 "Are you sure you want to set state of front end node %s "
		 "to %s?%s", node_list, new_type, reason);
	label = gtk_label_new(tmp_char);

	gtk_box_pack_start(GTK_BOX(GTK_DIALOG(popup)->vbox),
			   label, false, false, 0);
	if (entry)
		gtk_box_pack_start(GTK_BOX(GTK_DIALOG(popup)->vbox),
				   entry, true, true, 0);
	gtk_widget_show_all(popup);
	rc = gtk_dialog_run (GTK_DIALOG(popup));

	slurm_init_update_front_end_msg(&front_end_update_msg);

	if (rc == GTK_RESPONSE_OK) {
		front_end_update_msg.name = node_list;
		front_end_update_msg.node_state = state;
		if (entry) {
			front_end_update_msg.reason = xstrdup(
				gtk_entry_get_text(GTK_ENTRY(entry)));
			if (!front_end_update_msg.reason ||
			    !strlen(front_end_update_msg.reason)) {
				lower = g_strdup_printf(
					"You need a reason to do that.");
				display_edit_note(lower);
				g_free(lower);
				goto end_it;
			}
			/*
			 * getlogin() returns NULL when no login name can be
			 * determined; fall back to the real uid in that case
			 * instead of passing NULL on.
			 */
			login = getlogin();
			if (!login ||
			    (uid_from_string(
				    login,
				    &front_end_update_msg.reason_uid) < 0))
				front_end_update_msg.reason_uid = getuid();
		}

		rc = slurm_update_front_end(&front_end_update_msg);
		if (rc == SLURM_SUCCESS) {
			lower = g_strdup_printf(
				"Nodes %s updated successfully.",
				node_list);
			display_edit_note(lower);
			g_free(lower);
		} else {
			lower = g_strdup_printf(
				"Problem updating nodes %s: %s",
				node_list, slurm_strerror(rc));
			display_edit_note(lower);
			g_free(lower);
		}
	}

end_it:
	global_entry_changed = 0;
	xfree(front_end_update_msg.reason);
	gtk_widget_destroy(popup);
	/* A pending edit signal becomes the next requested action. */
	if (got_edit_signal) {
		type = got_edit_signal;
		got_edit_signal = NULL;
		_admin_front_end(model, iter, type, node_list);
		xfree(type);
	}
	return;
}
/*
 * get_info_front_end - build or refresh the front end node page.
 *
 * Called in three ways:
 *   - table == NULL and display_data == NULL: reset; the cached widget is
 *     destroyed and the cached front end message pointer is cleared.
 *   - table == NULL with display_data: only the set_menu callback is copied
 *     into display_data_front_end.
 *   - table != NULL: the front end data is (re)loaded and shown in a tree
 *     view attached to table, or an error / "not available" label is shown
 *     instead.
 *
 * The widget, the current view kind and the loaded message are kept in
 * static variables between calls.
 *
 * IN table        - table to attach the display widget to, or NULL
 * IN display_data - display settings to remember in local_display_data,
 *                   or NULL
 */
extern void get_info_front_end(GtkTable *table, display_data_t *display_data)
{
	int error_code = SLURM_SUCCESS;
	List info_list = NULL;
	static int view = -1;
	static front_end_info_msg_t *front_end_info_ptr = NULL;
	char error_char[100];
	GtkWidget *label = NULL;
	GtkTreeView *tree_view = NULL;
	static GtkWidget *display_widget = NULL;
	int changed = 1, j;
	ListIterator itr = NULL;
	GtkTreePath *path = NULL;
	static bool set_opts = false;

	/* Page options are registered on the first call only. */
	if (!set_opts)
		set_page_opts(FRONT_END_PAGE, display_data_front_end,
			      SORTID_CNT, _initial_page_opts);
	set_opts = true;

	/* reset */
	if (!table && !display_data) {
		if (display_widget)
			gtk_widget_destroy(display_widget);
		display_widget = NULL;
		front_end_info_ptr = NULL;
		goto reset_curs;
	}

	if (display_data)
		local_display_data = display_data;
	if (!table) {
		display_data_front_end->set_menu = local_display_data->set_menu;
		goto reset_curs;
	}

	if (cluster_flags & CLUSTER_FLAG_FED) {
		view = ERROR_VIEW;
		if (display_widget)
			gtk_widget_destroy(display_widget);
		label = gtk_label_new("Not available in a federated view");
		gtk_table_attach_defaults(GTK_TABLE(table), label, 0, 1, 0, 1);
		gtk_widget_show(label);
		display_widget = gtk_widget_ref(label);
		goto end_it;
	}

	if (display_widget && toggled) {
		gtk_widget_destroy(display_widget);
		display_widget = NULL;
		goto display_it;
	}

	error_code = get_new_info_front_end(&front_end_info_ptr, force_refresh);
	if (error_code == SLURM_NO_CHANGE_IN_DATA) {
		changed = 0;
	} else if (error_code != SLURM_SUCCESS) {
		/* The error label is already displayed, nothing to redo. */
		if (view == ERROR_VIEW)
			goto end_it;
		if (display_widget)
			gtk_widget_destroy(display_widget);
		view = ERROR_VIEW;
		/*
		 * Bounded formatting: the error text has no guaranteed
		 * length, so never write past the end of error_char.
		 */
		snprintf(error_char, sizeof(error_char),
			 "slurm_load_front_end: %s",
			 slurm_strerror(slurm_get_errno()));
		label = gtk_label_new(error_char);
		gtk_table_attach_defaults(table, label, 0, 1, 0, 1);
		gtk_widget_show(label);
		display_widget = gtk_widget_ref(GTK_WIDGET(label));
		goto end_it;
	}

display_it:
	info_list = _create_front_end_info_list(front_end_info_ptr, changed);
	if (!info_list)
		goto reset_curs;
	/* set up the grid */
	if (display_widget && GTK_IS_TREE_VIEW(display_widget) &&
	    gtk_tree_selection_count_selected_rows(
		    gtk_tree_view_get_selection(
			    GTK_TREE_VIEW(display_widget)))) {
		GtkTreeViewColumn *focus_column = NULL;
		/* highlight the correct nodes from the last selection */
		gtk_tree_view_get_cursor(GTK_TREE_VIEW(display_widget),
					 &path, &focus_column);
	}
	if (!path) {
		sview_front_end_info_t *fe_ptr;
		itr = list_iterator_create(info_list);
		while ((fe_ptr = list_next(itr))) {
			/*
			 * node_inx is read as consecutive (first, last)
			 * pairs; a negative entry ends the walk.
			 */
			j = 0;
			while (fe_ptr->node_inx[j] >= 0) {
				change_grid_color(grid_button_list,
						  fe_ptr->node_inx[j],
						  fe_ptr->node_inx[j+1],
						  fe_ptr->color_inx,
						  true, 0);
				j += 2;
			}
		}
		list_iterator_destroy(itr);
		change_grid_color(grid_button_list, -1, -1,
				  MAKE_WHITE, true, 0);
	} else {
		highlight_grid(GTK_TREE_VIEW(display_widget),
			       SORTID_NODE_INX, SORTID_COLOR_INX,
			       grid_button_list);
		gtk_tree_path_free(path);
	}

	/* Drop a stale error label before building the real view. */
	if (view == ERROR_VIEW && display_widget) {
		gtk_widget_destroy(display_widget);
		display_widget = NULL;
	}
	if (!display_widget) {
		tree_view = create_treeview(local_display_data,
					    &grid_button_list);
		gtk_tree_selection_set_mode(
			gtk_tree_view_get_selection(tree_view),
			GTK_SELECTION_MULTIPLE);
		display_widget = gtk_widget_ref(GTK_WIDGET(tree_view));
		gtk_table_attach_defaults(table,
					  GTK_WIDGET(tree_view),
					  0, 1, 0, 1);
		/* since this function sets the model of the tree_view
		   to the treestore we don't really care about
		   the return value */
		create_treestore(tree_view, display_data_front_end,
				 SORTID_CNT, SORTID_NAME, SORTID_COLOR);
	}

	view = INFO_VIEW;
	_update_info_front_end(info_list, GTK_TREE_VIEW(display_widget));
end_it:
	toggled = false;
	force_refresh = false;
reset_curs:
	if (main_window && main_window->window)
		gdk_window_set_cursor(main_window->window, NULL);
	return;
}
extern int sacctmgr_list_txn(int argc, char *argv[]) { int rc = SLURM_SUCCESS; slurmdb_txn_cond_t *txn_cond = xmalloc(sizeof(slurmdb_txn_cond_t)); List txn_list = NULL; slurmdb_txn_rec_t *txn = NULL; int i=0; ListIterator itr = NULL; ListIterator itr2 = NULL; int field_count = 0; print_field_t *field = NULL; List format_list = list_create(slurm_destroy_char); List print_fields_list; /* types are of print_field_t */ for (i=0; i<argc; i++) { int command_len = strlen(argv[i]); if (!strncasecmp (argv[i], "Where", MAX(command_len, 5)) || !strncasecmp (argv[i], "Set", MAX(command_len, 3))) i++; _set_cond(&i, argc, argv, txn_cond, format_list); } if (exit_code) { slurmdb_destroy_txn_cond(txn_cond); list_destroy(format_list); return SLURM_ERROR; } if (!list_count(format_list)) { slurm_addto_char_list(format_list, "T,Action,Actor,Where,Info"); if (txn_cond->with_assoc_info) slurm_addto_char_list(format_list, "User,Account,Cluster"); } print_fields_list = sacctmgr_process_format_list(format_list); list_destroy(format_list); if (exit_code) { list_destroy(print_fields_list); return SLURM_ERROR; } txn_list = acct_storage_g_get_txn(db_conn, my_uid, txn_cond); slurmdb_destroy_txn_cond(txn_cond); if (!txn_list) { exit_code=1; fprintf(stderr, " Error with request: %s\n", slurm_strerror(errno)); list_destroy(print_fields_list); return SLURM_ERROR; } itr = list_iterator_create(txn_list); itr2 = list_iterator_create(print_fields_list); print_fields_header(print_fields_list); field_count = list_count(print_fields_list); while((txn = list_next(itr))) { int curr_inx = 1; while((field = list_next(itr2))) { switch(field->type) { case PRINT_ACCT: field->print_routine(field, txn->accts, (curr_inx == field_count)); break; case PRINT_ACTIONRAW: field->print_routine( field, txn->action, (curr_inx == field_count)); break; case PRINT_ACTION: field->print_routine( field, slurmdbd_msg_type_2_str(txn->action, 0), (curr_inx == field_count)); break; case PRINT_ACTOR: field->print_routine(field, 
txn->actor_name, (curr_inx == field_count)); break; case PRINT_CLUSTER: field->print_routine(field, txn->clusters, (curr_inx == field_count)); break; case PRINT_ID: field->print_routine(field, txn->id, (curr_inx == field_count)); break; case PRINT_INFO: field->print_routine(field, txn->set_info, (curr_inx == field_count)); break; case PRINT_TS: field->print_routine(field, txn->timestamp, (curr_inx == field_count)); break; case PRINT_USER: field->print_routine(field, txn->users, (curr_inx == field_count)); break; case PRINT_WHERE: field->print_routine(field, txn->where_query, (curr_inx == field_count)); break; default: field->print_routine(field, NULL, (curr_inx == field_count)); break; } curr_inx++; } list_iterator_reset(itr2); printf("\n"); } list_iterator_destroy(itr2); list_iterator_destroy(itr); list_destroy(txn_list); list_destroy(print_fields_list); return rc; }
/*
 * slurm_job_step_get_pids - gather the pids of a job step from its nodes
 *
 * IN job_id
 * IN step_id
 * IN node_list - nodes to query; when NULL the step layout is looked up and
 *	every node of the step is queried
 * IN/OUT resp - if *resp is NULL a response message is allocated here and
 *	stored in *resp, otherwise the supplied message is filled in
 * RET SLURM_SUCCESS on success, otherwise an error code
 */
extern int slurm_job_step_get_pids(uint32_t job_id, uint32_t step_id,
				   char *node_list,
				   job_step_pids_response_msg_t **resp)
{
	int rc = SLURM_SUCCESS;
	slurm_msg_t req_msg;
	job_step_id_msg_t req;
	ListIterator reply_itr;
	List reply_list = NULL;
	ret_data_info_t *reply = NULL;
	slurm_step_layout_t *layout = NULL;
	job_step_pids_response_msg_t *out;
	bool allocated_here = false;

	xassert(resp);

	if (node_list == NULL) {
		layout = slurm_job_step_layout_get(job_id, step_id);
		if (layout == NULL) {
			rc = errno;
			error("slurm_job_step_get_pids: "
			      "problem getting step_layout for %u.%u: %s",
			      job_id, step_id, slurm_strerror(rc));
			return rc;
		}
		/* borrowed from the layout, which is released at cleanup */
		node_list = layout->node_list;
	}

	if (*resp) {
		out = *resp;
	} else {
		out = xmalloc(sizeof(job_step_pids_response_msg_t));
		*resp = out;
		allocated_here = true;
	}

	debug("slurm_job_step_get_pids: "
	      "getting pid information of job %u.%u on nodes %s",
	      job_id, step_id, node_list);

	slurm_msg_t_init(&req_msg);

	memset(&req, 0, sizeof(job_step_id_msg_t));
	out->job_id = req.job_id = job_id;
	out->step_id = req.step_id = step_id;

	req_msg.msg_type = REQUEST_JOB_STEP_PIDS;
	req_msg.data = &req;

	reply_list = slurm_send_recv_msgs(node_list, &req_msg, 0, false);
	if (reply_list == NULL) {
		error("slurm_job_step_get_pids: got an error no list returned");
		rc = SLURM_ERROR;
		/* only undo what this call allocated */
		if (allocated_here) {
			slurm_job_step_pids_response_msg_free(out);
			*resp = NULL;
		}
		goto cleanup;
	}

	reply_itr = list_iterator_create(reply_list);
	while ((reply = list_next(reply_itr))) {
		if (reply->type == RESPONSE_JOB_STEP_PIDS) {
			if (!out->pid_list)
				out->pid_list = list_create(
					slurm_free_job_step_pids);
			/* the payload now belongs to pid_list */
			list_push(out->pid_list, reply->data);
			reply->data = NULL;
		} else if (reply->type == RESPONSE_SLURM_RC) {
			rc = slurm_get_return_code(reply->type, reply->data);
			error("slurm_job_step_get_pids: "
			      "there was an error with the "
			      "list pid request rc = %s",
			      slurm_strerror(rc));
		} else {
			rc = slurm_get_return_code(reply->type, reply->data);
			error("slurm_job_step_get_pids: "
			      "unknown return given %d rc = %s",
			      reply->type, slurm_strerror(rc));
		}
	}
	list_iterator_destroy(reply_itr);
	list_destroy(reply_list);

	if (out->pid_list)
		list_sort(out->pid_list, (ListCmpF)_sort_pids_by_name);

cleanup:
	slurm_step_layout_destroy(layout);

	return rc;
}
/*
 * _handle_checkpoint_tasks - service a checkpoint request read from fd.
 *
 * The request carries a timestamp, a length and (when the length is
 * non-zero) an image directory string. The outcome of the request is written
 * back to fd as an int return code.
 *
 * IN fd - descriptor the request is read from and the reply written to
 * IN job - step record the request applies to
 * IN uid - user id of the requester
 * RET SLURM_SUCCESS if the reply was written (whatever return code it
 *	carries), SLURM_FAILURE if reading the request or writing the reply
 *	failed
 */
static int _handle_checkpoint_tasks(int fd, stepd_step_rec_t *job, uid_t uid)
{
	int rc = SLURM_SUCCESS;
	time_t timestamp;
	int len;
	char *image_dir = NULL;

	debug3("_handle_checkpoint_tasks for job %u.%u",
	       job->jobid, job->stepid);

	/* NOTE(review): safe_read()/safe_write() are presumed to jump to
	 * the rwfail label on a failed transfer - confirm against the macro
	 * definitions */
	safe_read(fd, &timestamp, sizeof(time_t));
	safe_read(fd, &len, sizeof(int));
	if (len) {
		image_dir = xmalloc (len);
		safe_read(fd, image_dir, len); /* '\0' terminated */
	}

	debug3(" uid = %d", uid);
	if (uid != job->uid && !_slurm_authorized_user(uid)) {
		debug("checkpoint req from uid %ld for job %u.%u "
		      "owned by uid %ld",
		      (long)uid, job->jobid, job->stepid, (long)job->uid);
		rc = EPERM;
		goto done;
	}

	if (job->ckpt_timestamp &&
	    timestamp == job->ckpt_timestamp) {
		debug("duplicate checkpoint req for job %u.%u, "
		      "timestamp %ld. discarded.",
		      job->jobid, job->stepid, (long)timestamp);
		rc = ESLURM_ALREADY_DONE; /* EINPROGRESS? */
		goto done;
	}

	/*
	 * Sanity checks
	 */
	if (job->pgid <= (pid_t)1) {
		debug ("step %u.%u invalid [jmgr_pid:%d pgid:%u]",
		       job->jobid, job->stepid, job->jmgr_pid, job->pgid);
		rc = ESLURMD_JOB_NOTRUNNING;
		goto done;
	}

	pthread_mutex_lock(&suspend_mutex);
	if (suspended) {
		rc = ESLURMD_STEP_SUSPENDED;
		pthread_mutex_unlock(&suspend_mutex);
		goto done;
	}

	/* set timestamp in case another request comes */
	job->ckpt_timestamp = timestamp;

	/* TODO: do we need job->ckpt_dir any more,
	 *	except for checkpoint/xlch? */

	/* call the plugin to send the request */
	if (checkpoint_signal_tasks(job, image_dir) != SLURM_SUCCESS) {
		rc = -1;
		verbose("Error sending checkpoint request to %u.%u: %s",
			job->jobid, job->stepid, slurm_strerror(rc));
	} else {
		verbose("Sent checkpoint request to %u.%u",
			job->jobid, job->stepid);
	}

	pthread_mutex_unlock(&suspend_mutex);

done:
	/* Send the return code */
	safe_write(fd, &rc, sizeof(int));
	xfree(image_dir);
	return SLURM_SUCCESS;

rwfail:
	/* image_dir may already be allocated when a transfer fails */
	xfree(image_dir);
	return SLURM_FAILURE;
}
/* Modify a job: * CMD=MODIFYJOB ARG=<jobid> * [BANK=<name>;] * [COMMENT=<whatever>;] * [DEPEND=afterany:<jobid>;] * [JOBNAME=<name>;] * [MINSTARTTIME=<uts>;] * [NODES=<number>;] * [PARTITION=<name>;] * [RFEATURES=<features>;] * [TIMELIMT=<seconds>;] * [VARIABLELIST=<env_vars>;] * [GRES=<name:value>;] * [WCKEY=<name>;] * * RET 0 on success, -1 on failure */ extern int job_modify_wiki(char *cmd_ptr, int *err_code, char **err_msg) { char *arg_ptr, *bank_ptr, *depend_ptr, *nodes_ptr, *start_ptr; char *host_ptr, *name_ptr, *part_ptr, *time_ptr, *tmp_char; char *comment_ptr, *feature_ptr, *env_ptr, *gres_ptr, *wckey_ptr; int i, slurm_rc; uint32_t jobid, new_node_cnt = 0, new_time_limit = 0; static char reply_msg[128]; /* Locks: write job, read node and partition info */ slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK }; arg_ptr = strstr(cmd_ptr, "ARG="); if (arg_ptr == NULL) { *err_code = -300; *err_msg = "MODIFYJOB lacks ARG="; error("wiki: MODIFYJOB lacks ARG="); return -1; } /* Change all parsed "=" to ":" then search for remaining "=" * and report results as unrecognized options */ arg_ptr[3] = ':'; arg_ptr += 4; jobid = strtoul(arg_ptr, &tmp_char, 10); if ((tmp_char[0] != '\0') && (!isspace(tmp_char[0]))) { *err_code = -300; *err_msg = "Invalid ARG value"; error("wiki: MODIFYJOB has invalid jobid"); return -1; } bank_ptr = strstr(cmd_ptr, "BANK="); comment_ptr = strstr(cmd_ptr, "COMMENT="); depend_ptr = strstr(cmd_ptr, "DEPEND="); gres_ptr = strstr(cmd_ptr, "GRES="); host_ptr = strstr(cmd_ptr, "HOSTLIST="); name_ptr = strstr(cmd_ptr, "JOBNAME="); start_ptr = strstr(cmd_ptr, "MINSTARTTIME="); nodes_ptr = strstr(cmd_ptr, "NODES="); part_ptr = strstr(cmd_ptr, "PARTITION="); feature_ptr = strstr(cmd_ptr, "RFEATURES="); time_ptr = strstr(cmd_ptr, "TIMELIMIT="); env_ptr = strstr(cmd_ptr, "VARIABLELIST="); wckey_ptr = strstr(cmd_ptr, "WCKEY="); if (bank_ptr) { bank_ptr[4] = ':'; bank_ptr += 5; null_term(bank_ptr); } if (comment_ptr) { 
comment_ptr[7] = ':'; comment_ptr += 8; if (comment_ptr[0] == '\"') { comment_ptr++; for (i=0; ; i++) { if (comment_ptr[i] == '\0') break; if (comment_ptr[i] == '\"') { comment_ptr[i] = '\0'; break; } } } else if (comment_ptr[0] == '\'') { comment_ptr++; for (i=0; ; i++) { if (comment_ptr[i] == '\0') break; if (comment_ptr[i] == '\'') { comment_ptr[i] = '\0'; break; } } } else null_term(comment_ptr); } if (depend_ptr) { depend_ptr[6] = ':'; depend_ptr += 7; null_term(depend_ptr); } if (feature_ptr) { feature_ptr[9] = ':'; feature_ptr += 10; null_term(feature_ptr); } if (gres_ptr) { gres_ptr[4] = ':'; gres_ptr += 5; null_term(gres_ptr); } if (host_ptr) { host_ptr[8] = ':'; host_ptr += 9; null_term(host_ptr); } if (name_ptr) { name_ptr[7] = ':'; name_ptr += 8; if (name_ptr[0] == '\"') { name_ptr++; for (i=0; ; i++) { if (name_ptr[i] == '\0') break; if (name_ptr[i] == '\"') { name_ptr[i] = '\0'; break; } } } else if (name_ptr[0] == '\'') { name_ptr++; for (i=0; ; i++) { if (name_ptr[i] == '\0') break; if (name_ptr[i] == '\'') { name_ptr[i] = '\0'; break; } } } else null_term(name_ptr); } if (start_ptr) { start_ptr[12] = ':'; start_ptr += 13; null_term(start_ptr); } if (nodes_ptr) { nodes_ptr[5] = ':'; nodes_ptr += 6; new_node_cnt = strtoul(nodes_ptr, NULL, 10); } if (part_ptr) { part_ptr[9] = ':'; part_ptr += 10; null_term(part_ptr); } if (time_ptr) { time_ptr[9] = ':'; time_ptr += 10; new_time_limit = strtoul(time_ptr, NULL, 10); } if (env_ptr) { env_ptr[12] = ':'; env_ptr += 13; null_term(env_ptr); } if (wckey_ptr) { wckey_ptr[5] = ':'; wckey_ptr += 6; null_term(wckey_ptr); } /* Look for any un-parsed "=" ignoring anything after VARIABLELIST * which is expected to contain "=" in its value*/ tmp_char = strchr(cmd_ptr, '='); if (tmp_char && (!env_ptr || (env_ptr > tmp_char))) { tmp_char[0] = '\0'; while (tmp_char[-1] && (!isspace(tmp_char[-1]))) tmp_char--; error("wiki: Invalid MODIFYJOB option %s", tmp_char); } lock_slurmctld(job_write_lock); slurm_rc = 
_job_modify(jobid, bank_ptr, depend_ptr, host_ptr, new_node_cnt, part_ptr, new_time_limit, name_ptr, start_ptr, feature_ptr, env_ptr, comment_ptr, gres_ptr, wckey_ptr); unlock_slurmctld(job_write_lock); if (slurm_rc != SLURM_SUCCESS) { *err_code = -700; *err_msg = slurm_strerror(slurm_rc); error("wiki: Failed to modify job %u (%m)", jobid); return -1; } snprintf(reply_msg, sizeof(reply_msg), "job %u modified successfully", jobid); *err_msg = reply_msg; return 0; }
extern int sacctmgr_archive_load(int argc, char *argv[]) { int rc = SLURM_SUCCESS; slurmdb_archive_rec_t *arch_rec = xmalloc(sizeof(slurmdb_archive_rec_t)); int i=0, command_len = 0; struct stat st; for (i=0; i<argc; i++) { int end = parse_option_end(argv[i]); if (!end) command_len=strlen(argv[i]); else { command_len=end-1; if (argv[i][end] == '=') { end++; } } if (!end || !strncasecmp (argv[i], "File", MAX(command_len, 1))) { arch_rec->archive_file = strip_quotes(argv[i]+end, NULL, 0); } else if (!strncasecmp (argv[i], "Insert", MAX(command_len, 2))) { arch_rec->insert = strip_quotes(argv[i]+end, NULL, 1); } else { exit_code=1; fprintf(stderr, " Unknown option: %s\n", argv[i]); } } if (exit_code) { slurmdb_destroy_archive_rec(arch_rec); return SLURM_ERROR; } if (arch_rec->archive_file) { char *fullpath; char cwd[MAXPATHLEN + 1]; int mode = R_OK; if ((getcwd(cwd, MAXPATHLEN)) == NULL) fatal("getcwd failed: %m"); if ((fullpath = search_path(cwd, arch_rec->archive_file, true, mode))) { xfree(arch_rec->archive_file); arch_rec->archive_file = fullpath; } if (stat(arch_rec->archive_file, &st) < 0) { exit_code = errno; fprintf(stderr, " load: Failed to stat %s: %m\n " "Note: For archive load, the file must be on " "the calling host.\n", arch_rec->archive_file); return SLURM_ERROR; } } rc = jobacct_storage_g_archive_load(db_conn, arch_rec); if (rc == SLURM_SUCCESS) { if (commit_check("Would you like to commit changes?")) { acct_storage_g_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); acct_storage_g_commit(db_conn, 0); } } else { exit_code=1; fprintf(stderr, " Problem loading archive file: %s\n", slurm_strerror(rc)); rc = SLURM_ERROR; } slurmdb_destroy_archive_rec(arch_rec); return rc; }
extern void get_job(void) { int error_code = -1, i, recs; static int printed_jobs = 0; static int count = 0; static job_info_msg_t *job_info_ptr = NULL, *new_job_ptr = NULL; job_info_t *job_ptr = NULL; uint16_t show_flags = 0; bitstr_t *nodes_req = NULL; static uint16_t last_flags = 0; if (params.all_flag) show_flags |= SHOW_ALL; if (job_info_ptr) { if (show_flags != last_flags) job_info_ptr->last_update = 0; error_code = slurm_load_jobs(job_info_ptr->last_update, &new_job_ptr, show_flags); if (error_code == SLURM_SUCCESS) slurm_free_job_info_msg(job_info_ptr); else if (slurm_get_errno() == SLURM_NO_CHANGE_IN_DATA) { error_code = SLURM_SUCCESS; new_job_ptr = job_info_ptr; } } else error_code = slurm_load_jobs((time_t) NULL, &new_job_ptr, show_flags); last_flags = show_flags; if (error_code) { if (quiet_flag != 1) { if (!params.commandline) { mvwprintw(text_win, main_ycord, 1, "slurm_load_jobs: %s", slurm_strerror(slurm_get_errno())); main_ycord++; } else { printf("slurm_load_jobs: %s\n", slurm_strerror(slurm_get_errno())); } } } if (!params.no_header) _print_header_job(); if (new_job_ptr) recs = new_job_ptr->record_count; else recs = 0; if (!params.commandline) if ((text_line_cnt+printed_jobs) > count) text_line_cnt--; printed_jobs = 0; count = 0; if (params.hl) nodes_req = get_requested_node_bitmap(); for (i = 0; i < recs; i++) { job_ptr = &(new_job_ptr->job_array[i]); if (!IS_JOB_PENDING(job_ptr) && !IS_JOB_RUNNING(job_ptr) && !IS_JOB_SUSPENDED(job_ptr) && !IS_JOB_COMPLETING(job_ptr)) continue; /* job has completed */ if (nodes_req) { int overlap = 0; bitstr_t *loc_bitmap = bit_alloc(bit_size(nodes_req)); inx2bitstr(loc_bitmap, job_ptr->node_inx); overlap = bit_overlap(loc_bitmap, nodes_req); FREE_NULL_BITMAP(loc_bitmap); if (!overlap) continue; } if (job_ptr->node_inx[0] != -1) { int j = 0; job_ptr->num_nodes = 0; while (job_ptr->node_inx[j] >= 0) { job_ptr->num_nodes += (job_ptr->node_inx[j + 1] + 1) - job_ptr->node_inx[j]; set_grid_inx(job_ptr->node_inx[j], 
job_ptr->node_inx[j + 1], count); j += 2; } if (!params.commandline) { if ((count >= text_line_cnt) && (printed_jobs < (getmaxy(text_win) - 4))) { job_ptr->num_cpus = (int)letters[count%62]; wattron(text_win, COLOR_PAIR(colors[count%6])); _print_text_job(job_ptr); wattroff(text_win, COLOR_PAIR(colors[count%6])); printed_jobs++; } } else { job_ptr->num_cpus = (int)letters[count%62]; _print_text_job(job_ptr); } count++; } if (count == 128) count = 0; } for (i = 0; i < recs; i++) { job_ptr = &(new_job_ptr->job_array[i]); if (!IS_JOB_PENDING(job_ptr)) continue; /* job has completed */ if (!params.commandline) { if ((count>=text_line_cnt) && (printed_jobs < (getmaxy(text_win) - 4))) { xfree(job_ptr->nodes); job_ptr->nodes = xstrdup("waiting..."); job_ptr->num_cpus = (int) letters[count%62]; wattron(text_win, COLOR_PAIR(colors[count%6])); _print_text_job(job_ptr); wattroff(text_win, COLOR_PAIR(colors[count%6])); printed_jobs++; } } else { xfree(job_ptr->nodes); job_ptr->nodes = xstrdup("waiting..."); job_ptr->num_cpus = (int) letters[count%62]; _print_text_job(job_ptr); printed_jobs++; } count++; if (count == 128) count = 0; } if (params.commandline && params.iterate) printf("\n"); if (!params.commandline) main_ycord++; job_info_ptr = new_job_ptr; return; }
/*
 * get_info_bb - refresh the burst buffer page.
 * IN table - table the page widget is attached to. When table is NULL only
 *	the menu setting is copied; when both table and display_data are NULL
 *	the cached widget is destroyed and the cached data pointer is cleared.
 * IN display_data - page description; when non-NULL it becomes
 *	local_display_data
 */
extern void get_info_bb(GtkTable *table, display_data_t *display_data)
{
	int error_code = SLURM_SUCCESS;
	List info_list = NULL;
	static int view = -1;
	static burst_buffer_info_msg_t *bb_info_ptr = NULL;
	char error_char[100];
	GtkWidget *label = NULL;
	GtkTreeView *tree_view = NULL;
	static GtkWidget *display_widget = NULL;
	GtkTreePath *path = NULL;
	static bool set_opts = FALSE;

	/* page options are registered on the first call only */
	if (!set_opts) {
		set_page_opts(BB_PAGE, display_data_bb,
			      SORTID_CNT, _initial_page_opts);
	}
	set_opts = TRUE;

	/* reset */
	if (!table && !display_data) {
		if (display_widget)
			gtk_widget_destroy(display_widget);
		display_widget = NULL;
		bb_info_ptr = NULL;
		goto reset_curs;
	}

	if (display_data)
		local_display_data = display_data;
	if (!table) {
		display_data_bb->set_menu = local_display_data->set_menu;
		goto reset_curs;
	}
	if (display_widget && toggled) {
		gtk_widget_destroy(display_widget);
		display_widget = NULL;
		goto display_it;
	}

	error_code = get_new_info_bb(&bb_info_ptr, force_refresh);
	if ((error_code != SLURM_NO_CHANGE_IN_DATA) &&
	    (error_code != SLURM_SUCCESS)) {
		if (view == ERROR_VIEW)
			goto end_it;
		if (display_widget)
			gtk_widget_destroy(display_widget);
		view = ERROR_VIEW;
		/* bounded write: the error text length is not known here */
		snprintf(error_char, sizeof(error_char),
			 "get_new_info_bb: %s",
			 slurm_strerror(slurm_get_errno()));
		label = gtk_label_new(error_char);
		gtk_table_attach_defaults(table, label, 0, 1, 0, 1);
		gtk_widget_show(label);
		display_widget = gtk_widget_ref(GTK_WIDGET(label));
		goto end_it;
	}

display_it:
	info_list = _create_bb_info_list(bb_info_ptr);
	if (!info_list) {
		goto reset_curs;
	}

	/* set up the grid */
	if (display_widget && GTK_IS_TREE_VIEW(display_widget) &&
	    gtk_tree_selection_count_selected_rows(
		    gtk_tree_view_get_selection(
			    GTK_TREE_VIEW(display_widget)))) {
		GtkTreeViewColumn *focus_column = NULL;
		/* highlight the correct nodes from the last selection */
		gtk_tree_view_get_cursor(GTK_TREE_VIEW(display_widget),
					 &path, &focus_column);
	}
	/* The cursor path is not used on this page, but the caller of
	 * gtk_tree_view_get_cursor() owns it and must release it */
	if (path) {
		gtk_tree_path_free(path);
		path = NULL;
	}

	change_grid_color(grid_button_list, -1, -1,
			  MAKE_WHITE, true, 0);

	if (view == ERROR_VIEW && display_widget) {
		gtk_widget_destroy(display_widget);
		display_widget = NULL;
	}
	if (!display_widget) {
		tree_view = create_treeview(local_display_data,
					    &grid_button_list);
		gtk_tree_selection_set_mode(
			gtk_tree_view_get_selection(tree_view),
			GTK_SELECTION_MULTIPLE);
		display_widget = gtk_widget_ref(GTK_WIDGET(tree_view));
		gtk_table_attach_defaults(table,
					  GTK_WIDGET(tree_view),
					  0, 1, 0, 1);
		/* since this function sets the model of the tree_view
		   to the treestore we don't really care about
		   the return value */
		create_treestore(tree_view, display_data_bb,
				 SORTID_CNT, SORTID_NAME, SORTID_COLOR);
	}

	view = INFO_VIEW;
	_update_info_bb(info_list, GTK_TREE_VIEW(display_widget));

end_it:
	toggled = FALSE;
	force_refresh = FALSE;

reset_curs:
	if (main_window && main_window->window)
		gdk_window_set_cursor(main_window->window, NULL);
	return;
}
extern int sacctmgr_delete_account(int argc, char **argv) { int rc = SLURM_SUCCESS; slurmdb_account_cond_t *acct_cond = xmalloc(sizeof(slurmdb_account_cond_t)); int i = 0; List ret_list = NULL, local_assoc_list = NULL; ListIterator itr = NULL; int cond_set = 0, prev_set = 0; for (i = 0; i < argc; i++) { int command_len = strlen(argv[i]); if (!xstrncasecmp(argv[i], "Where", MAX(command_len, 5)) || !xstrncasecmp(argv[i], "Set", MAX(command_len, 3))) i++; prev_set = _set_cond(&i, argc, argv, acct_cond, NULL); cond_set |= prev_set; } if (!cond_set) { exit_code = 1; fprintf(stderr, " No conditions given to remove, not executing.\n"); slurmdb_destroy_account_cond(acct_cond); return SLURM_ERROR; } if (exit_code) { slurmdb_destroy_account_cond(acct_cond); return SLURM_ERROR; } if (!acct_cond->assoc_cond) { error("%s: Association condition is NULL", __func__); slurmdb_destroy_account_cond(acct_cond); return SLURM_ERROR; } /* check to see if person is trying to remove root account. This is * bad, and should not be allowed outside of deleting a cluster. 
*/ if (acct_cond->assoc_cond && acct_cond->assoc_cond->acct_list && list_count(acct_cond->assoc_cond->acct_list)) { char *tmp_char = NULL; itr = list_iterator_create(acct_cond->assoc_cond->acct_list); while ((tmp_char = list_next(itr))) { if (!xstrcasecmp(tmp_char, "root")) break; } list_iterator_destroy(itr); if (tmp_char) { exit_code=1; fprintf(stderr, " You are not allowed to remove " "the root account.\n" " Use remove cluster instead.\n"); slurmdb_destroy_account_cond(acct_cond); return SLURM_ERROR; } } acct_cond->assoc_cond->only_defs = 1; local_assoc_list = slurmdb_associations_get( db_conn, acct_cond->assoc_cond); acct_cond->assoc_cond->only_defs = 0; notice_thread_init(); if (cond_set == 1) { ret_list = slurmdb_accounts_remove( db_conn, acct_cond); } else if (cond_set & 2) { ret_list = slurmdb_associations_remove( db_conn, acct_cond->assoc_cond); } rc = errno; notice_thread_fini(); slurmdb_destroy_account_cond(acct_cond); if (ret_list && list_count(ret_list)) { char *object = NULL; ListIterator itr = NULL; /* Check to see if person is trying to remove a default * account of a user. _isdefault only works with the * output from slurmdb_accounts_remove, and * with a previously got assoc_list. */ if (_isdefault(cond_set, ret_list, local_assoc_list)) { exit_code=1; fprintf(stderr, " Please either remove the " "accounts listed " "above from list and resubmit,\n" " or change these users default account to " "remove the account(s).\n" " Changes Discarded\n"); slurmdb_connection_commit(db_conn, 0); goto end_it; } itr = list_iterator_create(ret_list); /* If there were jobs running with an association to be deleted, don't. 
*/ if (rc == ESLURM_JOBS_RUNNING_ON_ASSOC) { fprintf(stderr, " Error with request: %s\n", slurm_strerror(rc)); while((object = list_next(itr))) { fprintf(stderr," %s\n", object); } slurmdb_connection_commit(db_conn, 0); goto end_it; } if (cond_set == 1) { printf(" Deleting accounts...\n"); } else if (cond_set & 2) { printf(" Deleting account associations...\n"); } while((object = list_next(itr))) { printf(" %s\n", object); } list_iterator_destroy(itr); if (commit_check("Would you like to commit changes?")) { slurmdb_connection_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); slurmdb_connection_commit(db_conn, 0); } } else if (ret_list) { printf(" Nothing deleted\n"); rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", slurm_strerror(errno)); rc = SLURM_ERROR; } end_it: FREE_NULL_LIST(ret_list); FREE_NULL_LIST(local_assoc_list); return rc; }
/*
 * specific_info_bb - fill a popup window with burst buffer information.
 * IN/OUT popup_win - popup to update; its spec_info widget and view are
 *	rebuilt as needed, and its toggled and force_refresh flags are cleared
 *	on every path that reaches end_it
 */
extern void specific_info_bb(popup_info_t *popup_win)
{
	int bb_error_code = SLURM_SUCCESS;
	static burst_buffer_info_msg_t *bb_info_ptr = NULL;
	specific_info_t *spec_info = popup_win->spec_info;
	char error_char[100];
	GtkWidget *label = NULL;
	GtkTreeView *tree_view = NULL;
	List bb_list = NULL;
	List send_bb_list = NULL;
	sview_bb_info_t *sview_bb_info_ptr = NULL;
	ListIterator itr = NULL;

	if (!spec_info->display_widget) {
		setup_popup_info(popup_win, display_data_bb, SORTID_CNT);
	}

	if (spec_info->display_widget && popup_win->toggled) {
		gtk_widget_destroy(spec_info->display_widget);
		spec_info->display_widget = NULL;
		goto display_it;
	}

	if ((bb_error_code = get_new_info_bb(&bb_info_ptr,
					     popup_win->force_refresh))
	    == SLURM_NO_CHANGE_IN_DATA) {
		if (!spec_info->display_widget
		    || spec_info->view == ERROR_VIEW)
			goto display_it;
	} else if (bb_error_code != SLURM_SUCCESS) {
		if (spec_info->view == ERROR_VIEW)
			goto end_it;
		spec_info->view = ERROR_VIEW;
		if (spec_info->display_widget)
			gtk_widget_destroy(spec_info->display_widget);
		/* bounded write: the error text length is not known here */
		snprintf(error_char, sizeof(error_char),
			 "get_new_info_bb: %s",
			 slurm_strerror(slurm_get_errno()));
		label = gtk_label_new(error_char);
		gtk_table_attach_defaults(popup_win->table,
					  label,
					  0, 1, 0, 1);
		gtk_widget_show(label);
		spec_info->display_widget = gtk_widget_ref(label);
		goto end_it;
	}

display_it:

	bb_list = _create_bb_info_list(bb_info_ptr);

	if (!bb_list)
		return;

	if (spec_info->view == ERROR_VIEW && spec_info->display_widget) {
		gtk_widget_destroy(spec_info->display_widget);
		spec_info->display_widget = NULL;
	}

	if (spec_info->type != INFO_PAGE && !spec_info->display_widget) {
		tree_view = create_treeview(local_display_data,
					    &popup_win->grid_button_list);
		gtk_tree_selection_set_mode(
			gtk_tree_view_get_selection(tree_view),
			GTK_SELECTION_MULTIPLE);
		spec_info->display_widget =
			gtk_widget_ref(GTK_WIDGET(tree_view));
		gtk_table_attach_defaults(popup_win->table,
					  GTK_WIDGET(tree_view),
					  0, 1, 0, 1);
		/* since this function sets the model of the tree_view
		 * to the treestore we don't really care about
		 * the return value */
		create_treestore(tree_view, popup_win->display_data,
				 SORTID_CNT, SORTID_NAME, SORTID_COLOR);
	}

	setup_popup_grid_list(popup_win);

	spec_info->view = INFO_VIEW;
	if (spec_info->type == INFO_PAGE) {
		_display_info_bb(bb_list, popup_win);
		goto end_it;
	}

	/* just linking to another list, don't free the inside, just the list */
	send_bb_list = list_create(NULL);
	itr = list_iterator_create(bb_list);
	/* Set up additional menu options(ie the right click menu stuff) */
	while ((sview_bb_info_ptr = list_next(itr))) {
		/* Since we will not use any of these pages we will */
		/* leave them blank */
		switch (spec_info->type) {
		case PART_PAGE:
		case BLOCK_PAGE:
		case NODE_PAGE:
		case JOB_PAGE:
		case RESV_PAGE:
		default:
			g_print("Unknown type %d\n", spec_info->type);
			continue;
		}
		/* not reached today: every case above ends in "continue" */
		list_push(send_bb_list, sview_bb_info_ptr);
	}
	list_iterator_destroy(itr);
	post_setup_popup_grid_list(popup_win);

	_update_info_bb(send_bb_list,
			GTK_TREE_VIEW(spec_info->display_widget));

	FREE_NULL_LIST(send_bb_list);
end_it:
	popup_win->toggled = 0;
	popup_win->force_refresh = 0;

	return;
}
extern int sacctmgr_modify_account(int argc, char **argv) { int rc = SLURM_SUCCESS; slurmdb_account_cond_t *acct_cond = xmalloc(sizeof(slurmdb_account_cond_t)); slurmdb_account_rec_t *acct = xmalloc(sizeof(slurmdb_account_rec_t)); slurmdb_assoc_rec_t *assoc = xmalloc(sizeof(slurmdb_assoc_rec_t)); int i=0; int cond_set = 0, prev_set = 0, rec_set = 0, set = 0; List ret_list = NULL; slurmdb_init_assoc_rec(assoc, 0); for (i=0; i<argc; i++) { int command_len = strlen(argv[i]); if (!xstrncasecmp(argv[i], "Where", MAX(command_len, 5))) { i++; prev_set = _set_cond(&i, argc, argv, acct_cond, NULL); cond_set |= prev_set; } else if (!xstrncasecmp(argv[i], "Set", MAX(command_len, 3))) { i++; prev_set = _set_rec(&i, argc, argv, NULL, NULL, acct, assoc); rec_set |= prev_set; } else { prev_set = _set_cond(&i, argc, argv, acct_cond, NULL); cond_set |= prev_set; } } if (exit_code) { slurmdb_destroy_account_cond(acct_cond); slurmdb_destroy_account_rec(acct); slurmdb_destroy_assoc_rec(assoc); return SLURM_ERROR; } else if (!rec_set) { exit_code=1; fprintf(stderr, " You didn't give me anything to set\n"); slurmdb_destroy_account_cond(acct_cond); slurmdb_destroy_account_rec(acct); slurmdb_destroy_assoc_rec(assoc); return SLURM_ERROR; } else if (!cond_set) { if (!commit_check("You didn't set any conditions with 'WHERE'.\n" "Are you sure you want to continue?")) { printf("Aborted\n"); slurmdb_destroy_account_cond(acct_cond); slurmdb_destroy_account_rec(acct); slurmdb_destroy_assoc_rec(assoc); return SLURM_SUCCESS; } } // Special case: reset raw usage only if (assoc->usage) { rc = SLURM_ERROR; if (assoc->usage->usage_raw == 0.0) rc = sacctmgr_remove_assoc_usage(acct_cond->assoc_cond); else error("Raw usage can only be set to 0 (zero)"); slurmdb_destroy_account_cond(acct_cond); slurmdb_destroy_account_rec(acct); slurmdb_destroy_assoc_rec(assoc); return rc; } notice_thread_init(); if (rec_set & 1) { // process the account changes if (cond_set == 2) { exit_code=1; fprintf(stderr, " There was 
a problem with your " "'where' options.\n"); rc = SLURM_ERROR; goto assoc_start; } ret_list = slurmdb_accounts_modify( db_conn, acct_cond, acct); if (ret_list && list_count(ret_list)) { char *object = NULL; ListIterator itr = list_iterator_create(ret_list); printf(" Modified accounts...\n"); while((object = list_next(itr))) { printf(" %s\n", object); } list_iterator_destroy(itr); set = 1; } else if (ret_list) { printf(" Nothing modified\n"); rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", slurm_strerror(errno)); rc = SLURM_ERROR; } FREE_NULL_LIST(ret_list); } assoc_start: if (rec_set == 3 || rec_set == 2) { // process the association changes if (cond_set == 1 && !acct_cond->assoc_cond->acct_list) { rc = SLURM_ERROR; exit_code=1; fprintf(stderr, " There was a problem with your " "'where' options.\n"); goto assoc_end; } if (assoc->parent_acct) { slurmdb_account_rec_t *acct_rec = sacctmgr_find_account(assoc->parent_acct); if (!acct_rec) { exit_code=1; fprintf(stderr, " Parent Account %s doesn't exist.\n", assoc->parent_acct); rc = SLURM_ERROR; goto assoc_end; } } ret_list = slurmdb_associations_modify( db_conn, acct_cond->assoc_cond, assoc); if (ret_list && list_count(ret_list)) { set = 1; if (assoc->def_qos_id != NO_VAL) set = sacctmgr_check_default_qos( assoc->def_qos_id, acct_cond->assoc_cond); else if (assoc->qos_list) set = sacctmgr_check_default_qos( -1, acct_cond->assoc_cond); if (set) { char *object = NULL; ListIterator itr = list_iterator_create( ret_list); printf(" Modified account associations...\n"); while((object = list_next(itr))) { printf(" %s\n", object); } list_iterator_destroy(itr); set = 1; } } else if (ret_list) { printf(" Nothing modified\n"); rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", slurm_strerror(errno)); rc = SLURM_ERROR; } FREE_NULL_LIST(ret_list); } assoc_end: notice_thread_fini(); if (set) { if (commit_check("Would you like to commit changes?")) 
slurmdb_connection_commit(db_conn, 1); else { printf(" Changes Discarded\n"); slurmdb_connection_commit(db_conn, 0); } } slurmdb_destroy_account_cond(acct_cond); slurmdb_destroy_account_rec(acct); slurmdb_destroy_assoc_rec(assoc); return rc; }
/*
 * _thread_per_group_rpc - thread to issue an RPC for a group of nodes
 *                         sending message out to one and forwarding it to
 *                         others if necessary.
 *
 * The thread records its start time and an initial deadline in its thd_t,
 * sends the message (waiting for replies only when task_ptr->get_reply is
 * set), maps each reply's return code onto a thread state, and finally
 * publishes the reply list and state under the thread mutex before
 * signalling the condition variable.
 *
 * IN/OUT args - pointer to task_info_t, xfree'd on completion
 * RET always NULL
 */
static void *_thread_per_group_rpc(void *args)
{
	int rc = SLURM_SUCCESS;
	slurm_msg_t msg;
	task_info_t *task_ptr = (task_info_t *) args;
	/* we cache some pointers from task_info_t because we need
	 * to xfree args before being finished with their use. xfree
	 * is required for timely termination of this pthread because
	 * xfree could lock it at the end, preventing a timely
	 * thread_exit */
	pthread_mutex_t *thread_mutex_ptr = task_ptr->thread_mutex_ptr;
	pthread_cond_t *thread_cond_ptr = task_ptr->thread_cond_ptr;
	uint32_t *threads_active_ptr = task_ptr->threads_active_ptr;
	thd_t *thread_ptr = task_ptr->thread_struct_ptr;
	state_t thread_state = DSH_NO_RESP;
	slurm_msg_type_t msg_type = task_ptr->msg_type;
	bool is_kill_msg, srun_agent;
	List ret_list = NULL;
	ListIterator itr;
	ret_data_info_t *ret_data_info = NULL;
	/* NOTE(review): "found" is only ever assigned in this function */
	int found = 0;
	int sig_array[2] = {SIGUSR1, 0};
	/* Locks: Write job, write node */
	slurmctld_lock_t job_write_lock = {
		NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK };

	/* NOTE(review): args has already been dereferenced by the
	 * initializers above, so this assertion cannot catch a NULL
	 * argument before it is used */
	xassert(args != NULL);
	xsignal(SIGUSR1, _sig_handler);
	xsignal_unblock(sig_array);
	is_kill_msg = (	(msg_type == REQUEST_KILL_TIMELIMIT)	||
			(msg_type == REQUEST_TERMINATE_JOB) );
	/* message types that get the srun-specific error handling below */
	srun_agent = (	(msg_type == SRUN_PING)			||
			(msg_type == SRUN_EXEC)			||
			(msg_type == SRUN_JOB_COMPLETE)		||
			(msg_type == SRUN_STEP_MISSING)		||
			(msg_type == SRUN_TIMEOUT)		||
			(msg_type == SRUN_USER_MSG)		||
			(msg_type == RESPONSE_RESOURCE_ALLOCATION) ||
			(msg_type == SRUN_NODE_FAIL) );

	/* start_time is written before the mutex is taken; the state and
	 * the initial deadline are set while holding it */
	thread_ptr->start_time = time(NULL);

	slurm_mutex_lock(thread_mutex_ptr);
	thread_ptr->state = DSH_ACTIVE;
	thread_ptr->end_time = thread_ptr->start_time + COMMAND_TIMEOUT;
	slurm_mutex_unlock(thread_mutex_ptr);

	/* send request message */
	slurm_msg_t_init(&msg);
	msg.msg_type = msg_type;
	msg.data     = task_ptr->msg_args_ptr;
#if 0
 	info("sending message type %u to %s", msg_type, thread_ptr->nodelist);
#endif
	if (task_ptr->get_reply) {
		/* Replies wanted: use the explicit address when one was
		 * supplied, otherwise address the nodes by name */
		if(thread_ptr->addr) {
			msg.address = *thread_ptr->addr;

			if(!(ret_list = slurm_send_addr_recv_msgs(
				     &msg, thread_ptr->nodelist, 0))) {
				error("_thread_per_group_rpc: "
				      "no ret_list given");
				goto cleanup;
			}


		} else {
			if(!(ret_list = slurm_send_recv_msgs(
				     thread_ptr->nodelist,
				     &msg, 0, true))) {
				error("_thread_per_group_rpc: "
				      "no ret_list given");
				goto cleanup;
			}
		}
	} else {
		/* No reply wanted: send and go straight to cleanup */
		if(thread_ptr->addr) {
			//info("got the address");
			msg.address = *thread_ptr->addr;
		} else {
			//info("no address given");
			if(slurm_conf_get_addr(thread_ptr->nodelist,
					       &msg.address) == SLURM_ERROR) {
				error("_thread_per_group_rpc: "
				      "can't find address for host %s, "
				      "check slurm.conf",
				      thread_ptr->nodelist);
				goto cleanup;
			}
		}
		//info("sending %u to %s", msg_type, thread_ptr->nodelist);
		if (slurm_send_only_node_msg(&msg) == SLURM_SUCCESS) {
			thread_state = DSH_DONE;
		} else {
			if (!srun_agent)
				_comm_err(thread_ptr->nodelist, msg_type);
		}
		goto cleanup;
	}

	//info("got %d messages back", list_count(ret_list));
	found = 0;
	itr = list_iterator_create(ret_list);
	/* Translate each reply into a thread state; the state of the last
	 * reply processed is the one published for the thread at cleanup */
	while ((ret_data_info = list_next(itr)) != NULL) {
		rc = slurm_get_return_code(ret_data_info->type,
					   ret_data_info->data);
		/* SPECIAL CASE: Mark node as IDLE if job already complete */
		if (is_kill_msg &&
		    (rc == ESLURMD_KILL_JOB_ALREADY_COMPLETE)) {
			kill_job_msg_t *kill_job;
			kill_job = (kill_job_msg_t *)
				task_ptr->msg_args_ptr;
			rc = SLURM_SUCCESS;
			lock_slurmctld(job_write_lock);
			if (job_epilog_complete(kill_job->job_id,
						ret_data_info->
						node_name,
						rc))
				run_scheduler = true;
			unlock_slurmctld(job_write_lock);
		}
		/* SPECIAL CASE: Kill non-startable batch job,
		 * Requeue the job on ESLURMD_PROLOG_FAILED */
		if ((msg_type == REQUEST_BATCH_JOB_LAUNCH) &&
		    (rc != SLURM_SUCCESS) && (rc != ESLURMD_PROLOG_FAILED) &&
		    (ret_data_info->type != RESPONSE_FORWARD_FAILED)) {
			batch_job_launch_msg_t *launch_msg_ptr =
				task_ptr->msg_args_ptr;
			uint32_t job_id = launch_msg_ptr->job_id;
			info("Killing non-startable batch job %u: %s",
			     job_id, slurm_strerror(rc));
			thread_state = DSH_DONE;
			ret_data_info->err = thread_state;
			lock_slurmctld(job_write_lock);
			job_complete(job_id, 0, false, false, _wif_status());
			unlock_slurmctld(job_write_lock);
			/* this reply is fully handled; skip the switch */
			continue;
		}

		if (((msg_type == REQUEST_SIGNAL_TASKS) ||
		     (msg_type == REQUEST_TERMINATE_TASKS)) &&
		    (rc == ESRCH)) {
			/* process is already dead, not a real error */
			rc = SLURM_SUCCESS;
		}

		switch (rc) {
		case SLURM_SUCCESS:
			/* debug("agent processed RPC to node %s", */
			/*       ret_data_info->node_name); */
			thread_state = DSH_DONE;
			break;
		case SLURM_UNKNOWN_FORWARD_ADDR:
			error("We were unable to forward message to '%s'.  "
			      "Make sure the slurm.conf for each slurmd "
			      "contain all other nodes in your system.",
			      ret_data_info->node_name);
			thread_state = DSH_NO_RESP;
			break;
		case ESLURMD_EPILOG_FAILED:
			error("Epilog failure on host %s, "
			      "setting DOWN",
			      ret_data_info->node_name);

			thread_state = DSH_FAILED;
			break;
		case ESLURMD_PROLOG_FAILED:
			thread_state = DSH_FAILED;
			break;
		case ESLURM_INVALID_JOB_ID:
			/* Not indicative of a real error */
		case ESLURMD_JOB_NOTRUNNING:
			/* Not indicative of a real error */
			debug2("agent processed RPC to node %s: %s",
			       ret_data_info->node_name,
			       slurm_strerror(rc));

			thread_state = DSH_DONE;
			break;
		default:
			/* _comm_err() is called with errno set to the
			 * reply's own error when it has one, else to rc */
			if (!srun_agent) {
				if (ret_data_info->err)
					errno = ret_data_info->err;
				else
					errno = rc;
				rc = _comm_err(ret_data_info->node_name,
					       msg_type);
			}
			if (srun_agent)
				thread_state = DSH_FAILED;
			else if(ret_data_info->type == RESPONSE_FORWARD_FAILED)
				/* check if a forward failed */
				thread_state = DSH_NO_RESP;
			else {	/* some will fail that don't mean anything went
				 * bad like a job term request on a job that is
				 * already finished, we will just exit on those
				 * cases */
				thread_state = DSH_DONE;
			}
		}
		/* record the per-reply outcome in the reply itself */
		ret_data_info->err = thread_state;
	}
	list_iterator_destroy(itr);

cleanup:
	xfree(args);

	/* handled at end of thread just in case resend is needed */
	destroy_forward(&msg.forward);
	slurm_mutex_lock(thread_mutex_ptr);
	thread_ptr->ret_list = ret_list;
	thread_ptr->state = thread_state;
	/* end_time is overwritten here with the elapsed seconds since
	 * start_time, replacing the deadline stored earlier */
	thread_ptr->end_time = (time_t) difftime(time(NULL),
						 thread_ptr->start_time);
	/* Signal completion so another thread can replace us */
	(*threads_active_ptr)--;
	pthread_cond_signal(thread_cond_ptr);
	slurm_mutex_unlock(thread_mutex_ptr);
	return (void *) NULL;
}
extern int sacctmgr_add_qos(int argc, char *argv[]) { int rc = SLURM_SUCCESS; int i=0, limit_set=0; ListIterator itr = NULL; slurmdb_qos_rec_t *qos = NULL; slurmdb_qos_rec_t *start_qos = xmalloc(sizeof(slurmdb_qos_rec_t)); List name_list = list_create(slurm_destroy_char); char *description = NULL; char *name = NULL; List qos_list = NULL; char *qos_str = NULL; slurmdb_init_qos_rec(start_qos, 0, NO_VAL); for (i=0; i<argc; i++) { int command_len = strlen(argv[i]); if (!strncasecmp (argv[i], "Where", MAX(command_len, 5)) || !strncasecmp (argv[i], "Set", MAX(command_len, 3))) i++; limit_set += _set_rec(&i, argc, argv, name_list, start_qos); } if (exit_code) { list_destroy(name_list); xfree(description); return SLURM_ERROR; } else if (!list_count(name_list)) { list_destroy(name_list); slurmdb_destroy_qos_rec(start_qos); exit_code=1; fprintf(stderr, " Need name of qos to add.\n"); return SLURM_SUCCESS; } if (!g_qos_list) { g_qos_list = acct_storage_g_get_qos(db_conn, my_uid, NULL); if (!g_qos_list) { exit_code=1; fprintf(stderr, " Problem getting qos's " "from database. 
" "Contact your admin.\n"); list_destroy(name_list); xfree(description); return SLURM_ERROR; } } qos_list = list_create(slurmdb_destroy_qos_rec); itr = list_iterator_create(name_list); while((name = list_next(itr))) { qos = NULL; if (!sacctmgr_find_qos_from_list(g_qos_list, name)) { qos = xmalloc(sizeof(slurmdb_qos_rec_t)); slurmdb_init_qos_rec(qos, 0, NO_VAL); qos->name = xstrdup(name); if (start_qos->description) qos->description = xstrdup(start_qos->description); else qos->description = xstrdup(name); qos->flags = start_qos->flags; qos->grace_time = start_qos->grace_time; qos->grp_cpu_mins = start_qos->grp_cpu_mins; qos->grp_cpu_run_mins = start_qos->grp_cpu_run_mins; qos->grp_cpus = start_qos->grp_cpus; qos->grp_jobs = start_qos->grp_jobs; qos->grp_mem = start_qos->grp_mem; qos->grp_nodes = start_qos->grp_nodes; qos->grp_submit_jobs = start_qos->grp_submit_jobs; qos->grp_wall = start_qos->grp_wall; qos->max_cpu_mins_pj = start_qos->max_cpu_mins_pj; qos->max_cpu_run_mins_pu = start_qos->max_cpu_run_mins_pu; qos->max_cpus_pj = start_qos->max_cpus_pj; qos->max_cpus_pu = start_qos->max_cpus_pu; qos->max_jobs_pu = start_qos->max_jobs_pu; qos->max_nodes_pj = start_qos->max_nodes_pj; qos->max_nodes_pu = start_qos->max_nodes_pu; qos->max_submit_jobs_pu = start_qos->max_submit_jobs_pu; qos->max_wall_pj = start_qos->max_wall_pj; qos->min_cpus_pj = start_qos->min_cpus_pj; qos->preempt_list = copy_char_list(start_qos->preempt_list); qos->preempt_mode = start_qos->preempt_mode; qos->priority = start_qos->priority; qos->usage_factor = start_qos->usage_factor; qos->usage_thres = start_qos->usage_thres; xstrfmtcat(qos_str, " %s\n", name); list_append(qos_list, qos); } } list_iterator_destroy(itr); list_destroy(name_list); if (g_qos_list) { list_destroy(g_qos_list); g_qos_list = NULL; } if (!list_count(qos_list)) { printf(" Nothing new added.\n"); rc = SLURM_ERROR; goto end_it; } if (qos_str) { printf(" Adding QOS(s)\n%s", qos_str); printf(" Settings\n"); if (description) 
printf(" Description = %s\n", description); else printf(" Description = %s\n", "QOS Name"); sacctmgr_print_qos_limits(start_qos); xfree(qos_str); } notice_thread_init(); if (list_count(qos_list)) rc = acct_storage_g_add_qos(db_conn, my_uid, qos_list); else goto end_it; notice_thread_fini(); if (rc == SLURM_SUCCESS) { if (commit_check("Would you like to commit changes?")) { acct_storage_g_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); acct_storage_g_commit(db_conn, 0); } } else { exit_code=1; fprintf(stderr, " Problem adding QOS: %s\n", slurm_strerror(rc)); rc = SLURM_ERROR; } end_it: list_destroy(qos_list); xfree(description); return rc; }
static void _preempt_job_dequeue(void) { struct job_record *job_ptr; uint32_t job_id, *tmp_id; uint16_t preempt_mode; xassert(preempt_job_list); while ((tmp_id = list_pop(preempt_job_list))) { int rc = SLURM_ERROR; job_id = *tmp_id; xfree(tmp_id); if ((job_ptr = find_job_record(job_id)) == NULL) { error("_preempt_job_dequeue could not find job %u", job_id); continue; } preempt_mode = slurm_job_preempt_mode(job_ptr); if (preempt_mode == PREEMPT_MODE_SUSPEND) { if ((rc = _suspend_job(job_id)) == ESLURM_DISABLED) rc = SLURM_SUCCESS; } else if (preempt_mode == PREEMPT_MODE_CANCEL) { rc = job_signal(job_ptr->job_id, SIGKILL, 0, 0, true); if (rc == SLURM_SUCCESS) { info("preempted job %u has been killed", job_ptr->job_id); } } else if (preempt_mode == PREEMPT_MODE_CHECKPOINT) { checkpoint_msg_t ckpt_msg; memset(&ckpt_msg, 0, sizeof(checkpoint_msg_t)); ckpt_msg.op = CHECK_REQUEUE; ckpt_msg.job_id = job_ptr->job_id; rc = job_checkpoint(&ckpt_msg, 0, -1, (uint16_t)NO_VAL); if (rc == ESLURM_NOT_SUPPORTED) { memset(&ckpt_msg, 0, sizeof(checkpoint_msg_t)); ckpt_msg.op = CHECK_VACATE; ckpt_msg.job_id = job_ptr->job_id; rc = job_checkpoint(&ckpt_msg, 0, -1, (uint16_t)NO_VAL); } if (rc == SLURM_SUCCESS) { info("preempted job %u has been checkpointed", job_ptr->job_id); } else error("preempted job %u could not be " "checkpointed: %s", job_ptr->job_id, slurm_strerror(rc)); } else if ((preempt_mode == PREEMPT_MODE_REQUEUE) && job_ptr->batch_flag && job_ptr->details && (job_ptr->details->requeue > 0)) { rc = job_requeue(0, job_ptr->job_id, NULL, true, 0); if (rc == SLURM_SUCCESS) { info("preempted job %u has been requeued", job_ptr->job_id); } else error("preempted job %u could not be " "requeued: %s", job_ptr->job_id, slurm_strerror(rc)); } else if (preempt_mode == PREEMPT_MODE_OFF) { error("Invalid preempt_mode %u for job %u", preempt_mode, job_ptr->job_id); continue; } if (rc != SLURM_SUCCESS) { rc = job_signal(job_ptr->job_id, SIGKILL, 0, 0, true); if (rc == SLURM_SUCCESS) 
info("%s: preempted job %u had to be killed", __func__,job_ptr->job_id); else { info("%s: preempted job %u kill failure %s", __func__, job_ptr->job_id, slurm_strerror(rc)); } } } return; }
/*
 * slurm_perror - report the current errno value on stderr as
 *	"<msg>: <slurm_strerror(errno)>" followed by a newline.
 * IN msg - text placed before the error description
 */
void slurm_perror(char *msg)
{
	/* errno is sampled once, before any output is produced */
	int current_errno = errno;

	fprintf(stderr, "%s: %s\n", msg, slurm_strerror(current_errno));
}
extern int sacctmgr_add_cluster(int argc, char *argv[]) { int rc = SLURM_SUCCESS; int i = 0; slurmdb_cluster_rec_t *cluster = NULL; List name_list = list_create(slurm_destroy_char); List cluster_list = NULL; slurmdb_association_rec_t start_assoc; int limit_set = 0; ListIterator itr = NULL, itr_c = NULL; char *name = NULL; uint16_t class = 0; slurmdb_init_association_rec(&start_assoc, 0); for (i=0; i<argc; i++) { int command_len = strlen(argv[i]); if (!strncasecmp(argv[i], "Where", MAX(command_len, 5)) || !strncasecmp(argv[i], "Set", MAX(command_len, 3))) i++; limit_set += _set_rec(&i, argc, argv, name_list, &start_assoc, &class); } if (exit_code) { list_destroy(name_list); return SLURM_ERROR; } else if (!list_count(name_list)) { list_destroy(name_list); exit_code=1; fprintf(stderr, " Need name of cluster to add.\n"); return SLURM_ERROR; } else { List temp_list = NULL; slurmdb_cluster_cond_t cluster_cond; slurmdb_init_cluster_cond(&cluster_cond, 0); cluster_cond.cluster_list = name_list; cluster_cond.classification = class; temp_list = acct_storage_g_get_clusters(db_conn, my_uid, &cluster_cond); if (!temp_list) { exit_code=1; fprintf(stderr, " Problem getting clusters from database. " "Contact your admin.\n"); return SLURM_ERROR; } itr_c = list_iterator_create(name_list); itr = list_iterator_create(temp_list); while((name = list_next(itr_c))) { slurmdb_cluster_rec_t *cluster_rec = NULL; list_iterator_reset(itr); while((cluster_rec = list_next(itr))) { if (!strcasecmp(cluster_rec->name, name)) break; } if (cluster_rec) { printf(" This cluster %s already exists. 
" "Not adding.\n", name); list_delete_item(itr_c); } } list_iterator_destroy(itr); list_iterator_destroy(itr_c); list_destroy(temp_list); if (!list_count(name_list)) { list_destroy(name_list); return SLURM_ERROR; } } printf(" Adding Cluster(s)\n"); cluster_list = list_create(slurmdb_destroy_cluster_rec); itr = list_iterator_create(name_list); while((name = list_next(itr))) { if (!name[0]) { exit_code=1; fprintf(stderr, " No blank names are " "allowed when adding.\n"); rc = SLURM_ERROR; continue; } cluster = xmalloc(sizeof(slurmdb_cluster_rec_t)); slurmdb_init_cluster_rec(cluster, 0); list_append(cluster_list, cluster); cluster->flags = NO_VAL; cluster->name = xstrdup(name); cluster->classification = class; cluster->root_assoc = xmalloc(sizeof(slurmdb_association_rec_t)); slurmdb_init_association_rec(cluster->root_assoc, 0); printf(" Name = %s\n", cluster->name); if (cluster->classification) printf(" Classification= %s\n", get_classification_str(cluster->classification)); cluster->root_assoc->def_qos_id = start_assoc.def_qos_id; cluster->root_assoc->shares_raw = start_assoc.shares_raw; cluster->root_assoc->grp_cpus = start_assoc.grp_cpus; cluster->root_assoc->grp_jobs = start_assoc.grp_jobs; cluster->root_assoc->grp_mem = start_assoc.grp_mem; cluster->root_assoc->grp_nodes = start_assoc.grp_nodes; cluster->root_assoc->grp_submit_jobs = start_assoc.grp_submit_jobs; cluster->root_assoc->max_cpu_mins_pj = start_assoc.max_cpu_mins_pj; cluster->root_assoc->max_cpus_pj = start_assoc.max_cpus_pj; cluster->root_assoc->max_jobs = start_assoc.max_jobs; cluster->root_assoc->max_nodes_pj = start_assoc.max_nodes_pj; cluster->root_assoc->max_submit_jobs = start_assoc.max_submit_jobs; cluster->root_assoc->max_wall_pj = start_assoc.max_wall_pj; cluster->root_assoc->qos_list = copy_char_list(start_assoc.qos_list); } list_iterator_destroy(itr); list_destroy(name_list); if (limit_set) { printf(" Default Limits\n"); sacctmgr_print_assoc_limits(&start_assoc); if (start_assoc.qos_list) 
list_destroy(start_assoc.qos_list); } if (!list_count(cluster_list)) { printf(" Nothing new added.\n"); rc = SLURM_ERROR; goto end_it; } /* Since we are creating tables with add cluster that can't be rolled back. So we ask before hand if they are serious about it so we can rollback if needed. */ if (commit_check("Would you like to commit changes?")) { notice_thread_init(); rc = acct_storage_g_add_clusters(db_conn, my_uid, cluster_list); notice_thread_fini(); if (rc == SLURM_SUCCESS) { acct_storage_g_commit(db_conn, 1); } else { exit_code=1; fprintf(stderr, " Problem adding clusters: %s\n", slurm_strerror(rc)); /* this isn't really needed, but just to be safe */ acct_storage_g_commit(db_conn, 0); } } else { printf(" Changes Discarded\n"); /* this isn't really needed, but just to be safe */ acct_storage_g_commit(db_conn, 0); } end_it: list_destroy(cluster_list); return rc; }
/*
 * get_info_resv - build or refresh the reservation page
 * IN table        - table the page widget is attached to; NULL together
 *                   with a NULL display_data requests a reset of the page
 * IN display_data - display settings to use from now on (may be NULL to
 *                   keep the ones remembered in local_display_data)
 *
 * Call patterns handled, in this order:
 *   - table == NULL && display_data == NULL: destroy the page widget and
 *     forget the cached reservation data;
 *   - table == NULL: only propagate set_menu to display_data_resv;
 *   - "toggled" set while a widget exists: destroy the widget and rebuild
 *     it from the data already cached;
 *   - otherwise: fetch fresh data with get_new_info_resv() and either show
 *     an error label or (re)populate the tree view.
 *
 * The page widget, the cached reservation message and the current view
 * kind are kept in static variables between calls. The mouse cursor of
 * the main window is reset on every exit path.
 */
extern void get_info_resv(GtkTable *table, display_data_t *display_data)
{
	int error_code = SLURM_SUCCESS;
	List info_list = NULL;
	static int view = -1;
	static reserve_info_msg_t *resv_info_ptr = NULL;
	char error_char[100];
	GtkWidget *label = NULL;
	GtkTreeView *tree_view = NULL;
	static GtkWidget *display_widget = NULL;
	int j = 0;
	ListIterator itr = NULL;
	sview_resv_info_t *sview_resv_info_ptr = NULL;
	reserve_info_t *resv_ptr = NULL;
	time_t now = time(NULL);
	GtkTreePath *path = NULL;
	static bool set_opts = FALSE;

	/* page options are installed once, on the first call */
	if (!set_opts)
		set_page_opts(RESV_PAGE, display_data_resv,
			      SORTID_CNT, _initial_page_opts);
	set_opts = TRUE;

	/* reset */
	if (!table && !display_data) {
		if (display_widget)
			gtk_widget_destroy(display_widget);
		display_widget = NULL;
		resv_info_ptr = NULL;
		goto reset_curs;
	}

	if (display_data)
		local_display_data = display_data;
	if (!table) {
		display_data_resv->set_menu = local_display_data->set_menu;
		goto reset_curs;
	}
	if (display_widget && toggled) {
		gtk_widget_destroy(display_widget);
		display_widget = NULL;
		goto display_it;
	}

	error_code = get_new_info_resv(&resv_info_ptr, force_refresh);
	if (error_code == SLURM_NO_CHANGE_IN_DATA) {
		/* nothing new: redisplay what we already have */
	} else if (error_code != SLURM_SUCCESS) {
		/* the error label is already on screen */
		if (view == ERROR_VIEW)
			goto end_it;
		if (display_widget)
			gtk_widget_destroy(display_widget);
		view = ERROR_VIEW;
		/* bounded: the error text is of arbitrary length while
		 * error_char is a fixed-size stack buffer */
		snprintf(error_char, sizeof(error_char),
			 "slurm_load_reservations: %s",
			 slurm_strerror(slurm_get_errno()));
		label = gtk_label_new(error_char);
		gtk_table_attach_defaults(table, label, 0, 1, 0, 1);
		gtk_widget_show(label);
		display_widget = gtk_widget_ref(GTK_WIDGET(label));
		goto end_it;
	}

display_it:
	info_list = _create_resv_info_list(resv_info_ptr);
	if (!info_list)
		goto reset_curs;

	/* set up the grid */
	if (display_widget && GTK_IS_TREE_VIEW(display_widget)
	    && gtk_tree_selection_count_selected_rows(
		    gtk_tree_view_get_selection(
			    GTK_TREE_VIEW(display_widget)))) {
		GtkTreeViewColumn *focus_column = NULL;
		/* highlight the correct nodes from the last selection */
		gtk_tree_view_get_cursor(GTK_TREE_VIEW(display_widget),
					 &path, &focus_column);
	}
	if (!path) {
		/* No selection: colour the grid with every reservation
		 * that is active right now. */
		itr = list_iterator_create(info_list);
		while ((sview_resv_info_ptr = list_next(itr))) {
			resv_ptr = sview_resv_info_ptr->resv_ptr;
			if ((resv_ptr->start_time > now) ||
			    (resv_ptr->end_time < now))
				continue;/* only map current reservations */
			j = 0;
			/* node_inx holds (first, last) index pairs and is
			 * terminated by a negative value */
			while (resv_ptr->node_inx[j] >= 0) {
				change_grid_color(grid_button_list,
						  resv_ptr->node_inx[j],
						  resv_ptr->node_inx[j+1],
						  sview_resv_info_ptr->
						  color_inx,
						  true, 0);
				j += 2;
			}
		}
		list_iterator_destroy(itr);
		change_grid_color(grid_button_list, -1, -1,
				  MAKE_WHITE, true, 0);
	} else {
		highlight_grid(GTK_TREE_VIEW(display_widget),
			       SORTID_NODE_INX, SORTID_COLOR_INX,
			       grid_button_list);
		/* the path returned by gtk_tree_view_get_cursor() belongs
		 * to the caller and was only needed as a "has cursor" flag */
		gtk_tree_path_free(path);
		path = NULL;
	}

	if (view == ERROR_VIEW && display_widget) {
		gtk_widget_destroy(display_widget);
		display_widget = NULL;
	}
	if (!display_widget) {
		tree_view = create_treeview(local_display_data,
					    &grid_button_list);
		gtk_tree_selection_set_mode(
			gtk_tree_view_get_selection(tree_view),
			GTK_SELECTION_MULTIPLE);
		display_widget = gtk_widget_ref(GTK_WIDGET(tree_view));
		gtk_table_attach_defaults(table,
					  GTK_WIDGET(tree_view),
					  0, 1, 0, 1);
		/* since this function sets the model of the tree_view
		   to the treestore we don't really care about
		   the return value */
		create_treestore(tree_view, display_data_resv,
				 SORTID_CNT, SORTID_TIME_START, SORTID_COLOR);
	}

	view = INFO_VIEW;
	_update_info_resv(info_list, GTK_TREE_VIEW(display_widget));
end_it:
	toggled = FALSE;
	force_refresh = FALSE;
reset_curs:
	if (main_window && main_window->window)
		gdk_window_set_cursor(main_window->window, NULL);
	return;
}
extern int sacctmgr_modify_cluster(int argc, char *argv[]) { int rc = SLURM_SUCCESS; int i=0; slurmdb_association_rec_t *assoc = xmalloc(sizeof(slurmdb_association_rec_t)); slurmdb_association_cond_t *assoc_cond = xmalloc(sizeof(slurmdb_association_cond_t)); int cond_set = 0, prev_set = 0, rec_set = 0, set = 0; List ret_list = NULL; uint16_t class_rec = 0; slurmdb_cluster_cond_t cluster_cond; slurmdb_init_association_rec(assoc, 0); assoc_cond->cluster_list = list_create(slurm_destroy_char); assoc_cond->acct_list = list_create(NULL); slurmdb_init_cluster_cond(&cluster_cond, 0); cluster_cond.cluster_list = assoc_cond->cluster_list; for (i=0; i<argc; i++) { int command_len = strlen(argv[i]); if (!strncasecmp(argv[i], "Where", MAX(command_len, 5))) { i++; prev_set = _set_cond(&i, argc, argv, &cluster_cond, NULL); cond_set |= prev_set; } else if (!strncasecmp(argv[i], "Set", MAX(command_len, 3))) { i++; prev_set = _set_rec(&i, argc, argv, NULL, assoc, &class_rec); rec_set |= prev_set; } else { prev_set = _set_cond(&i, argc, argv, &cluster_cond, NULL); cond_set |= prev_set; } } if (!rec_set) { exit_code=1; fprintf(stderr, " You didn't give me anything to set\n"); rc = SLURM_ERROR; goto end_it; } else if (!cond_set) { if (!commit_check("You didn't set any conditions with 'WHERE'.\n" "Are you sure you want to continue?")) { printf("Aborted\n"); rc = SLURM_SUCCESS; goto end_it; } } else if (exit_code) { rc = SLURM_ERROR; goto end_it; } if (cond_set & 1) { List temp_list = NULL; temp_list = acct_storage_g_get_clusters(db_conn, my_uid, &cluster_cond); if (!temp_list) { exit_code=1; fprintf(stderr, " Problem getting clusters from database. 
" "Contact your admin.\n"); rc = SLURM_ERROR; goto end_it; } else if (!list_count(temp_list)) { fprintf(stderr, " Query didn't return any clusters.\n"); rc = SLURM_ERROR; goto end_it; } /* we are only looking for the clusters returned from this query, so we free the cluster_list and replace it */ if (assoc_cond->cluster_list) list_destroy(assoc_cond->cluster_list); assoc_cond->cluster_list = temp_list; } printf(" Setting\n"); if (rec_set) { printf(" Default Limits =\n"); sacctmgr_print_assoc_limits(assoc); if (class_rec) printf(" Cluster Classification = %s\n", get_classification_str(class_rec)); } list_append(assoc_cond->acct_list, "root"); notice_thread_init(); ret_list = acct_storage_g_modify_associations( db_conn, my_uid, assoc_cond, assoc); if (ret_list && list_count(ret_list)) { char *object = NULL; ListIterator itr = list_iterator_create(ret_list); printf(" Modified cluster defaults for associations...\n"); while((object = list_next(itr))) { printf(" %s\n", object); } list_iterator_destroy(itr); set = 1; } else if (ret_list) { printf(" Nothing modified\n"); rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", slurm_strerror(errno)); rc = SLURM_ERROR; } if (ret_list) list_destroy(ret_list); if (class_rec) { slurmdb_cluster_rec_t cluster_rec; slurmdb_init_cluster_rec(&cluster_rec, 0); /* the class has already returned these clusters so just go with it */ cluster_rec.classification = class_rec; ret_list = acct_storage_g_modify_clusters( db_conn, my_uid, &cluster_cond, &cluster_rec); if (ret_list && list_count(ret_list)) { char *object = NULL; ListIterator itr = list_iterator_create(ret_list); printf(" Modified cluster classifications...\n"); while((object = list_next(itr))) { printf(" %s\n", object); } list_iterator_destroy(itr); set = 1; } else if (ret_list) { printf(" Nothing modified\n"); rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", slurm_strerror(errno)); rc = SLURM_ERROR; } if 
(ret_list) list_destroy(ret_list); } notice_thread_fini(); if (set) { if (commit_check("Would you like to commit changes?")) acct_storage_g_commit(db_conn, 1); else { printf(" Changes Discarded\n"); acct_storage_g_commit(db_conn, 0); } } end_it: slurmdb_destroy_association_cond(assoc_cond); slurmdb_destroy_association_rec(assoc); return rc; }
static int _verify_job_ids(void) { job_info_t *job_ptr; int i, j, rc = 0; if (opt.job_cnt == 0) return rc; opt.job_found = xmalloc(sizeof(bool) * opt.job_cnt); opt.job_pend = xmalloc(sizeof(bool) * opt.job_cnt); job_ptr = job_buffer_ptr->job_array; for (i = 0; i < job_buffer_ptr->record_count; i++, job_ptr++) { /* NOTE: We re-use the job's "assoc_id" value as a flag to * record if the job is referenced in the job list supplied * by the user. */ job_ptr->assoc_id = 0; if (IS_JOB_FINISHED(job_ptr)) job_ptr->job_id = 0; if (job_ptr->job_id == 0) continue; for (j = 0; j < opt.job_cnt; j++) { if (opt.array_id[j] == NO_VAL) { if ((opt.job_id[j] == job_ptr->job_id) || ((opt.job_id[j] == job_ptr->array_job_id) && (opt.step_id[j] == SLURM_BATCH_SCRIPT))) { opt.job_found[j] = true; } } else if (opt.array_id[j] == INFINITE) { if (opt.job_id[j] == job_ptr->array_job_id) { opt.job_found[j] = true; } } else if (opt.job_id[j] != job_ptr->array_job_id) { continue; } else if (_is_task_in_job(job_ptr, opt.array_id[j])) { opt.job_found[j] = true; } if (opt.job_found[j]) { if (IS_JOB_PENDING(job_ptr)) opt.job_pend[j] = true; job_ptr->assoc_id = 1; } } if (job_ptr->assoc_id == 0) job_ptr->job_id = 0; } for (j = 0; j < opt.job_cnt; j++) { char *job_id_str = NULL; if (!opt.job_found[j]) rc = 1; else continue; if (opt.verbose < 0) { ; } else if (opt.array_id[j] == NO_VAL) { xstrfmtcat(job_id_str, "%u", opt.job_id[j]); } else if (opt.array_id[j] == INFINITE) { xstrfmtcat(job_id_str, "%u_*", opt.job_id[j]); } else { xstrfmtcat(job_id_str, "%u_%u", opt.job_id[j], opt.array_id[j]); } if (opt.verbose < 0) { ; } else if (opt.step_id[j] == SLURM_BATCH_SCRIPT) { error("Kill job error on job id %s: %s", job_id_str, slurm_strerror(ESLURM_INVALID_JOB_ID)); } else { error("Kill job error on job step id %s.%u: %s", job_id_str, opt.step_id[j], slurm_strerror(ESLURM_INVALID_JOB_ID)); } xfree(job_id_str); /* Avoid this job in the cancel_job logic */ opt.job_id[j] = 0; } return rc; }
extern int sacctmgr_delete_cluster(int argc, char *argv[]) { int rc = SLURM_SUCCESS; slurmdb_cluster_cond_t *cluster_cond = xmalloc(sizeof(slurmdb_cluster_cond_t)); int i=0; List ret_list = NULL; int cond_set = 0, prev_set; slurmdb_init_cluster_cond(cluster_cond, 0); cluster_cond->cluster_list = list_create(slurm_destroy_char); for (i=0; i<argc; i++) { int command_len = strlen(argv[i]); if (!strncasecmp(argv[i], "Where", MAX(command_len, 5)) || !strncasecmp(argv[i], "Set", MAX(command_len, 3))) i++; prev_set = _set_cond(&i, argc, argv, cluster_cond, NULL); cond_set |= prev_set; } if (exit_code) { slurmdb_destroy_cluster_cond(cluster_cond); return SLURM_ERROR; } else if (!cond_set) { exit_code=1; fprintf(stderr, " No conditions given to remove, not executing.\n"); slurmdb_destroy_cluster_cond(cluster_cond); return SLURM_ERROR; } if (!list_count(cluster_cond->cluster_list) && !cluster_cond->classification) { exit_code=1; fprintf(stderr, "problem with delete request. " "Nothing given to delete.\n"); slurmdb_destroy_cluster_cond(cluster_cond); return SLURM_SUCCESS; } notice_thread_init(); ret_list = acct_storage_g_remove_clusters( db_conn, my_uid, cluster_cond); rc = errno; notice_thread_fini(); slurmdb_destroy_cluster_cond(cluster_cond); if (ret_list && list_count(ret_list)) { char *object = NULL; ListIterator itr = list_iterator_create(ret_list); /* If there were jobs running with an association to be deleted, don't. 
*/ if (rc == ESLURM_JOBS_RUNNING_ON_ASSOC) { fprintf(stderr, " Error with request: %s\n", slurm_strerror(rc)); while((object = list_next(itr))) { fprintf(stderr," %s\n", object); } list_destroy(ret_list); acct_storage_g_commit(db_conn, 0); return rc; } printf(" Deleting clusters...\n"); while((object = list_next(itr))) { printf(" %s\n", object); } list_iterator_destroy(itr); if (commit_check("Would you like to commit changes?")) { acct_storage_g_commit(db_conn, 1); } else { printf(" Changes Discarded\n"); acct_storage_g_commit(db_conn, 0); } } else if (ret_list) { printf(" Nothing deleted\n"); rc = SLURM_ERROR; } else { exit_code=1; fprintf(stderr, " Error with request: %s\n", slurm_strerror(errno)); rc = SLURM_ERROR; } if (ret_list) list_destroy(ret_list); return rc; }
static void * _cancel_step_id (void *ci) { int error_code = SLURM_SUCCESS, i; job_cancel_info_t *cancel_info = (job_cancel_info_t *)ci; uint32_t job_id = cancel_info->job_id; uint32_t step_id = cancel_info->step_id; bool sig_set = true; DEF_TIMERS; if (cancel_info->sig == (uint16_t) NO_VAL) { cancel_info->sig = SIGKILL; sig_set = false; } if (!cancel_info->job_id_str) { if (cancel_info->array_job_id && (cancel_info->array_task_id == INFINITE)) { xstrfmtcat(cancel_info->job_id_str, "%u_*", cancel_info->array_job_id); } else if (cancel_info->array_job_id) { xstrfmtcat(cancel_info->job_id_str, "%u_%u", cancel_info->array_job_id, cancel_info->array_task_id); } else { xstrfmtcat(cancel_info->job_id_str, "%u", cancel_info->job_id); } } for (i = 0; i < MAX_CANCEL_RETRY; i++) { if (cancel_info->sig == SIGKILL) { verbose("Terminating step %s.%u", cancel_info->job_id_str, step_id); } else { verbose("Signal %u to step %s.%u", cancel_info->sig, cancel_info->job_id_str, step_id); } _add_delay(); START_TIMER; if ((!sig_set) || opt.ctld) error_code = slurm_kill_job_step(job_id, step_id, cancel_info->sig); else if (cancel_info->sig == SIGKILL) error_code = slurm_terminate_job_step(job_id, step_id); else error_code = slurm_signal_job_step(job_id, step_id, cancel_info->sig); END_TIMER; slurm_mutex_lock(&max_delay_lock); max_resp_time = MAX(max_resp_time, DELTA_TIMER); slurm_mutex_unlock(&max_delay_lock); if ((error_code == 0) || ((errno != ESLURM_TRANSITION_STATE_NO_UPDATE) && (errno != ESLURM_JOB_PENDING))) break; verbose("Job is in transistional state, retrying"); sleep(5 + i); } if (error_code) { error_code = slurm_get_errno(); if ((opt.verbose > 0) || (error_code != ESLURM_ALREADY_DONE)) error("Kill job error on job step id %s: %s", cancel_info->job_id_str, slurm_strerror(slurm_get_errno())); if ((error_code == ESLURM_ALREADY_DONE) && (cancel_info->sig == SIGKILL)) { error_code = 0; /* Ignore error if job done */ } } /* Purposely free the struct passed in here, so the caller 
doesn't have * to keep track of it, but don't destroy the mutex and condition * variables contained. */ slurm_mutex_lock(cancel_info->num_active_threads_lock); *(cancel_info->rc) = MAX(*(cancel_info->rc), error_code); (*(cancel_info->num_active_threads))--; slurm_cond_signal(cancel_info->num_active_threads_cond); slurm_mutex_unlock(cancel_info->num_active_threads_lock); xfree(cancel_info->job_id_str); xfree(cancel_info); return NULL; }
extern int sacctmgr_list_problem(int argc, char *argv[]) { int rc = SLURM_SUCCESS; slurmdb_association_cond_t *assoc_cond = xmalloc(sizeof(slurmdb_association_cond_t)); List assoc_list = NULL; slurmdb_association_rec_t *assoc = NULL; int i=0; ListIterator itr = NULL; ListIterator itr2 = NULL; List tree_list = NULL; int field_count = 0; print_field_t *field = NULL; List format_list = list_create(slurm_destroy_char); List print_fields_list; /* types are of print_field_t */ for (i=0; i<argc; i++) { int command_len = strlen(argv[i]); if (!strncasecmp (argv[i], "Where", MAX(command_len, 5)) || !strncasecmp (argv[i], "Set", MAX(command_len, 3))) i++; _set_cond(&i, argc, argv, assoc_cond, format_list); } if (exit_code) { slurmdb_destroy_association_cond(assoc_cond); list_destroy(format_list); return SLURM_ERROR; } else if (!list_count(format_list)) slurm_addto_char_list(format_list, "Cl,Acct,User,Problem"); print_fields_list = sacctmgr_process_format_list(format_list); list_destroy(format_list); if (exit_code) { slurmdb_destroy_association_cond(assoc_cond); list_destroy(print_fields_list); return SLURM_ERROR; } assoc_list = acct_storage_g_get_problems(db_conn, my_uid, assoc_cond); slurmdb_destroy_association_cond(assoc_cond); if (!assoc_list) { exit_code=1; fprintf(stderr, " Error with request: %s\n", slurm_strerror(errno)); list_destroy(print_fields_list); return SLURM_ERROR; } itr = list_iterator_create(assoc_list); itr2 = list_iterator_create(print_fields_list); print_fields_header(print_fields_list); field_count = list_count(print_fields_list); while((assoc = list_next(itr))) { int curr_inx = 1; while((field = list_next(itr2))) { switch(field->type) { case PRINT_ACCT: field->print_routine( field, assoc->acct, (curr_inx == field_count)); break; case PRINT_CLUSTER: field->print_routine( field, assoc->cluster, (curr_inx == field_count)); break; case PRINT_PROBLEM: /* make some sort of string here to print out the problem reported. 
Maybe make an array or something and just print out a standard error. */ field->print_routine( field, slurmdb_problem_str_get(assoc->id), (curr_inx == field_count)); break; case PRINT_USER: field->print_routine(field, assoc->user, (curr_inx == field_count)); break; default: field->print_routine( field, NULL, (curr_inx == field_count)); break; } curr_inx++; } list_iterator_reset(itr2); printf("\n"); } if (tree_list) list_destroy(tree_list); list_iterator_destroy(itr2); list_iterator_destroy(itr); list_destroy(assoc_list); list_destroy(print_fields_list); tree_display = 0; return rc; }