/*
 * slurm_job_will_run - determine if a job would execute immediately if
 *	submitted now
 * IN job_desc_msg - description of resource allocation request
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
int slurm_job_will_run(job_desc_msg_t *req)
{
	will_run_response_msg_t *will_run_resp = NULL;
	char buf[64], local_hostname[64];
	int rc;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	char *type = "processors";
	char *cluster_name = NULL;
	void *ptr = NULL;

	if ((req->alloc_node == NULL) &&
	    (gethostname_short(local_hostname, sizeof(local_hostname)) == 0)) {
		req->alloc_node = local_hostname;
	}

	if (working_cluster_rec)
		cluster_name = working_cluster_rec->name;
	else
		cluster_name = slurmctld_conf.cluster_name;
	if (!slurm_load_federation(&ptr) &&
	    cluster_in_federation(ptr, cluster_name))
		rc = _fed_job_will_run(req, &will_run_resp, ptr);
	else
		rc = slurm_job_will_run2(req, &will_run_resp);

	if ((rc == 0) && will_run_resp) {
		if (cluster_flags & CLUSTER_FLAG_BG)
			type = "cnodes";
		slurm_make_time_str(&will_run_resp->start_time,
				    buf, sizeof(buf));
		info("Job %u to start at %s using %u %s on %s",
		     will_run_resp->job_id, buf,
		     will_run_resp->proc_cnt, type,
		     will_run_resp->node_list);
		if (will_run_resp->preemptee_job_id) {
			ListIterator itr;
			uint32_t *job_id_ptr;
			char *job_list = NULL, *sep = "";
			itr = list_iterator_create(will_run_resp->
						   preemptee_job_id);
			while ((job_id_ptr = list_next(itr))) {
				if (job_list)
					sep = ",";
				xstrfmtcat(job_list, "%s%u", sep, *job_id_ptr);
			}
			list_iterator_destroy(itr);
			info(" Preempts: %s", job_list);
			xfree(job_list);
		}

		slurm_free_will_run_response_msg(will_run_resp);
	}

	if (req->alloc_node == local_hostname)
		req->alloc_node = NULL;
	if (ptr)
		slurm_destroy_federation_rec(ptr);

	return rc;
}
/* Initialize slurmd configuration table.
 * Everything is already NULL/zero filled when called */
static void _init_conf(void)
{
	char host[MAXHOSTNAMELEN];
	log_options_t lopts = LOG_OPTS_INITIALIZER;

	if (gethostname_short(host, MAXHOSTNAMELEN) < 0) {
		error("Unable to get my hostname: %m");
		exit(1);
	}
	conf->hostname    = xstrdup(host);
	conf->daemonize   = 1;
	conf->lfd         = -1;
	conf->log_opts    = lopts;
	conf->debug_level = LOG_LEVEL_INFO;
	conf->pidfile     = xstrdup(DEFAULT_SLURMD_PIDFILE);
	conf->spooldir    = xstrdup(DEFAULT_SPOOLDIR);

	slurm_mutex_init(&conf->config_mutex);

	conf->starting_steps = list_create(destroy_starting_step);
	slurm_mutex_init(&conf->starting_steps_lock);
	pthread_cond_init(&conf->starting_steps_cond, NULL);
	conf->prolog_running_jobs = list_create(slurm_destroy_uint32_ptr);
	slurm_mutex_init(&conf->prolog_running_lock);
	pthread_cond_init(&conf->prolog_running_cond, NULL);
	return;
}
static void _print_config(void)
{
	int days, hours, mins, secs;
	char name[128];

	gethostname_short(name, sizeof(name));
	printf("NodeName=%s ", name);

	get_cpuinfo(&conf->actual_cpus,
		    &conf->actual_boards,
		    &conf->actual_sockets,
		    &conf->actual_cores,
		    &conf->actual_threads,
		    &conf->block_map_size,
		    &conf->block_map, &conf->block_map_inv);
	printf("CPUs=%u Boards=%u SocketsPerBoard=%u CoresPerSocket=%u "
	       "ThreadsPerCore=%u ",
	       conf->actual_cpus, conf->actual_boards, conf->actual_sockets,
	       conf->actual_cores, conf->actual_threads);

	get_memory(&conf->real_memory_size);
	get_tmp_disk(&conf->tmp_disk_space, "/tmp");
	printf("RealMemory=%u TmpDisk=%u\n",
	       conf->real_memory_size, conf->tmp_disk_space);

	get_up_time(&conf->up_time);
	secs  =  conf->up_time % 60;
	mins  = (conf->up_time / 60) % 60;
	hours = (conf->up_time / 3600) % 24;
	days  = (conf->up_time / 86400);
	printf("UpTime=%u-%2.2u:%2.2u:%2.2u\n", days, hours, mins, secs);
}
/*
 * slurm_submit_batch_job - issue RPC to submit a job for later execution
 * NOTE: free the response using slurm_free_submit_response_response_msg
 * IN job_desc_msg - description of batch job request
 * OUT slurm_alloc_msg - response to request
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
int slurm_submit_batch_job(job_desc_msg_t *req,
			   submit_response_msg_t **resp)
{
	int rc;
	slurm_msg_t req_msg;
	slurm_msg_t resp_msg;
	bool host_set = false;
	char host[64];

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);

	/*
	 * set Node and session id for this request
	 */
	if (req->alloc_sid == NO_VAL)
		req->alloc_sid = getsid(0);

	if ((req->alloc_node == NULL) &&
	    (gethostname_short(host, sizeof(host)) == 0)) {
		req->alloc_node = host;
		host_set = true;
	}

	req_msg.msg_type = REQUEST_SUBMIT_BATCH_JOB;
	req_msg.data     = req;

	rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg);

	/*
	 * Clear this hostname if set internally to this function
	 * (memory is on the stack)
	 */
	if (host_set)
		req->alloc_node = NULL;

	if (rc == SLURM_SOCKET_ERROR)
		return SLURM_ERROR;

	switch (resp_msg.msg_type) {
	case RESPONSE_SLURM_RC:
		rc = ((return_code_msg_t *) resp_msg.data)->return_code;
		if (rc)
			slurm_seterrno_ret(rc);
		*resp = NULL;
		break;
	case RESPONSE_SUBMIT_BATCH_JOB:
		*resp = (submit_response_msg_t *) resp_msg.data;
		break;
	default:
		slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
	}

	return SLURM_PROTOCOL_SUCCESS;
}
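/*
 * Illustrative sketch (not part of the library): a minimal caller of
 * slurm_submit_batch_job().  The helper name, field values and inline
 * script below are hypothetical; it assumes <slurm/slurm.h> for the API
 * calls and <unistd.h> for getuid()/getgid().
 */
static int _example_submit_batch_job(void)
{
	job_desc_msg_t job_desc;
	submit_response_msg_t *resp = NULL;

	slurm_init_job_desc_msg(&job_desc);	/* fill with default values */
	job_desc.name      = "example_job";	/* hypothetical job name */
	job_desc.min_nodes = 1;
	job_desc.user_id   = getuid();
	job_desc.group_id  = getgid();
	job_desc.script    = "#!/bin/sh\nsleep 30\n";

	if (slurm_submit_batch_job(&job_desc, &resp) < 0) {
		slurm_perror("slurm_submit_batch_job");
		return SLURM_ERROR;
	}
	info("Submitted batch job %u", resp->job_id);
	slurm_free_submit_response_response_msg(resp);
	return SLURM_SUCCESS;
}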
extern int sbatch_set_first_avail_cluster(job_desc_msg_t *req)
{
	int rc = SLURM_SUCCESS;
	ListIterator itr;
	local_cluster_rec_t *local_cluster = NULL;
	char buf[64];
	bool host_set = false;
	List ret_list = NULL;

	/* return if we only have 1 or fewer clusters here */
	if (!opt.clusters || !list_count(opt.clusters)) {
		return rc;
	} else if (list_count(opt.clusters) == 1) {
		working_cluster_rec = list_peek(opt.clusters);
		return rc;
	}

	if ((req->alloc_node == NULL) &&
	    (gethostname_short(buf, sizeof(buf)) == 0)) {
		req->alloc_node = buf;
		host_set = true;
	}

	ret_list = list_create(_destroy_local_cluster_rec);

	itr = list_iterator_create(opt.clusters);
	while ((working_cluster_rec = list_next(itr))) {
		if ((local_cluster = _job_will_run(req)))
			list_append(ret_list, local_cluster);
		else
			error("Problem with submit to cluster %s: %m",
			      working_cluster_rec->name);
	}
	list_iterator_destroy(itr);

	if (host_set)
		req->alloc_node = NULL;

	if (!list_count(ret_list)) {
		error("Can't run on any of the clusters given");
		rc = SLURM_ERROR;
		goto end_it;
	}

	/* sort the list so the best candidate cluster is first */
	local_cluster_name = slurm_get_cluster_name();
	list_sort(ret_list, (ListCmpF)_sort_local_cluster);
	xfree(local_cluster_name);
	local_cluster = list_peek(ret_list);

	/* set up the working cluster and be done */
	working_cluster_rec = local_cluster->cluster_rec;

end_it:
	list_destroy(ret_list);

	return rc;
}
/*
 * slurm_allocate_resources - allocate resources for a job request
 * IN job_desc_msg - description of resource allocation request
 * OUT slurm_alloc_msg - response to request
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 * NOTE: free the response using slurm_free_resource_allocation_response_msg()
 */
int slurm_allocate_resources(job_desc_msg_t *req,
			     resource_allocation_response_msg_t **resp)
{
	int rc;
	slurm_msg_t req_msg;
	slurm_msg_t resp_msg;
	bool host_set = false;
	char host[64];

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);

	/*
	 * set Node and session id for this request
	 */
	if (req->alloc_sid == NO_VAL)
		req->alloc_sid = getsid(0);

	if ((req->alloc_node == NULL) &&
	    (gethostname_short(host, sizeof(host)) == 0)) {
		req->alloc_node = host;
		host_set = true;
	}

	req_msg.msg_type = REQUEST_RESOURCE_ALLOCATION;
	req_msg.data     = req;

	rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg,
					    working_cluster_rec);

	/*
	 * Clear this hostname if set internally to this function
	 * (memory is on the stack)
	 */
	if (host_set)
		req->alloc_node = NULL;

	if (rc == SLURM_SOCKET_ERROR)
		return SLURM_SOCKET_ERROR;

	switch (resp_msg.msg_type) {
	case RESPONSE_SLURM_RC:
		if (_handle_rc_msg(&resp_msg) < 0)
			return SLURM_PROTOCOL_ERROR;
		*resp = NULL;
		break;
	case RESPONSE_RESOURCE_ALLOCATION:
		*resp = (resource_allocation_response_msg_t *) resp_msg.data;
		break;
	default:
		slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
	}

	return SLURM_PROTOCOL_SUCCESS;
}
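/*
 * Illustrative sketch (not part of the library): requesting an allocation
 * with slurm_allocate_resources() and freeing the response as noted in the
 * doc comment above.  The helper name and field values are hypothetical;
 * it assumes <slurm/slurm.h> and <unistd.h>.  The response may describe a
 * queued rather than an immediate allocation, so node_list is checked
 * before use.
 */
static int _example_allocate_resources(void)
{
	job_desc_msg_t job_desc;
	resource_allocation_response_msg_t *alloc = NULL;

	slurm_init_job_desc_msg(&job_desc);
	job_desc.min_nodes = 1;			/* hypothetical sizing */
	job_desc.user_id   = getuid();
	job_desc.group_id  = getgid();

	if (slurm_allocate_resources(&job_desc, &alloc) < 0) {
		slurm_perror("slurm_allocate_resources");
		return SLURM_ERROR;
	}
	if (alloc && alloc->node_list)
		info("Granted allocation %u on %s",
		     alloc->job_id, alloc->node_list);
	if (alloc)
		slurm_free_resource_allocation_response_msg(alloc);
	return SLURM_SUCCESS;
}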
/*
 * get_mach_name - Return the name of this node
 * Input:  node_name - buffer for the node name, must be at least
 *	   MAX_SLURM_NAME characters
 * Output: node_name - filled in with node name
 *	   return code - 0 if no error, otherwise errno
 */
extern int get_mach_name(char *node_name)
{
	int error_code;

	error_code = gethostname_short(node_name, MAX_SLURM_NAME);
	if (error_code != 0)
		error("get_mach_name: gethostname_short error %d",
		      error_code);

	return error_code;
}
/*
 * slurm_job_will_run - determine if a job would execute immediately if
 *	submitted now
 * IN job_desc_msg - description of resource allocation request
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
int slurm_job_will_run(job_desc_msg_t *req)
{
	will_run_response_msg_t *will_run_resp = NULL;
	char buf[64];
	bool host_set = false;
	int rc;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	char *type = "processors";

	if ((req->alloc_node == NULL) &&
	    (gethostname_short(buf, sizeof(buf)) == 0)) {
		req->alloc_node = buf;
		host_set = true;
	}

	rc = slurm_job_will_run2(req, &will_run_resp);

	if ((rc == 0) && will_run_resp) {
		if (cluster_flags & CLUSTER_FLAG_BG)
			type = "cnodes";
		slurm_make_time_str(&will_run_resp->start_time,
				    buf, sizeof(buf));
		info("Job %u to start at %s using %u %s on %s",
		     will_run_resp->job_id, buf,
		     will_run_resp->proc_cnt, type,
		     will_run_resp->node_list);
		if (will_run_resp->preemptee_job_id) {
			ListIterator itr;
			uint32_t *job_id_ptr;
			char *job_list = NULL, *sep = "";
			itr = list_iterator_create(will_run_resp->
						   preemptee_job_id);
			while ((job_id_ptr = list_next(itr))) {
				if (job_list)
					sep = ",";
				xstrfmtcat(job_list, "%s%u", sep, *job_id_ptr);
			}
			list_iterator_destroy(itr);
			info(" Preempts: %s", job_list);
			xfree(job_list);
		}

		slurm_free_will_run_response_msg(will_run_resp);
	}

	if (host_set)
		req->alloc_node = NULL;

	return rc;
}
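/*
 * Illustrative sketch (not part of the library): probing the controller
 * with slurm_job_will_run() before submitting the same descriptor.  The
 * helper name is hypothetical; the expected start time is logged by
 * slurm_job_will_run() itself via info(), as shown above.
 */
static int _example_check_then_submit(job_desc_msg_t *job_desc)
{
	submit_response_msg_t *resp = NULL;

	if (slurm_job_will_run(job_desc) < 0) {
		slurm_perror("slurm_job_will_run");
		return SLURM_ERROR;
	}
	if (slurm_submit_batch_job(job_desc, &resp) < 0) {
		slurm_perror("slurm_submit_batch_job");
		return SLURM_ERROR;
	}
	slurm_free_submit_response_response_msg(resp);
	return SLURM_SUCCESS;
}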
static char *_guess_nodename(void)
{
	char host[256];
	char *nodename = NULL;

	if (gethostname_short(host, 256) != 0)
		return NULL;

	nodename = slurm_conf_get_nodename(host);
	if (nodename == NULL)
		nodename = slurm_conf_get_aliased_nodename();
	if (nodename == NULL)	/* if no match, try localhost */
		nodename = slurm_conf_get_nodename("localhost");

	return nodename;
}
extern void create_daemon_popup(GtkAction *action, gpointer user_data)
{
	GtkWidget *popup = gtk_dialog_new_with_buttons(
		"SLURM Daemons running",
		GTK_WINDOW(user_data),
		GTK_DIALOG_DESTROY_WITH_PARENT,
		GTK_STOCK_CLOSE,
		GTK_RESPONSE_OK,
		NULL);
	int update = 0;
	slurm_ctl_conf_info_msg_t *conf;
	char me[MAX_SLURM_NAME], *b, *c, *n;
	int actld = 0, ctld = 0, d = 0;
	GtkTreeStore *treestore =
		_local_create_treestore_2cols(popup, 300, 100);
	GtkTreeIter iter;

	g_signal_connect(G_OBJECT(popup), "delete_event",
			 G_CALLBACK(_delete_popup), NULL);
	g_signal_connect(G_OBJECT(popup), "response",
			 G_CALLBACK(_delete_popup), NULL);

	slurm_conf_init(NULL);
	conf = slurm_conf_lock();

	gethostname_short(me, MAX_SLURM_NAME);
	if ((b = conf->backup_controller)) {
		if ((strcmp(b, me) == 0) ||
		    (strcasecmp(b, "localhost") == 0))
			ctld = 1;
	}
	if ((c = conf->control_machine)) {
		actld = 1;
		if ((strcmp(c, me) == 0) ||
		    (strcasecmp(c, "localhost") == 0))
			ctld = 1;
	}
	slurm_conf_unlock();

	if ((n = slurm_conf_get_nodename(me))) {
		d = 1;
		xfree(n);
	} else if ((n = slurm_conf_get_aliased_nodename())) {
		d = 1;
		xfree(n);
	} else if ((n = slurm_conf_get_nodename("localhost"))) {
		d = 1;
		xfree(n);
	}

	if (actld && ctld)
		add_display_treestore_line(update, treestore, &iter,
					   "Slurmctld", "1");
	if (actld && d)
		add_display_treestore_line(update, treestore, &iter,
					   "Slurmd", "1");

	gtk_widget_show_all(popup);

	return;
}
/*
 * slurm_job_will_run - determine if a job would execute immediately if
 *	submitted now
 * IN job_desc_msg - description of resource allocation request
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
int slurm_job_will_run(job_desc_msg_t *req)
{
	slurm_msg_t req_msg, resp_msg;
	will_run_response_msg_t *will_run_resp;
	char buf[64];
	bool host_set = false;
	int rc;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	char *type = "processors";

	/* req.immediate = true;	implicit */
	if ((req->alloc_node == NULL) &&
	    (gethostname_short(buf, sizeof(buf)) == 0)) {
		req->alloc_node = buf;
		host_set = true;
	}

	slurm_msg_t_init(&req_msg);
	req_msg.msg_type = REQUEST_JOB_WILL_RUN;
	req_msg.data     = req;

	rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg);

	if (host_set)
		req->alloc_node = NULL;

	if (rc < 0)
		return SLURM_SOCKET_ERROR;

	switch (resp_msg.msg_type) {
	case RESPONSE_SLURM_RC:
		if (_handle_rc_msg(&resp_msg) < 0)
			return SLURM_PROTOCOL_ERROR;
		break;
	case RESPONSE_JOB_WILL_RUN:
		if (cluster_flags & CLUSTER_FLAG_BG)
			type = "cnodes";
		will_run_resp = (will_run_response_msg_t *) resp_msg.data;
		slurm_make_time_str(&will_run_resp->start_time,
				    buf, sizeof(buf));
		info("Job %u to start at %s using %u %s on %s",
		     will_run_resp->job_id, buf,
		     will_run_resp->proc_cnt, type,
		     will_run_resp->node_list);
		if (will_run_resp->preemptee_job_id) {
			ListIterator itr;
			uint32_t *job_id_ptr;
			char *job_list = NULL, *sep = "";
			itr = list_iterator_create(will_run_resp->
						   preemptee_job_id);
			while ((job_id_ptr = list_next(itr))) {
				if (job_list)
					sep = ",";
				xstrfmtcat(job_list, "%s%u", sep, *job_id_ptr);
			}
			list_iterator_destroy(itr);
			info(" Preempts: %s", job_list);
			xfree(job_list);
		}
		slurm_free_will_run_response_msg(will_run_resp);
		break;
	default:
		slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
		break;
	}

	return SLURM_PROTOCOL_SUCCESS;
}
/*
 * slurm_job_will_run - determine if a job would execute immediately if
 *	submitted now
 * IN job_desc_msg - description of resource allocation request
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
int slurm_job_will_run(job_desc_msg_t *req)
{
	will_run_response_msg_t *will_run_resp = NULL;
	char buf[64], local_hostname[64];
	int rc;
	char *cluster_name = NULL;
	void *ptr = NULL;

	if ((req->alloc_node == NULL) &&
	    (gethostname_short(local_hostname, sizeof(local_hostname)) == 0)) {
		req->alloc_node = local_hostname;
	}

	if (working_cluster_rec)
		cluster_name = working_cluster_rec->name;
	else
		cluster_name = slurmctld_conf.cluster_name;
	if (!slurm_load_federation(&ptr) &&
	    cluster_in_federation(ptr, cluster_name))
		rc = _fed_job_will_run(req, &will_run_resp, ptr);
	else
		rc = slurm_job_will_run2(req, &will_run_resp);

	if (will_run_resp)
		print_multi_line_string(
			will_run_resp->job_submit_user_msg, -1);

	if ((rc == 0) && will_run_resp) {
		slurm_make_time_str(&will_run_resp->start_time,
				    buf, sizeof(buf));
		if (will_run_resp->part_name) {
			info("Job %u to start at %s using %u processors on nodes %s in partition %s",
			     will_run_resp->job_id, buf,
			     will_run_resp->proc_cnt,
			     will_run_resp->node_list,
			     will_run_resp->part_name);
		} else {
			/*
			 * Partition name not provided from slurmctld v17.11
			 * or earlier. Remove this in the future.
			 */
			info("Job %u to start at %s using %u processors on nodes %s",
			     will_run_resp->job_id, buf,
			     will_run_resp->proc_cnt,
			     will_run_resp->node_list);
		}
		if (will_run_resp->preemptee_job_id) {
			ListIterator itr;
			uint32_t *job_id_ptr;
			char *job_list = NULL, *sep = "";
			itr = list_iterator_create(will_run_resp->
						   preemptee_job_id);
			while ((job_id_ptr = list_next(itr))) {
				if (job_list)
					sep = ",";
				xstrfmtcat(job_list, "%s%u", sep, *job_id_ptr);
			}
			list_iterator_destroy(itr);
			info(" Preempts: %s", job_list);
			xfree(job_list);
		}

		slurm_free_will_run_response_msg(will_run_resp);
	}

	if (req->alloc_node == local_hostname)
		req->alloc_node = NULL;
	if (ptr)
		slurm_destroy_federation_rec(ptr);

	return rc;
}
/* main - slurmdbd main function, start various threads and process RPCs */
int main(int argc, char *argv[])
{
	pthread_attr_t thread_attr;
	char node_name[128];
	void *db_conn = NULL;
	assoc_init_args_t assoc_init_arg;

	_init_config();
	log_init(argv[0], log_opts, LOG_DAEMON, NULL);
	if (read_slurmdbd_conf())
		exit(1);
	_parse_commandline(argc, argv);
	_update_logging(true);
	_update_nice();

	if (slurm_auth_init(NULL) != SLURM_SUCCESS) {
		fatal("Unable to initialize %s authentication plugin",
		      slurmdbd_conf->auth_type);
	}
	if (slurm_acct_storage_init(NULL) != SLURM_SUCCESS) {
		fatal("Unable to initialize %s accounting storage plugin",
		      slurmdbd_conf->storage_type);
	}
	_kill_old_slurmdbd();
	if (foreground == 0)
		_daemonize();

	/*
	 * Need to create pidfile here in case we setuid() below
	 * (init_pidfile() exits if it can't initialize pid file).
	 * On Linux we also need to make this setuid job explicitly
	 * able to write a core dump.
	 * This also has to happen after daemon(), which closes all fd's,
	 * so we keep the write lock of the pidfile.
	 */
	_init_pidfile();
	_become_slurm_user();
	if (foreground == 0)
		_set_work_dir();
	log_config();

#ifdef PR_SET_DUMPABLE
	if (prctl(PR_SET_DUMPABLE, 1) < 0)
		debug("Unable to set dumpable to 1");
#endif /* PR_SET_DUMPABLE */

	if (xsignal_block(dbd_sigarray) < 0)
		error("Unable to block signals");

	/* Create attached thread for signal handling */
	slurm_attr_init(&thread_attr);
	if (pthread_create(&signal_handler_thread, &thread_attr,
			   _signal_handler, NULL))
		fatal("pthread_create %m");
	slurm_attr_destroy(&thread_attr);

	registered_clusters = list_create(NULL);

	slurm_attr_init(&thread_attr);
	if (pthread_create(&commit_handler_thread, &thread_attr,
			   _commit_handler, NULL))
		fatal("pthread_create %m");
	slurm_attr_destroy(&thread_attr);

	memset(&assoc_init_arg, 0, sizeof(assoc_init_args_t));

	/* If we are tracking wckeys we need to cache wckeys,
	 * if we aren't only cache the users, qos */
	assoc_init_arg.cache_level = ASSOC_MGR_CACHE_USER |
				     ASSOC_MGR_CACHE_QOS;
	if (slurmdbd_conf->track_wckey)
		assoc_init_arg.cache_level |= ASSOC_MGR_CACHE_WCKEY;

	db_conn = acct_storage_g_get_connection(NULL, 0, true, NULL);
	if (assoc_mgr_init(db_conn, &assoc_init_arg, errno) == SLURM_ERROR) {
		error("Problem getting cache of data");
		acct_storage_g_close_connection(&db_conn);
		goto end_it;
	}

	if (gethostname_short(node_name, sizeof(node_name)))
		fatal("getnodename: %m");

	while (1) {
		if (slurmdbd_conf->dbd_backup &&
		    (!strcmp(node_name, slurmdbd_conf->dbd_backup) ||
		     !strcmp(slurmdbd_conf->dbd_backup, "localhost"))) {
			info("slurmdbd running in background mode");
			have_control = false;
			backup = true;
			/* make sure any locks are released */
			acct_storage_g_commit(db_conn, 1);
			run_dbd_backup();
			if (!shutdown_time)
				assoc_mgr_refresh_lists(db_conn);
		} else if (slurmdbd_conf->dbd_host &&
			   (!strcmp(slurmdbd_conf->dbd_host, node_name) ||
			    !strcmp(slurmdbd_conf->dbd_host, "localhost"))) {
			backup = false;
			have_control = true;
		} else {
			fatal("This host not configured to run SlurmDBD "
			      "(%s != %s | (backup) %s)",
			      node_name, slurmdbd_conf->dbd_host,
			      slurmdbd_conf->dbd_backup);
		}

		if (!shutdown_time) {
			/* Create attached thread to process incoming RPCs */
			slurm_attr_init(&thread_attr);
			if (pthread_create(&rpc_handler_thread, &thread_attr,
					   rpc_mgr, NULL))
				fatal("pthread_create error %m");
			slurm_attr_destroy(&thread_attr);
		}

		if (!shutdown_time) {
			/* Create attached thread to do usage rollup */
			slurm_attr_init(&thread_attr);
			if (pthread_create(&rollup_handler_thread,
					   &thread_attr,
					   _rollup_handler, db_conn))
				fatal("pthread_create error %m");
			slurm_attr_destroy(&thread_attr);
		}

		/* Daemon is fully operational here */
		if (!shutdown_time || primary_resumed) {
			shutdown_time = 0;
			info("slurmdbd version %s started",
			     SLURM_VERSION_STRING);
			if (backup)
				run_dbd_backup();
		}

		_request_registrations(db_conn);
		acct_storage_g_commit(db_conn, 1);

		/* this is only run if not backup */
		if (rollup_handler_thread)
			pthread_join(rollup_handler_thread, NULL);
		if (rpc_handler_thread)
			pthread_join(rpc_handler_thread, NULL);

		if (backup && primary_resumed) {
			shutdown_time = 0;
			info("Backup has given up control");
		}

		if (shutdown_time)
			break;
	}
	/* Daemon termination handled here */

end_it:
	if (signal_handler_thread)
		pthread_join(signal_handler_thread, NULL);
	if (commit_handler_thread)
		pthread_join(commit_handler_thread, NULL);

	acct_storage_g_commit(db_conn, 1);
	acct_storage_g_close_connection(&db_conn);

	if (slurmdbd_conf->pid_file &&
	    (unlink(slurmdbd_conf->pid_file) < 0)) {
		verbose("Unable to remove pidfile '%s': %m",
			slurmdbd_conf->pid_file);
	}

	FREE_NULL_LIST(registered_clusters);

	assoc_mgr_fini(NULL);
	slurm_acct_storage_fini();
	slurm_auth_fini();
	log_fini();
	free_slurmdbd_conf();
	exit(0);
}
/*
 * slurm_load_slurmd_status - issue RPC to get the status of the slurmd
 *	daemon on this machine
 * IN slurmd_info_ptr - place to store slurmd status information
 * RET 0 or -1 on error
 * NOTE: free the response using slurm_free_slurmd_status()
 */
extern int slurm_load_slurmd_status(slurmd_status_t **slurmd_status_ptr)
{
	int rc;
	slurm_msg_t req_msg;
	slurm_msg_t resp_msg;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	char *this_addr;

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);

	if (cluster_flags & CLUSTER_FLAG_MULTSD) {
		if ((this_addr = getenv("SLURMD_NODENAME"))) {
			slurm_conf_get_addr(this_addr, &req_msg.address);
		} else {
			this_addr = "localhost";
			slurm_set_addr(&req_msg.address,
				       (uint16_t)slurm_get_slurmd_port(),
				       this_addr);
		}
	} else {
		char this_host[256];
		/*
		 * Set request message address to slurmd on localhost
		 */
		gethostname_short(this_host, sizeof(this_host));
		this_addr = slurm_conf_get_nodeaddr(this_host);
		if (this_addr == NULL)
			this_addr = xstrdup("localhost");
		slurm_set_addr(&req_msg.address,
			       (uint16_t)slurm_get_slurmd_port(),
			       this_addr);
		xfree(this_addr);
	}
	req_msg.msg_type = REQUEST_DAEMON_STATUS;
	req_msg.data     = NULL;

	rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0);

	if ((rc != 0) || !resp_msg.auth_cred) {
		error("slurm_slurmd_info: %m");
		if (resp_msg.auth_cred)
			g_slurm_auth_destroy(resp_msg.auth_cred);
		return SLURM_ERROR;
	}
	if (resp_msg.auth_cred)
		g_slurm_auth_destroy(resp_msg.auth_cred);

	switch (resp_msg.msg_type) {
	case RESPONSE_SLURMD_STATUS:
		*slurmd_status_ptr = (slurmd_status_t *) resp_msg.data;
		break;
	case RESPONSE_SLURM_RC:
		rc = ((return_code_msg_t *) resp_msg.data)->return_code;
		slurm_free_return_code_msg(resp_msg.data);
		if (rc)
			slurm_seterrno_ret(rc);
		break;
	default:
		slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
		break;
	}

	return SLURM_PROTOCOL_SUCCESS;
}
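/*
 * Illustrative sketch (not part of the library): querying the local slurmd
 * daemon and printing its status.  The helper name is hypothetical;
 * slurm_print_slurmd_status() and slurm_free_slurmd_status() are assumed
 * from <slurm/slurm.h>, and <stdio.h> provides stdout.
 */
static void _example_print_slurmd_status(void)
{
	slurmd_status_t *status = NULL;

	if (slurm_load_slurmd_status(&status) < 0) {
		slurm_perror("slurm_load_slurmd_status");
		return;
	}
	slurm_print_slurmd_status(stdout, status);
	slurm_free_slurmd_status(status);
}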
/*
 * slurm_pack_job_will_run - determine if a heterogeneous job would execute
 *	immediately if submitted now
 * IN job_req_list - List of job_desc_msg_t structures describing the resource
 *	allocation request
 * RET 0 on success, otherwise return -1 and set errno to indicate the error
 */
extern int slurm_pack_job_will_run(List job_req_list)
{
	job_desc_msg_t *req;
	will_run_response_msg_t *will_run_resp;
	char buf[64], local_hostname[64] = "", *sep = "";
	int rc = SLURM_SUCCESS;
	char *type = "processors";
	ListIterator iter, itr;
	time_t first_start = (time_t) 0;
	uint32_t first_job_id = 0, tot_proc_count = 0, *job_id_ptr;
	hostset_t hs = NULL;
	char *job_list = NULL;

	if (!job_req_list || (list_count(job_req_list) == 0)) {
		error("No job descriptors input");
		return SLURM_ERROR;
	}

	(void) gethostname_short(local_hostname, sizeof(local_hostname));
	iter = list_iterator_create(job_req_list);
	while ((req = (job_desc_msg_t *) list_next(iter))) {
		if ((req->alloc_node == NULL) && local_hostname[0])
			req->alloc_node = local_hostname;

		will_run_resp = NULL;
		rc = slurm_job_will_run2(req, &will_run_resp);
		if ((rc == SLURM_SUCCESS) && will_run_resp) {
			if (first_job_id == 0)
				first_job_id = will_run_resp->job_id;
			if ((first_start == 0) ||
			    (first_start < will_run_resp->start_time))
				first_start = will_run_resp->start_time;
			tot_proc_count += will_run_resp->proc_cnt;
			if (hs)
				hostset_insert(hs, will_run_resp->node_list);
			else
				hs = hostset_create(will_run_resp->node_list);

			if (will_run_resp->preemptee_job_id) {
				itr = list_iterator_create(will_run_resp->
							   preemptee_job_id);
				while ((job_id_ptr = list_next(itr))) {
					if (job_list)
						sep = ",";
					xstrfmtcat(job_list, "%s%u", sep,
						   *job_id_ptr);
				}
				list_iterator_destroy(itr);
			}

			slurm_free_will_run_response_msg(will_run_resp);
		}
		if (req->alloc_node == local_hostname)
			req->alloc_node = NULL;
		if (rc != SLURM_SUCCESS)
			break;
	}
	list_iterator_destroy(iter);

	if (rc == SLURM_SUCCESS) {
		uint32_t cluster_flags = slurmdb_setup_cluster_flags();
		char node_list[1028] = "";

		if (cluster_flags & CLUSTER_FLAG_BG)
			type = "cnodes";
		if (hs)
			hostset_ranged_string(hs, sizeof(node_list),
					      node_list);
		slurm_make_time_str(&first_start, buf, sizeof(buf));
		info("Job %u to start at %s using %u %s on %s",
		     first_job_id, buf, tot_proc_count, type, node_list);
		if (job_list)
			info(" Preempts: %s", job_list);
	}

	if (hs)
		hostset_destroy(hs);
	xfree(job_list);

	return rc;
}
/*
 * parse_command_line
 */
extern void parse_command_line(int argc, char *argv[])
{
	char *env_val = NULL;
	bool override_format_env = false;
	int opt_char;
	int option_index;
	static struct option long_options[] = {
		{"accounts",    required_argument, 0, 'A'},
		{"all",         no_argument,       0, 'a'},
		{"format",      required_argument, 0, 'o'},
		{"help",        no_argument,       0, OPT_LONG_HELP},
		{"hide",        no_argument,       0, OPT_LONG_HIDE},
		{"iterate",     required_argument, 0, 'i'},
		{"jobs",        optional_argument, 0, 'j'},
		{"long",        no_argument,       0, 'l'},
		{"cluster",     required_argument, 0, 'M'},
		{"clusters",    required_argument, 0, 'M'},
		{"node",        required_argument, 0, 'n'},
		{"nodes",       required_argument, 0, 'n'},
		{"noheader",    no_argument,       0, 'h'},
		{"partitions",  required_argument, 0, 'p'},
		{"qos",         required_argument, 0, 'q'},
		{"reservation", required_argument, 0, 'R'},
		{"sort",        required_argument, 0, 'S'},
		{"start",       no_argument,       0, OPT_LONG_START},
		{"steps",       optional_argument, 0, 's'},
		{"states",      required_argument, 0, 't'},
		{"usage",       no_argument,       0, OPT_LONG_USAGE},
		{"user",        required_argument, 0, 'u'},
		{"users",       required_argument, 0, 'u'},
		{"verbose",     no_argument,       0, 'v'},
		{"version",     no_argument,       0, 'V'},
		{NULL,          0,                 0, 0}
	};

	if (getenv("SQUEUE_ALL"))
		params.all_flag = true;
	if ((env_val = getenv("SQUEUE_SORT")))
		params.sort = xstrdup(env_val);
	if ((env_val = getenv("SLURM_CLUSTERS"))) {
		if (!(params.clusters = slurmdb_get_info_cluster(env_val))) {
			error("'%s' can't be reached now, "
			      "or it is an invalid entry for "
			      "SLURM_CLUSTERS. Use 'sacctmgr --list "
			      "cluster' to see available clusters.",
			      env_val);
			exit(1);
		}
		working_cluster_rec = list_peek(params.clusters);
	}

	while ((opt_char = getopt_long(argc, argv,
				       "A:ahi:j::ln:M:o:p:q:R:s::S:t:u:U:vV",
				       long_options, &option_index)) != -1) {
		switch (opt_char) {
		case (int) '?':
			fprintf(stderr,
				"Try \"squeue --help\" for more information\n");
			exit(1);
		case (int) 'A':
		case (int) 'U':	/* backwards compatibility */
			xfree(params.accounts);
			params.accounts = xstrdup(optarg);
			params.account_list = _build_str_list(params.accounts);
			break;
		case (int) 'a':
			params.all_flag = true;
			break;
		case (int) 'h':
			params.no_header = true;
			break;
		case (int) 'i':
			params.iterate = atoi(optarg);
			if (params.iterate <= 0) {
				error("--iterate=%s\n", optarg);
				exit(1);
			}
			break;
		case (int) 'j':
			if (optarg) {
				params.jobs = xstrdup(optarg);
				params.job_list = _build_job_list(params.jobs);
			}
			params.job_flag = true;
			break;
		case (int) 'l':
			params.long_list = true;
			override_format_env = true;
			break;
		case (int) 'M':
			if (params.clusters)
				list_destroy(params.clusters);
			if (!(params.clusters =
			      slurmdb_get_info_cluster(optarg))) {
				error("'%s' can't be reached now, "
				      "or it is an invalid entry for "
				      "--cluster. Use 'sacctmgr --list "
				      "cluster' to see available clusters.",
				      optarg);
				exit(1);
			}
			working_cluster_rec = list_peek(params.clusters);
			break;
		case (int) 'n':
			if (params.nodes)
				hostset_destroy(params.nodes);
			params.nodes = hostset_create(optarg);
			if (params.nodes == NULL) {
				error("'%s' invalid entry for --nodes",
				      optarg);
				exit(1);
			}
			break;
		case (int) 'o':
			xfree(params.format);
			params.format = xstrdup(optarg);
			override_format_env = true;
			break;
		case (int) 'p':
			xfree(params.partitions);
			params.partitions = xstrdup(optarg);
			params.part_list = _build_str_list(params.partitions);
			params.all_flag = true;
			break;
		case (int) 'q':
			xfree(params.qoss);
			params.qoss = xstrdup(optarg);
			params.qos_list = _build_str_list(params.qoss);
			break;
		case (int) 'R':
			xfree(params.reservation);
			params.reservation = xstrdup(optarg);
			break;
		case (int) 's':
			if (optarg) {
				params.steps = xstrdup(optarg);
				params.step_list =
					_build_step_list(params.steps);
			}
			params.step_flag = true;
			override_format_env = true;
			break;
		case (int) 'S':
			xfree(params.sort);
			params.sort = xstrdup(optarg);
			break;
		case (int) 't':
			xfree(params.states);
			params.states = xstrdup(optarg);
			params.state_list = _build_state_list(params.states);
			break;
		case (int) 'u':
			xfree(params.users);
			params.users = xstrdup(optarg);
			params.user_list = _build_user_list(params.users);
			break;
		case (int) 'v':
			params.verbose++;
			break;
		case (int) 'V':
			print_slurm_version();
			exit(0);
		case OPT_LONG_HELP:
			_help();
			exit(0);
		case OPT_LONG_HIDE:
			params.all_flag = false;
			break;
		case OPT_LONG_START:
			params.start_flag = true;
			break;
		case OPT_LONG_USAGE:
			_usage();
			exit(0);
		}
	}

	if (override_format_env == false) {
		if ((env_val = getenv("SQUEUE_FORMAT")))
			params.format = xstrdup(env_val);
	}

	params.cluster_flags = slurmdb_setup_cluster_flags();

	if (optind < argc) {
		if (params.job_flag) {
			params.jobs = xstrdup(argv[optind++]);
			params.job_list = _build_job_list(params.jobs);
		} else if (params.step_flag) {
			params.steps = xstrdup(argv[optind++]);
			params.step_list = _build_step_list(params.steps);
		}
		if (optind < argc) {
			error("Unrecognized option: %s", argv[optind]);
			_usage();
			exit(1);
		}
	}

	if (params.job_flag && params.step_flag) {
		if (params.job_list) {
			verbose("Printing job steps with job filter");
			params.job_flag = false;
		} else {
			error("Incompatible options --jobs and --steps");
			exit(1);
		}
	}

	if (params.nodes) {
		char *name1 = NULL;
		char *name2 = NULL;
		hostset_t nodenames = hostset_create(NULL);
		if (nodenames == NULL)
			fatal("malloc failure");

		while (hostset_count(params.nodes) > 0) {
			name1 = hostset_pop(params.nodes);
			/* localhost = use current host name */
			if (strcasecmp("localhost", name1) == 0) {
				name2 = xmalloc(128);
				gethostname_short(name2, 128);
			} else {
				/* translate NodeHostName to NodeName */
				name2 = slurm_conf_get_nodename(name1);
				/* use NodeName if translation failed */
				if (name2 == NULL)
					name2 = xstrdup(name1);
			}
			hostset_insert(nodenames, name2);
			free(name1);
			xfree(name2);
		}

		/* Replace params.nodes with the translated hostset */
		hostset_destroy(params.nodes);
		params.nodes = nodenames;
	}

	if ((params.accounts == NULL) &&
	    (env_val = getenv("SQUEUE_ACCOUNT"))) {
		params.accounts = xstrdup(env_val);
		params.account_list = _build_str_list(params.accounts);
	}

	if ((params.partitions == NULL) &&
	    (env_val = getenv("SQUEUE_PARTITION"))) {
		params.partitions = xstrdup(env_val);
		params.part_list = _build_str_list(params.partitions);
		params.all_flag = true;
	}

	if ((params.qoss == NULL) &&
	    (env_val = getenv("SQUEUE_QOS"))) {
		params.qoss = xstrdup(env_val);
		params.qos_list = _build_str_list(params.qoss);
	}

	if ((params.states == NULL) &&
	    (env_val = getenv("SQUEUE_STATES"))) {
		params.states = xstrdup(env_val);
		params.state_list = _build_state_list(params.states);
	}

	if ((params.users == NULL) &&
	    (env_val = getenv("SQUEUE_USERS"))) {
		params.users = xstrdup(env_val);
		params.user_list = _build_user_list(params.users);
	}

	if (params.start_flag && !params.step_flag) {
		/* Set more defaults */
		if (params.format == NULL)
			params.format =
				xstrdup("%.7i %.9P %.8j %.8u %.2t %.19S %.6D %R");
		if (params.sort == NULL)
			params.sort = xstrdup("S");
		if (params.states == NULL) {
			params.states = xstrdup("PD");
			params.state_list = _build_state_list(params.states);
		}
	}

	params.max_cpus = _max_cpus_per_node();

	if (params.verbose)
		_print_options();
}
/* Wait for barrier and get full PMI Keyval space data */
int slurm_get_kvs_comm_set(struct kvs_comm_set **kvs_set_ptr,
			   int pmi_rank, int pmi_size)
{
	int rc, srun_fd, retries = 0, timeout = 0;
	slurm_msg_t msg_send, msg_rcv;
	slurm_addr_t slurm_addr, srun_reply_addr;
	char hostname[64];
	uint16_t port;
	kvs_get_msg_t data;
	char *env_pmi_ifhn;

	if (kvs_set_ptr == NULL)
		return EINVAL;
	*kvs_set_ptr = NULL;	/* initialization */

	if ((rc = _get_addr()) != SLURM_SUCCESS) {
		error("_get_addr: %m");
		return rc;
	}

	_set_pmi_time();

	if (pmi_fd < 0) {
		if ((pmi_fd = slurm_init_msg_engine_port(0)) < 0) {
			error("slurm_init_msg_engine_port: %m");
			return SLURM_ERROR;
		}
		fd_set_blocking(pmi_fd);
	}
	if (slurm_get_stream_addr(pmi_fd, &slurm_addr) < 0) {
		error("slurm_get_stream_addr: %m");
		return SLURM_ERROR;
	}
	/* hostname is not set here, so slurm_get_addr fails
	slurm_get_addr(&slurm_addr, &port, hostname, sizeof(hostname)); */
	port = ntohs(slurm_addr.sin_port);
	if ((env_pmi_ifhn = getenv("SLURM_PMI_RESP_IFHN"))) {
		strncpy(hostname, env_pmi_ifhn, sizeof(hostname));
		hostname[sizeof(hostname)-1] = 0;
	} else
		gethostname_short(hostname, sizeof(hostname));

	data.task_id  = pmi_rank;
	data.size     = pmi_size;
	data.port     = port;
	data.hostname = hostname;
	slurm_msg_t_init(&msg_send);
	slurm_msg_t_init(&msg_rcv);
	msg_send.address  = srun_addr;
	msg_send.msg_type = PMI_KVS_GET_REQ;
	msg_send.data     = &data;

	/*
	 * Send the RPC to the local srun communication manager.
	 * Since the srun can be sent thousands of messages at
	 * the same time and refuse some connections, retry as
	 * needed. Wait until all key-pairs have been sent by
	 * all tasks then spread out messages by task's rank.
	 * Also increase the message timeout if many tasks
	 * since the srun command can get very overloaded (the
	 * default timeout is 10 secs).
	 */
	_delay_rpc(pmi_rank, pmi_size);
	if      (pmi_size > 4000)	/* 240 secs */
		timeout = slurm_get_msg_timeout() * 24000;
	else if (pmi_size > 1000)	/* 120 secs */
		timeout = slurm_get_msg_timeout() * 12000;
	else if (pmi_size > 100)	/* 60 secs */
		timeout = slurm_get_msg_timeout() * 6000;
	else if (pmi_size > 10)		/* 20 secs */
		timeout = slurm_get_msg_timeout() * 2000;

	while (slurm_send_recv_rc_msg_only_one(&msg_send, &rc, timeout) < 0) {
		if (retries++ > MAX_RETRIES) {
			error("slurm_get_kvs_comm_set: %m");
			return SLURM_ERROR;
		} else
			debug("get kvs retry %d", retries);
		_delay_rpc(pmi_rank, pmi_size);
	}
	if (rc != SLURM_SUCCESS) {
		error("slurm_get_kvs_comm_set error_code=%d", rc);
		return rc;
	}

	/* get the message after all tasks reach the barrier */
	srun_fd = slurm_accept_msg_conn(pmi_fd, &srun_reply_addr);
	if (srun_fd < 0) {
		error("slurm_accept_msg_conn: %m");
		return errno;
	}

	while ((rc = slurm_receive_msg(srun_fd, &msg_rcv, timeout)) != 0) {
		if (errno == EINTR)
			continue;
		error("slurm_receive_msg: %m");
		slurm_close(srun_fd);
		return errno;
	}
	if (msg_rcv.auth_cred)
		(void) g_slurm_auth_destroy(msg_rcv.auth_cred);

	if (msg_rcv.msg_type != PMI_KVS_GET_RESP) {
		error("slurm_get_kvs_comm_set msg_type=%d", msg_rcv.msg_type);
		slurm_close(srun_fd);
		return SLURM_UNEXPECTED_MSG_ERROR;
	}
	if (slurm_send_rc_msg(&msg_rcv, SLURM_SUCCESS) < 0)
		error("slurm_send_rc_msg: %m");
	slurm_close(srun_fd);
	*kvs_set_ptr = msg_rcv.data;

	rc = _forward_comm_set(*kvs_set_ptr);
	return rc;
}
/*
 * slurm_get_node_energy - issue RPC to get the energy data of all
 *	configured sensors on the target machine
 * IN host - name of node to query, NULL if localhost
 * IN delta - use cache if data is newer than this in seconds
 * OUT sensor_cnt - number of sensors
 * OUT energy - array of acct_gather_energy_t structures on success or
 *	NULL otherwise
 * RET 0 on success or a slurm error code
 * NOTE: free the response using xfree
 */
extern int slurm_get_node_energy(char *host, uint16_t delta,
				 uint16_t *sensor_cnt,
				 acct_gather_energy_t **energy)
{
	int rc;
	slurm_msg_t req_msg;
	slurm_msg_t resp_msg;
	acct_gather_energy_req_msg_t req;
	uint32_t cluster_flags = slurmdb_setup_cluster_flags();
	char *this_addr;

	xassert(sensor_cnt);
	xassert(energy);

	*sensor_cnt = 0;
	*energy = NULL;

	slurm_msg_t_init(&req_msg);
	slurm_msg_t_init(&resp_msg);

	if (host)
		slurm_conf_get_addr(host, &req_msg.address);
	else if (cluster_flags & CLUSTER_FLAG_MULTSD) {
		if ((this_addr = getenv("SLURMD_NODENAME"))) {
			slurm_conf_get_addr(this_addr, &req_msg.address);
		} else {
			this_addr = "localhost";
			slurm_set_addr(&req_msg.address,
				       (uint16_t)slurm_get_slurmd_port(),
				       this_addr);
		}
	} else {
		char this_host[256];
		/*
		 * Set request message address to slurmd on localhost
		 */
		gethostname_short(this_host, sizeof(this_host));
		this_addr = slurm_conf_get_nodeaddr(this_host);
		if (this_addr == NULL)
			this_addr = xstrdup("localhost");
		slurm_set_addr(&req_msg.address,
			       (uint16_t)slurm_get_slurmd_port(),
			       this_addr);
		xfree(this_addr);
	}

	req.delta        = delta;
	req_msg.msg_type = REQUEST_ACCT_GATHER_ENERGY;
	req_msg.data     = &req;

	rc = slurm_send_recv_node_msg(&req_msg, &resp_msg, 0);

	if (rc != 0 || !resp_msg.auth_cred) {
		error("slurm_get_node_energy: %m");
		if (resp_msg.auth_cred)
			g_slurm_auth_destroy(resp_msg.auth_cred);
		return SLURM_ERROR;
	}
	if (resp_msg.auth_cred)
		g_slurm_auth_destroy(resp_msg.auth_cred);

	switch (resp_msg.msg_type) {
	case RESPONSE_ACCT_GATHER_ENERGY:
		*sensor_cnt = ((acct_gather_node_resp_msg_t *)
			       resp_msg.data)->sensor_cnt;
		*energy = ((acct_gather_node_resp_msg_t *)
			   resp_msg.data)->energy;
		((acct_gather_node_resp_msg_t *) resp_msg.data)->energy = NULL;
		slurm_free_acct_gather_node_resp_msg(resp_msg.data);
		break;
	case RESPONSE_SLURM_RC:
		rc = ((return_code_msg_t *) resp_msg.data)->return_code;
		slurm_free_return_code_msg(resp_msg.data);
		if (rc)
			slurm_seterrno_ret(rc);
		break;
	default:
		slurm_seterrno_ret(SLURM_UNEXPECTED_MSG_ERROR);
		break;
	}

	return SLURM_PROTOCOL_SUCCESS;
}
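/*
 * Illustrative sketch (not part of the library): reading the energy
 * counters of the local node via slurm_get_node_energy().  The helper
 * name and the 10-second cache delta are arbitrary, and the
 * consumed_energy/current_watts fields of acct_gather_energy_t are
 * assumed here; the returned array is freed with xfree() as noted in
 * the doc comment above.
 */
static void _example_report_node_energy(void)
{
	uint16_t sensor_cnt = 0, i;
	acct_gather_energy_t *energy = NULL;

	if (slurm_get_node_energy(NULL, 10, &sensor_cnt, &energy) !=
	    SLURM_PROTOCOL_SUCCESS) {
		error("could not read node energy data");
		return;
	}
	for (i = 0; i < sensor_cnt; i++) {
		info("sensor %u: consumed=%lu joules, current=%u watts",
		     i, (unsigned long) energy[i].consumed_energy,
		     energy[i].current_watts);
	}
	xfree(energy);
}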