/*
 * _send_slurmctld_register_req - request register from slurmctld
 * IN cluster_rec: cluster record holding the control host, control port,
 *	and rpc version of the cluster
 * RET: error code
 */
static int _send_slurmctld_register_req(slurmdb_cluster_rec_t *cluster_rec)
{
	slurm_addr_t ctld_address;
	slurm_fd_t fd;
	int rc = SLURM_SUCCESS;

	slurm_set_addr_char(&ctld_address, cluster_rec->control_port,
			    cluster_rec->control_host);
	fd = slurm_open_msg_conn(&ctld_address);
	if (fd < 0) {
		rc = SLURM_ERROR;
	} else {
		slurm_msg_t out_msg;
		slurm_msg_t_init(&out_msg);
		out_msg.msg_type = ACCOUNTING_REGISTER_CTLD;
		out_msg.flags = SLURM_GLOBAL_AUTH_KEY;
		out_msg.protocol_version =
			slurmdbd_translate_rpc(cluster_rec->rpc_version);
		slurm_send_node_msg(fd, &out_msg);
		/* We probably need to add a matching recv_msg function
		 * for an arbitrary fd, or should these be fire
		 * and forget?  For this one we can probably
		 * forget about it. */
		slurm_close_stream(fd);
	}

	return rc;
}
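/*
 * Illustrative sketch (not part of the original source): the function above
 * is deliberately fire-and-forget -- it connects, sends one message, and
 * closes without reading a reply.  If a matching receive step were ever
 * added, it could look roughly like the hypothetical helper below.  This
 * assumes slurm_receive_msg(fd, &resp, timeout) and a RESPONSE_SLURM_RC
 * reply carrying a return_code_msg_t; both exist in the SLURM protocol API,
 * but the exact usage shown here is an untested assumption, and credential
 * cleanup is elided.
 */
#if 0	/* sketch only, not compiled */
static int _recv_register_ack_sketch(slurm_fd_t fd, int timeout)
{
	slurm_msg_t resp;
	int rc = SLURM_ERROR;

	slurm_msg_t_init(&resp);
	if (slurm_receive_msg(fd, &resp, timeout) < 0)
		return SLURM_ERROR;
	if (resp.msg_type == RESPONSE_SLURM_RC) {
		rc = ((return_code_msg_t *) resp.data)->return_code;
		slurm_free_return_code_msg(resp.data);
	}
	return rc;
}
#endif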
/* Open event_fd as needed
 * RET 0 on success, -1 on failure */
static int _open_fd(time_t now)
{
	if (event_fd != -1)
		return 0;

	/* Identify address for socket connection.
	 * Done only on first call, then cached. */
	if (event_addr_set == 0) {
		slurm_set_addr(&moab_event_addr, e_port, e_host);
		event_addr_set = 1;
		if (e_host_bu[0] != '\0') {
			slurm_set_addr(&moab_event_addr_bu, e_port,
				       e_host_bu);
			event_addr_set = 2;
		}
	}

	/* Open the event port on moab as needed */
	if (event_fd == -1) {
		event_fd = slurm_open_msg_conn(&moab_event_addr);
		if (event_fd == -1) {
			error("Unable to open primary wiki "
			      "event port %s:%u: %m",
			      e_host, e_port);
		}
	}
	if ((event_fd == -1) && (event_addr_set == 2)) {
		event_fd = slurm_open_msg_conn(&moab_event_addr_bu);
		if (event_fd == -1) {
			error("Unable to open backup wiki "
			      "event port %s:%u: %m",
			      e_host_bu, e_port);
		}
	}
	if (event_fd == -1)
		return -1;

	/* We can't have the controller block on the following write() */
	fd_set_nonblocking(event_fd);
	return 0;
}
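/*
 * Background sketch (not part of the original source): fd_set_nonblocking()
 * is used above so the controller never blocks writing to the Moab event
 * port.  A minimal stand-alone equivalent, assuming the usual fcntl()
 * O_NONBLOCK technique, would look something like this; the real helper in
 * src/common/fd.c may differ in details such as error reporting.
 */
#if 0	/* sketch only, not compiled */
#include <fcntl.h>

static void _set_nonblocking_sketch(int fd)
{
	int flags = fcntl(fd, F_GETFL, 0);

	if (flags < 0)
		return;		/* error handling elided for brevity */
	(void) fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}
#endif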
extern int slurm_persist_conn_open_without_init(
	slurm_persist_conn_t *persist_conn)
{
	slurm_addr_t addr;

	xassert(persist_conn);
	xassert(persist_conn->rem_host);
	xassert(persist_conn->rem_port);
	xassert(persist_conn->cluster_name);

	if (persist_conn->fd > 0)
		_close_fd(&persist_conn->fd);
	else
		persist_conn->fd = -1;

	if (!persist_conn->inited)
		persist_conn->inited = true;

	if (!persist_conn->version) {
		/* Set to MIN_PROTOCOL so that a higher version controller can
		 * talk to a lower protocol version controller. When talking to
		 * the DBD, the protocol version should be set to the current
		 * protocol version prior to calling this. */
		persist_conn->version = SLURM_MIN_PROTOCOL_VERSION;
	}
	if (persist_conn->timeout < 0)
		persist_conn->timeout = slurm_get_msg_timeout() * 1000;

	slurm_set_addr_char(&addr, persist_conn->rem_port,
			    persist_conn->rem_host);
	if ((persist_conn->fd = slurm_open_msg_conn(&addr)) < 0) {
		if (_comm_fail_log(persist_conn)) {
			char *s = xstrdup_printf("%s: failed to open persistent connection to %s:%d: %m",
						 __func__,
						 persist_conn->rem_host,
						 persist_conn->rem_port);
			if (persist_conn->flags & PERSIST_FLAG_SUPPRESS_ERR)
				debug2("%s", s);
			else
				error("%s", s);
			xfree(s);
		}
		return SLURM_ERROR;
	}
	fd_set_nonblocking(persist_conn->fd);
	fd_set_close_on_exec(persist_conn->fd);

	return SLURM_SUCCESS;
}
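/*
 * Background sketch (not part of the original source):
 * fd_set_close_on_exec() keeps the persistent-connection socket from
 * leaking into any child processes the daemon later fork()/exec()s (for
 * example, spawned scripts).  A minimal stand-alone equivalent, assuming
 * the usual FD_CLOEXEC technique, could be written as follows; the real
 * helper in src/common/fd.c may differ in details.
 */
#if 0	/* sketch only, not compiled */
#include <fcntl.h>

static void _set_close_on_exec_sketch(int fd)
{
	int flags = fcntl(fd, F_GETFD, 0);

	if (flags < 0)
		return;		/* error handling elided for brevity */
	(void) fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
}
#endif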
/*
 * cluster_first_reg - ask the controller to send nodes in a down state
 *	and jobs pending or running on first registration.
 *
 * IN host: controller host
 * IN port: controller port
 * IN rpc_version: controller rpc version
 * RET: error code
 */
extern int cluster_first_reg(char *host, uint16_t port, uint16_t rpc_version)
{
	slurm_addr_t ctld_address;
	int fd;
	int rc = SLURM_SUCCESS;

	info("First time to register cluster requesting "
	     "running jobs and system information.");

	slurm_set_addr_char(&ctld_address, port, host);
	fd = slurm_open_msg_conn(&ctld_address);
	if (fd < 0) {
		error("can not open socket back to slurmctld "
		      "%s(%u): %m", host, port);
		rc = SLURM_ERROR;
	} else {
		slurm_msg_t out_msg;
		accounting_update_msg_t update;
		/* We have to put this update message here so we can tell
		 * the sender to send the correct RPC version. */
		memset(&update, 0, sizeof(accounting_update_msg_t));
		update.rpc_version = rpc_version;
		slurm_msg_t_init(&out_msg);
		out_msg.msg_type = ACCOUNTING_FIRST_REG;
		out_msg.flags = SLURM_GLOBAL_AUTH_KEY;
		out_msg.data = &update;
		slurm_send_node_msg(fd, &out_msg);
		/* We probably need to add a matching recv_msg function
		 * for an arbitrary fd, or should these be fire
		 * and forget?  For this one we can probably
		 * forget about it. */
		close(fd);
	}
	return rc;
}
void *_forward_thread(void *arg)
{
	forward_msg_t *fwd_msg = (forward_msg_t *)arg;
	forward_struct_t *fwd_struct = fwd_msg->fwd_struct;
	Buf buffer = init_buf(BUF_SIZE);	/* probably enough for header */
	List ret_list = NULL;
	int fd = -1;
	ret_data_info_t *ret_data_info = NULL;
	char *name = NULL;
	hostlist_t hl = hostlist_create(fwd_msg->header.forward.nodelist);
	slurm_addr_t addr;
	char *buf = NULL;
	int steps = 0;
	int start_timeout = fwd_msg->timeout;

	/* repeat until we are sure the message was sent */
	while ((name = hostlist_shift(hl))) {
		if (slurm_conf_get_addr(name, &addr) == SLURM_ERROR) {
			error("forward_thread: can't find address for host "
			      "%s, check slurm.conf", name);
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       SLURM_UNKNOWN_FORWARD_ADDR);
			free(name);
			if (hostlist_count(hl) > 0) {
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				continue;
			}
			goto cleanup;
		}
		if ((fd = slurm_open_msg_conn(&addr)) < 0) {
			error("forward_thread to %s: %m", name);

			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(
				&fwd_struct->ret_list, name,
				SLURM_COMMUNICATIONS_CONNECTION_ERROR);
			free(name);
			if (hostlist_count(hl) > 0) {
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				/* Abandon tree. This way if all the
				 * nodes in the branch are down we
				 * don't have to time out for each
				 * node serially.
				 */
				_forward_msg_internal(hl, NULL, fwd_struct,
						      &fwd_msg->header, 0,
						      hostlist_count(hl));
				continue;
			}
			goto cleanup;
		}
		buf = hostlist_ranged_string_xmalloc(hl);

		xfree(fwd_msg->header.forward.nodelist);
		fwd_msg->header.forward.nodelist = buf;
		fwd_msg->header.forward.cnt = hostlist_count(hl);
#if 0
		info("sending %d forwards (%s) to %s",
		     fwd_msg->header.forward.cnt,
		     fwd_msg->header.forward.nodelist, name);
#endif
		if (fwd_msg->header.forward.nodelist[0]) {
			debug3("forward: send to %s along with %s",
			       name, fwd_msg->header.forward.nodelist);
		} else
			debug3("forward: send to %s ", name);

		pack_header(&fwd_msg->header, buffer);

		/* add forward data to buffer */
		if (remaining_buf(buffer) < fwd_struct->buf_len) {
			int new_size = buffer->processed + fwd_struct->buf_len;
			new_size += 1024; /* padded for paranoia */
			xrealloc_nz(buffer->head, new_size);
			buffer->size = new_size;
		}
		if (fwd_struct->buf_len) {
			memcpy(&buffer->head[buffer->processed],
			       fwd_struct->buf, fwd_struct->buf_len);
			buffer->processed += fwd_struct->buf_len;
		}

		/*
		 * forward message
		 */
		if (slurm_msg_sendto(fd,
				     get_buf_data(buffer),
				     get_buf_offset(buffer),
				     SLURM_PROTOCOL_NO_SEND_RECV_FLAGS) < 0) {
			error("forward_thread: slurm_msg_sendto: %m");

			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       errno);
			free(name);
			if (hostlist_count(hl) > 0) {
				free_buf(buffer);
				buffer = init_buf(fwd_struct->buf_len);
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				slurm_close(fd);
				fd = -1;
				/* Abandon tree. This way if all the
				 * nodes in the branch are down we
				 * don't have to time out for each
				 * node serially.
				 */
				_forward_msg_internal(hl, NULL, fwd_struct,
						      &fwd_msg->header, 0,
						      hostlist_count(hl));
				continue;
			}
			goto cleanup;
		}

		/* These messages don't have a return message, but if
		 * we got here things worked out so make note of the
		 * list of nodes as success.
		 */
		if ((fwd_msg->header.msg_type == REQUEST_SHUTDOWN) ||
		    (fwd_msg->header.msg_type == REQUEST_RECONFIGURE) ||
		    (fwd_msg->header.msg_type == REQUEST_REBOOT_NODES)) {
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			ret_data_info = xmalloc(sizeof(ret_data_info_t));
			list_push(fwd_struct->ret_list, ret_data_info);
			ret_data_info->node_name = xstrdup(name);
			free(name);
			while ((name = hostlist_shift(hl))) {
				ret_data_info =
					xmalloc(sizeof(ret_data_info_t));
				list_push(fwd_struct->ret_list, ret_data_info);
				ret_data_info->node_name = xstrdup(name);
				free(name);
			}
			goto cleanup;
		}

		if (fwd_msg->header.forward.cnt > 0) {
			static int message_timeout = -1;
			if (message_timeout < 0)
				message_timeout =
					slurm_get_msg_timeout() * 1000;
			if (!fwd_msg->header.forward.tree_width)
				fwd_msg->header.forward.tree_width =
					slurm_get_tree_width();
			steps = (fwd_msg->header.forward.cnt+1) /
					fwd_msg->header.forward.tree_width;
			fwd_msg->timeout = (message_timeout*steps);
			/* info("got %d * %d = %d", message_timeout, */
			/*      steps, fwd_msg->timeout); */
			steps++;
			fwd_msg->timeout += (start_timeout*steps);
			/* info("now + %d*%d = %d", start_timeout, */
			/*      steps, fwd_msg->timeout); */
		}

		ret_list = slurm_receive_msgs(fd, steps, fwd_msg->timeout);
		/* info("sent %d forwards got %d back", */
		/*      fwd_msg->header.forward.cnt, list_count(ret_list)); */

		if (!ret_list || (fwd_msg->header.forward.cnt != 0 &&
				  list_count(ret_list) <= 1)) {
			slurm_mutex_lock(&fwd_struct->forward_mutex);
			mark_as_failed_forward(&fwd_struct->ret_list, name,
					       errno);
			free(name);
			FREE_NULL_LIST(ret_list);
			if (hostlist_count(hl) > 0) {
				free_buf(buffer);
				buffer = init_buf(fwd_struct->buf_len);
				slurm_mutex_unlock(&fwd_struct->forward_mutex);
				slurm_close(fd);
				fd = -1;
				continue;
			}
			goto cleanup;
		} else if ((fwd_msg->header.forward.cnt+1) !=
			   list_count(ret_list)) {
			/* This should never be called since the above
			 * should catch the failed forwards and pipe
			 * them back down, but this is here so we never
			 * have to worry about a locked mutex */
			ListIterator itr = NULL;
			char *tmp = NULL;
			int first_node_found = 0;
			hostlist_iterator_t host_itr
				= hostlist_iterator_create(hl);
			error("We shouldn't be here. We forwarded to %d "
			      "but only got %d back",
			      (fwd_msg->header.forward.cnt+1),
			      list_count(ret_list));
			while ((tmp = hostlist_next(host_itr))) {
				int node_found = 0;
				itr = list_iterator_create(ret_list);
				while ((ret_data_info = list_next(itr))) {
					if (!ret_data_info->node_name) {
						first_node_found = 1;
						ret_data_info->node_name =
							xstrdup(name);
					}
					if (!xstrcmp(tmp,
						     ret_data_info->node_name)) {
						node_found = 1;
						break;
					}
				}
				list_iterator_destroy(itr);
				if (!node_found) {
					mark_as_failed_forward(
						&fwd_struct->ret_list,
						tmp,
						SLURM_COMMUNICATIONS_CONNECTION_ERROR);
				}
				free(tmp);
			}
			hostlist_iterator_destroy(host_itr);
			if (!first_node_found) {
				mark_as_failed_forward(
					&fwd_struct->ret_list,
					name,
					SLURM_COMMUNICATIONS_CONNECTION_ERROR);
			}
		}
		break;
	}
	slurm_mutex_lock(&fwd_struct->forward_mutex);
	if (ret_list) {
		while ((ret_data_info = list_pop(ret_list)) != NULL) {
			if (!ret_data_info->node_name) {
				ret_data_info->node_name = xstrdup(name);
			}
			list_push(fwd_struct->ret_list, ret_data_info);
			debug3("got response from %s",
			       ret_data_info->node_name);
		}
		FREE_NULL_LIST(ret_list);
	}
	free(name);
cleanup:
	if ((fd >= 0) && slurm_close(fd) < 0)
		error ("close(%d): %m", fd);
	hostlist_destroy(hl);
	destroy_forward(&fwd_msg->header.forward);
	free_buf(buffer);
	slurm_cond_signal(&fwd_struct->notify);
	slurm_mutex_unlock(&fwd_struct->forward_mutex);
	xfree(fwd_msg);
	return (NULL);
}
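/*
 * Worked example (not part of the original source): the timeout scaling in
 * _forward_thread() grows with the number of forwarding steps derived from
 * the fan-out.  The helper below repeats the same arithmetic with made-up
 * values purely for illustration; real values come from
 * slurm_get_msg_timeout() and slurm_get_tree_width().
 */
#if 0	/* sketch only, not compiled */
static int _scaled_timeout_sketch(int fwd_cnt, int tree_width,
				  int message_timeout, int start_timeout)
{
	/* Same arithmetic as in _forward_thread() above. */
	int steps = (fwd_cnt + 1) / tree_width;
	int timeout = message_timeout * steps;

	steps++;
	timeout += start_timeout * steps;
	return timeout;
	/* e.g. fwd_cnt=49, tree_width=10, both timeouts 10000 ms:
	 * steps=5 gives 50000 ms, then steps=6 adds 60000 ms,
	 * for a total of 110000 ms. */
}
#endif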