eio_handle_t *eio_handle_create(uint16_t shutdown_wait) { eio_handle_t *eio = xmalloc(sizeof(*eio)); if (pipe(eio->fds) < 0) { error ("eio_create: pipe: %m"); eio_handle_destroy(eio); return (NULL); } fd_set_nonblocking(eio->fds[0]); fd_set_close_on_exec(eio->fds[0]); fd_set_close_on_exec(eio->fds[1]); xassert(eio->magic = EIO_MAGIC); eio->obj_list = list_create(eio_obj_destroy); eio->new_objs = list_create(eio_obj_destroy); slurm_mutex_init(&eio->shutdown_mutex); eio->shutdown_wait = DEFAULT_EIO_SHUTDOWN_WAIT; if (shutdown_wait > 0) eio->shutdown_wait = shutdown_wait; return eio; }
int open_tun(const char *tun_device) { int i; int tun_fd; struct ifreq ifreq; #ifdef ANDROID char *tunnel = "/dev/tun"; #else char *tunnel = "/dev/net/tun"; #endif if ((tun_fd = open(tunnel, O_RDWR)) < 0) { warn("open_tun: %s: %s", tunnel, strerror(errno)); return -1; } memset(&ifreq, 0, sizeof(ifreq)); ifreq.ifr_flags = IFF_TUN; if (tun_device != NULL) { strncpy(ifreq.ifr_name, tun_device, IFNAMSIZ); ifreq.ifr_name[IFNAMSIZ-1] = '\0'; strncpy(if_name, tun_device, sizeof(if_name)); if_name[sizeof(if_name)-1] = '\0'; if (ioctl(tun_fd, TUNSETIFF, (void *) &ifreq) != -1) { fprintf(stderr, "Opened %s\n", ifreq.ifr_name); fd_set_close_on_exec(tun_fd); return tun_fd; } if (errno != EBUSY) { warn("open_tun: ioctl[TUNSETIFF]: %s", strerror(errno)); return -1; } } else { for (i = 0; i < TUN_MAX_TRY; i++) { snprintf(ifreq.ifr_name, IFNAMSIZ, "dns%d", i); if (ioctl(tun_fd, TUNSETIFF, (void *) &ifreq) != -1) { fprintf(stderr, "Opened %s\n", ifreq.ifr_name); snprintf(if_name, sizeof(if_name), "dns%d", i); fd_set_close_on_exec(tun_fd); return tun_fd; } if (errno != EBUSY) { warn("open_tun: ioctl[TUNSETIFF]: %s", strerror(errno)); return -1; } } warn("open_tun: Couldn't set interface name"); } warn("error when opening tun"); return -1; }
/*
 * Accept one connection on obj->fd, receive a single slurm message from it
 * and hand that message to obj->ops->handle_msg.
 *
 * obj  - listening eio object; obj->ops->handle_msg must be set.
 * objs - unused here.
 *
 * Always returns SLURM_SUCCESS. An accept() failure other than
 * EINTR/EAGAIN/EWOULDBLOCK/ECONNABORTED sets obj->shutdown.
 */
int eio_message_socket_accept(eio_obj_t *obj, List objs)
{
	int fd;
	unsigned char *uc;
	unsigned short port;
	struct sockaddr_in addr;
	slurm_msg_t *msg = NULL;
	socklen_t len;

	debug3("Called eio_msg_socket_accept");

	xassert(obj);
	xassert(obj->ops->handle_msg);

	for (;;) {
		/*
		 * accept() takes a socklen_t value-result argument, so use
		 * the proper type and reset it before every attempt.
		 */
		len = sizeof(addr);
		fd = accept(obj->fd, (struct sockaddr *)&addr, &len);
		if (fd >= 0)
			break;
		if (errno == EINTR)
			continue;
		if (errno == EAGAIN ||
		    errno == ECONNABORTED ||
		    errno == EWOULDBLOCK) {
			return SLURM_SUCCESS;
		}
		error("Error on msg accept socket: %m");
		obj->shutdown = true;
		return SLURM_SUCCESS;
	}

	fd_set_close_on_exec(fd);
	fd_set_blocking(fd);

	/* Should not call slurm_get_addr() because the IP may not be
	   in /etc/hosts. */
	uc = (unsigned char *)&addr.sin_addr.s_addr;
	port = addr.sin_port;
	debug2("got message connection from %u.%u.%u.%u:%hu %d",
	       uc[0], uc[1], uc[2], uc[3], ntohs(port), fd);
	fflush(stdout);

	msg = xmalloc(sizeof(slurm_msg_t));
	slurm_msg_t_init(msg);
again:
	if(slurm_receive_msg(fd, msg, obj->ops->timeout) != 0) {
		if (errno == EINTR) {
			goto again;
		}
		error("slurm_receive_msg[%u.%u.%u.%u]: %m",
		      uc[0],uc[1],uc[2],uc[3]);
		goto cleanup;
	}

	(*obj->ops->handle_msg)(obj->arg, msg); /* handle_msg should free
						 * msg->data */
cleanup:
	if ((msg->conn_fd >= 0) && slurm_close_accepted_conn(msg->conn_fd) < 0)
		error ("close(%d): %m", msg->conn_fd);
	slurm_free_msg(msg);

	return SLURM_SUCCESS;
}
/* * TODO: we need to keep track of the "me" * structures created here, because we need to * free them in "pmixp_stepd_finalize" */ void pmix_server_new_conn(int fd) { eio_obj_t *obj; PMIXP_DEBUG("Request from fd = %d", fd); /* Set nonblocking */ fd_set_nonblocking(fd); fd_set_close_on_exec(fd); pmixp_io_engine_t *me = xmalloc(sizeof(pmixp_io_engine_t)); pmix_io_init(me, fd, srv_rcvd_header); /* We use slurm_forward_data to send message to stepd's * SLURM will put user ID there. We need to skip it. */ pmix_io_rcvd_padding(me, sizeof(uint32_t)); if( 2 == _process_message(me) ){ /* connection was fully processed here */ xfree(me); return; } /* If it is a blocking operation: create AIO object to * handle it */ obj = eio_obj_create(fd, &peer_ops, (void *)me); eio_new_obj(pmixp_info_io(), obj); }
/*! Removes the file descriptor from the specified slot. */ static struct file_descriptor* remove_fd(struct io_context* context, int fd) { struct file_descriptor* descriptor = NULL; if (fd < 0) return NULL; mutex_lock(&context->io_mutex); if ((uint32)fd < context->table_size) descriptor = context->fds[fd]; select_info* selectInfos = NULL; bool disconnected = false; if (descriptor) { // fd is valid context->fds[fd] = NULL; fd_set_close_on_exec(context, fd, false); context->num_used_fds--; selectInfos = context->select_infos[fd]; context->select_infos[fd] = NULL; disconnected = (descriptor->open_mode & O_DISCONNECTED); } mutex_unlock(&context->io_mutex); if (selectInfos != NULL) deselect_select_infos(descriptor, selectInfos); return disconnected ? NULL : descriptor; }
void pmixp_server_direct_conn(int fd) { eio_obj_t *obj; pmixp_conn_t *conn; PMIXP_DEBUG("Request from fd = %d", fd); /* Set nonblocking */ fd_set_nonblocking(fd); fd_set_close_on_exec(fd); pmixp_fd_set_nodelay(fd); conn = pmixp_conn_new_temp(PMIXP_PROTO_DIRECT, fd, _direct_conn_establish); /* try to process right here */ pmixp_conn_progress_rcv(conn); if (!pmixp_conn_is_alive(conn)) { /* success, don't need this connection anymore */ pmixp_conn_return(conn); return; } /* If it is a blocking operation: create AIO object to * handle it */ obj = eio_obj_create(fd, &direct_peer_ops, (void *)conn); eio_new_obj(pmixp_info_io(), obj); /* wakeup this connection to get processed */ eio_signal_wakeup(pmixp_info_io()); }
static struct file_descriptor * remove_fd(struct io_context *context, int fd) { struct file_descriptor *descriptor = NULL; if (fd < 0) return NULL; fssh_mutex_lock(&context->io_mutex); if ((uint32_t)fd < context->table_size) descriptor = context->fds[fd]; if (descriptor) { // fd is valid context->fds[fd] = NULL; fd_set_close_on_exec(context, fd, false); context->num_used_fds--; if (descriptor->open_mode & FSSH_O_DISCONNECTED) descriptor = NULL; } fssh_mutex_unlock(&context->io_mutex); return descriptor; }
/* * TODO: we need to keep track of the "me" * structures created here, because we need to * free them in "pmixp_stepd_finalize" */ void pmixp_server_slurm_conn(int fd) { eio_obj_t *obj; pmixp_conn_t *conn = NULL; PMIXP_DEBUG("Request from fd = %d", fd); pmixp_debug_hang(0); /* Set nonblocking */ fd_set_nonblocking(fd); fd_set_close_on_exec(fd); conn = pmixp_conn_new_temp(PMIXP_PROTO_SLURM, fd, _slurm_new_msg); /* try to process right here */ pmixp_conn_progress_rcv(conn); if (!pmixp_conn_is_alive(conn)) { /* success, don't need this connection anymore */ pmixp_conn_return(conn); return; } /* If it is a blocking operation: create AIO object to * handle it */ obj = eio_obj_create(fd, &slurm_peer_ops, (void *)conn); eio_new_obj(pmixp_info_io(), obj); }
/*
 * Create a named unix domain listening socket.
 * (cf, Stevens APUE 1st ed., section 15.5.2)
 *
 * name - filesystem path to bind; must fit in sockaddr_un.sun_path
 *        including the terminating NUL.
 *
 * Returns the listening descriptor (marked close-on-exec) on success, or
 *   -1 if the name is too long or socket() fails,
 *   -2 if bind() fails,
 *   -3 if listen() fails.
 * The socket is closed again on the -2 and -3 paths, so nothing leaks.
 */
static int _create_socket(const char *name)
{
	int fd;
	int len;
	struct sockaddr_un addr;

	/* Refuse names that would overflow sun_path in the copy below */
	if (strlen(name) >= sizeof(addr.sun_path))
		return -1;

	/* create a unix domain stream socket */
	if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
		return -1;
	fd_set_close_on_exec(fd);

	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	strcpy(addr.sun_path, name);	/* length verified above */
	len = strlen(addr.sun_path)+1 + sizeof(addr.sun_family);

	/* bind the name to the descriptor */
	if (bind(fd, (struct sockaddr *) &addr, len) < 0) {
		(void) close(fd);
		return -2;
	}

	if (listen(fd, 5) < 0) {
		(void) close(fd);
		return -3;
	}

	return fd;
}
static int dup_fd(int fd, bool kernel) { struct io_context* context = get_current_io_context(kernel); struct file_descriptor* descriptor; int status; TRACE(("dup_fd: fd = %d\n", fd)); // Try to get the fd structure descriptor = get_fd(context, fd); if (descriptor == NULL) return B_FILE_ERROR; // now put the fd in place status = new_fd(context, descriptor); if (status < 0) put_fd(descriptor); else { mutex_lock(&context->io_mutex); fd_set_close_on_exec(context, status, false); mutex_unlock(&context->io_mutex); } return status; }
static void _create_msg_socket(void) { char* node_addr; slurm_fd_t ld = slurm_init_msg_engine_addrname_port(conf->node_addr, conf->port); if (conf->node_addr == NULL) node_addr = "*"; else node_addr = conf->node_addr; if (ld < 0) { error("Unable to bind listen port (%s:%d): %m", node_addr, conf->port); exit(1); } fd_set_close_on_exec(ld); conf->lfd = ld; debug3("successfully opened slurm listen port %s:%d", node_addr, conf->port); return; }
/*! POSIX says this should be the same as: close(newfd); fcntl(oldfd, F_DUPFD, newfd); We do dup2() directly to be thread-safe. */ static int dup2_fd(int oldfd, int newfd, bool kernel) { struct file_descriptor* evicted = NULL; struct io_context* context; TRACE(("dup2_fd: ofd = %d, nfd = %d\n", oldfd, newfd)); // quick check if (oldfd < 0 || newfd < 0) return B_FILE_ERROR; // Get current I/O context and lock it context = get_current_io_context(kernel); mutex_lock(&context->io_mutex); // Check if the fds are valid (mutex must be locked because // the table size could be changed) if ((uint32)oldfd >= context->table_size || (uint32)newfd >= context->table_size || context->fds[oldfd] == NULL || (context->fds[oldfd]->open_mode & O_DISCONNECTED) != 0) { mutex_unlock(&context->io_mutex); return B_FILE_ERROR; } // Check for identity, note that it cannot be made above // because we always want to return an error on invalid // handles select_info* selectInfos = NULL; if (oldfd != newfd) { // Now do the work TFD(Dup2FD(context, oldfd, newfd)); evicted = context->fds[newfd]; selectInfos = context->select_infos[newfd]; context->select_infos[newfd] = NULL; atomic_add(&context->fds[oldfd]->ref_count, 1); atomic_add(&context->fds[oldfd]->open_count, 1); context->fds[newfd] = context->fds[oldfd]; if (evicted == NULL) context->num_used_fds++; } fd_set_close_on_exec(context, newfd, false); mutex_unlock(&context->io_mutex); // Say bye bye to the evicted fd if (evicted) { deselect_select_infos(evicted, selectInfos, true); close_fd(evicted); put_fd(evicted); } return newfd; }
eio_handle_t *eio_handle_create(void) { eio_handle_t *eio = xmalloc(sizeof(*eio)); if (pipe(eio->fds) < 0) { error ("eio_create: pipe: %m"); eio_handle_destroy(eio); return (NULL); } fd_set_nonblocking(eio->fds[0]); fd_set_close_on_exec(eio->fds[0]); fd_set_close_on_exec(eio->fds[1]); xassert(eio->magic = EIO_MAGIC); eio->obj_list = list_create(eio_obj_destroy); eio->new_objs = list_create(eio_obj_destroy); return eio; }
/*
 * Open a tun device node under /dev.
 *
 * tun_device - device name relative to /dev, or NULL to probe
 *              /dev/tun0, /dev/tun1, ... up to TUN_MAX_TRY nodes.
 *
 * On success the interface name is stored in the file-level if_name
 * buffer, the descriptor is marked close-on-exec, and it is returned.
 * Returns -1 on failure.
 */
int open_tun(const char *tun_device)
{
	char dev_path[50];
	int unit;
	int dev_fd;

	if (tun_device == NULL) {
		/* Probe numbered devices until one opens */
		for (unit = 0; unit < TUN_MAX_TRY; unit++) {
			snprintf(dev_path, sizeof(dev_path), "/dev/tun%d", unit);

			dev_fd = open(dev_path, O_RDWR);
			if (dev_fd >= 0) {
				fprintf(stderr, "Opened %s\n", dev_path);
				snprintf(if_name, sizeof(if_name), "tun%d", unit);
				fd_set_close_on_exec(dev_fd);
				return dev_fd;
			}

			/* A missing node ends the probe */
			if (errno == ENOENT)
				break;
		}

		warn("open_tun: Failed to open tunneling device");
		return -1;
	}

	/* Explicit device requested */
	snprintf(dev_path, sizeof(dev_path), "/dev/%s", tun_device);
	strncpy(if_name, tun_device, sizeof(if_name));
	if_name[sizeof(if_name)-1] = '\0';

	dev_fd = open(dev_path, O_RDWR);
	if (dev_fd < 0) {
		warn("open_tun: %s: %s", dev_path, strerror(errno));
		return -1;
	}

	fprintf(stderr, "Opened %s\n", dev_path);
	fd_set_close_on_exec(dev_fd);
	return dev_fd;
}
/*
 * Open the socket of a persistent connection without any further
 * initialisation on it.
 *
 * persist_conn - connection whose rem_host, rem_port and cluster_name must
 *                be set. A descriptor greater than zero that is already
 *                stored in it is closed first. An unset version defaults
 *                to SLURM_MIN_PROTOCOL_VERSION and a negative timeout to
 *                the message timeout in milliseconds.
 *
 * Returns SLURM_SUCCESS with persist_conn->fd set (non-blocking,
 * close-on-exec), or SLURM_ERROR if the connection could not be opened.
 */
extern int slurm_persist_conn_open_without_init(
	slurm_persist_conn_t *persist_conn)
{
	slurm_addr_t remote;

	xassert(persist_conn);
	xassert(persist_conn->rem_host);
	xassert(persist_conn->rem_port);
	xassert(persist_conn->cluster_name);

	if (persist_conn->fd > 0)
		_close_fd(&persist_conn->fd);
	else
		persist_conn->fd = -1;

	if (!persist_conn->inited)
		persist_conn->inited = true;

	if (!persist_conn->version) {
		/* Set to MIN_PROTOCOL so that a higher version controller can
		 * talk to a lower protocol version controller. When talking to
		 * the DBD, the protocol version should be set to the current
		 * protocol version prior to calling this. */
		persist_conn->version = SLURM_MIN_PROTOCOL_VERSION;
	}

	if (persist_conn->timeout < 0)
		persist_conn->timeout = slurm_get_msg_timeout() * 1000;

	slurm_set_addr_char(&remote, persist_conn->rem_port,
			    persist_conn->rem_host);

	persist_conn->fd = slurm_open_msg_conn(&remote);
	if (persist_conn->fd >= 0) {
		fd_set_nonblocking(persist_conn->fd);
		fd_set_close_on_exec(persist_conn->fd);
		return SLURM_SUCCESS;
	}

	/* Open failed: log only when _comm_fail_log() permits it */
	if (_comm_fail_log(persist_conn)) {
		char *text = xstrdup_printf("%s: failed to open persistent connection to %s:%d: %m",
					    __func__,
					    persist_conn->rem_host,
					    persist_conn->rem_port);

		if (persist_conn->flags & PERSIST_FLAG_SUPPRESS_ERR)
			debug2("%s", text);
		else
			error("%s", text);
		xfree(text);
	}

	return SLURM_ERROR;
}
/*
 * Service an accepted connection in a detached thread.
 *
 * fd  - accepted descriptor; marked close-on-exec here.
 * cli - client address, passed on to the service thread.
 *
 * If the thread cannot be created after several retries, the connection is
 * serviced synchronously in the calling thread instead.
 */
static void _handle_connection(slurm_fd_t fd, slurm_addr_t *cli)
{
	int rc;
	pthread_attr_t attr;
	pthread_t id;
	conn_t *arg = xmalloc(sizeof(conn_t));
	int retries = 0;

	arg->fd = fd;
	arg->cli_addr = cli;

	slurm_attr_init(&attr);
	rc = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	if (rc != 0) {
		/* pthread calls return the error code; %m reads errno */
		errno = rc;
		error("Unable to set detachstate on attr: %m");
		xfree(arg);
		slurm_attr_destroy(&attr);
		return;
	}

	fd_set_close_on_exec(fd);

	_increment_thd_count();
	while ((rc = pthread_create(&id, &attr, &_service_connection,
				    (void *)arg))) {
		errno = rc;
		error("msg_engine: pthread_create: %m");
		if (++retries > 3) {
			error("running service_connection without starting "
			      "a new thread slurmd will be "
			      "unresponsive until done");
			_service_connection((void *) arg);
			info("slurmd should be responsive now");
			break;
		}
		usleep(10);	/* sleep and again */
	}

	/* Destroy the attribute object on the normal path as well */
	slurm_attr_destroy(&attr);
}
/*
 * Create a UDP socket and bind it to the given address.
 *
 * sockaddr     - address to bind; its family selects IPv4 or IPv6.
 * sockaddr_len - length of that address.
 * v6only       - for AF_INET6 sockets, value for IPV6_V6ONLY; a negative
 *                value leaves the option untouched.
 *
 * Address/port reuse (and, where available, don't-fragment) are requested
 * without checking the setsockopt() results. socket() or bind() failure
 * terminates the process through err(). Returns the bound descriptor.
 */
int open_dns_opt(struct sockaddr_storage *sockaddr, size_t sockaddr_len, int v6only)
{
	int optval;
	int sock;
	int is_v6 = (sockaddr->ss_family == AF_INET6);

	sock = socket(sockaddr->ss_family, SOCK_DGRAM, IPPROTO_UDP);
	if (sock < 0) {
		err(1, "socket");
	}

	optval = 1;
#ifdef SO_REUSEPORT
	setsockopt(sock, SOL_SOCKET, SO_REUSEPORT, (const void*) &optval, sizeof(optval));
#endif
	setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (const void*) &optval, sizeof(optval));

#ifndef WINDOWS32
	fd_set_close_on_exec(sock);
#endif

	if (is_v6 && v6only >= 0) {
		setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, (const void*) &v6only, sizeof(v6only));
	}

#ifdef IP_OPT_DONT_FRAG
	/* Set dont-fragment ip header flag */
	optval = DONT_FRAG_VALUE;
	setsockopt(sock, IPPROTO_IP, IP_OPT_DONT_FRAG, (const void*) &optval, sizeof(optval));
#endif

	if (bind(sock, (struct sockaddr*) sockaddr, sockaddr_len) < 0)
		err(1, "bind");

	fprintf(stderr, "Opened IPv%d UDP socket\n", is_v6 ? 6 : 4);

	return sock;
}
/*
 * Open /dev/cpu/<core>/msr read-only.
 *
 * Returns the descriptor (marked close-on-exec) on success. On failure an
 * error describing the cause is logged and the negative result of open()
 * is returned.
 */
static int _open_msr(int core)
{
	char path[BUFSIZ];
	int msr_fd;

	sprintf(path, "/dev/cpu/%d/msr", core);
	msr_fd = open(path, O_RDONLY);

	if (msr_fd >= 0) {
		/* If this is loaded in the slurmd we need to make sure it
		   gets closed when a slurmstepd launches. */
		fd_set_close_on_exec(msr_fd);
		return msr_fd;
	}

	switch (errno) {
	case ENXIO:
		error("No CPU %d", core);
		break;
	case EIO:
		error("CPU %d doesn't support MSRs", core);
		break;
	default:
		error("MSR register problem: %m");
		break;
	}

	return msr_fd;
}
static void _dump_sicp_state(void) { char *old_file, *new_file, *reg_file; ListIterator sicp_iterator; sicp_job_t *sicp_ptr; Buf buffer; time_t now = time(NULL); int error_code = SLURM_SUCCESS, len, log_fd; pthread_mutex_lock(&sicp_lock); len = list_count(sicp_job_list) * 4 + 128; buffer = init_buf(len); packstr("PROTOCOL_VERSION", buffer); pack16(SLURM_PROTOCOL_VERSION, buffer); pack_time(now, buffer); sicp_iterator = list_iterator_create(sicp_job_list); while ((sicp_ptr = (sicp_job_t *) list_next(sicp_iterator))) { pack32(sicp_ptr->job_id, buffer); pack16(sicp_ptr->job_state, buffer); } list_iterator_destroy(sicp_iterator); pthread_mutex_unlock(&sicp_lock); old_file = xstrdup(slurmctld_conf.state_save_location); xstrcat(old_file, "/sicp_state.old"); reg_file = xstrdup(slurmctld_conf.state_save_location); xstrcat(reg_file, "/sicp_state"); new_file = xstrdup(slurmctld_conf.state_save_location); xstrcat(new_file, "/sicp_state.new"); lock_state_files(); log_fd = creat(new_file, 0600); if (log_fd < 0) { error("Can't save state, create file %s error %m", new_file); error_code = errno; } else { int pos = 0, nwrite, amount, rc; char *data; fd_set_close_on_exec(log_fd); nwrite = get_buf_offset(buffer); data = (char *)get_buf_data(buffer); while (nwrite > 0) { amount = write(log_fd, &data[pos], nwrite); if ((amount < 0) && (errno != EINTR)) { error("Error writing file %s, %m", new_file); error_code = errno; break; } nwrite -= amount; pos += amount; } rc = fsync_and_close(log_fd, "sicp"); if (rc && !error_code) error_code = rc; } if (error_code) { (void) unlink(new_file); } else { /* file shuffle */ (void) unlink(old_file); if (link(reg_file, old_file)) debug4("unable to create link for %s -> %s: %m", reg_file, old_file); (void) unlink(reg_file); if (link(new_file, reg_file)) debug4("unable to create link for %s -> %s: %m", new_file, reg_file); (void) unlink(new_file); } xfree(old_file); xfree(reg_file); xfree(new_file); unlock_state_files(); free_buf(buffer); }
/** * Closes all file descriptors greater or equal to ``first_fd'', skipping * preserved ones if ``preserve'' is TRUE. */ static void fd_close_from_internal(const int first_fd, bool preserve) { int fd; g_return_if_fail(first_fd >= 0); if (!preserve && try_close_from(first_fd)) return; fd = getdtablesize() - 1; while (fd >= first_fd) { if (preserve && hset_contains(fd_preserved, int_to_pointer(fd))) goto next; #ifdef HAVE_GTKOSXAPPLICATION /* OS X doesn't allow fds being closed not opened by us. During * GUI initialisation a new kqueue fd is created for UI events. This * is visible to us as a fifo which we are not allowed to close. * Set close on exec on all fifo's so we won't leak any of our other * fifo's * -- JA 2011-11-28 */ if (is_a_fifo(fd)) fd_set_close_on_exec(fd); else #endif /* OS X frowns upon random fds being closed --RAM 2011-11-13 */ if (fd_is_opened(fd)) { if (close(fd)) { #if defined(F_MAXFD) fd = fcntl(0, F_MAXFD); continue; #endif /* F_MAXFD */ } } next: fd--; } /* * When called with a first_fd of 3, and we are on Windows, also make * sure we close all the known sockets we have. This lets the process * safely auto-restart, avoiding multiple listening sockets on the same * port. * --RAM, 2015-04-05 */ if ( is_running_on_mingw() && !preserve && 3 == first_fd && NULL != fd_sockets ) { hset_t *fds = fd_sockets; /* * We're about to exec() another process, and we may be crashing, * hence do not bother using hset_foreach_remove() to ensure minimal * processing. We also reset the fd_sockets pointer to NULL to * make sure s_close() will do nothing when fd_notify_socket_closed() * is called. */ fd_sockets = NULL; /* We don't expect race conditions here */ hset_foreach(fds, fd_socket_close, NULL); /* Don't bother freeing / clearing set, we're about to exec() */ } }
/*
 * Accept one connection on the stepd message socket and hand it to
 * _handle_accept() in a detached thread.
 *
 * obj  - listening eio object; obj->arg is the job record.
 * objs - unused here.
 *
 * Returns SLURM_SUCCESS, or SLURM_ERROR if the thread attributes could not
 * be set up. An accept() failure other than
 * EINTR/EAGAIN/EWOULDBLOCK/ECONNABORTED sets obj->shutdown.
 */
static int _msg_socket_accept(eio_obj_t *obj, List objs)
{
	slurmd_job_t *job = (slurmd_job_t *)obj->arg;
	int fd, rc;
	struct sockaddr_un addr;
	socklen_t len;
	struct request_params *param = NULL;
	pthread_attr_t attr;
	pthread_t id;
	int retries = 0;

	debug3("Called _msg_socket_accept");

	for (;;) {
		/*
		 * accept() takes a socklen_t value-result argument, so use
		 * the proper type and reset it before every attempt.
		 */
		len = sizeof(addr);
		fd = accept(obj->fd, (struct sockaddr *)&addr, &len);
		if (fd >= 0)
			break;
		if (errno == EINTR)
			continue;
		if (errno == EAGAIN ||
		    errno == ECONNABORTED ||
		    errno == EWOULDBLOCK) {
			return SLURM_SUCCESS;
		}
		error("Error on msg accept socket: %m");
		obj->shutdown = true;
		return SLURM_SUCCESS;
	}

	pthread_mutex_lock(&message_lock);
	message_connections++;
	pthread_mutex_unlock(&message_lock);

	fd_set_close_on_exec(fd);
	fd_set_blocking(fd);

	slurm_attr_init(&attr);
	rc = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
	if (rc != 0) {
		/* pthread calls return the error code; %m reads errno */
		errno = rc;
		error("Unable to set detachstate on attr: %m");
		slurm_attr_destroy(&attr);
		close(fd);
		/*
		 * NOTE(review): message_connections was incremented above and
		 * is not decremented on this path - confirm whether it should
		 * be undone here.
		 */
		return SLURM_ERROR;
	}

	param = xmalloc(sizeof(struct request_params));
	param->fd = fd;
	param->job = job;
	while ((rc = pthread_create(&id, &attr, &_handle_accept,
				    (void *)param))) {
		errno = rc;
		error("stepd_api message engine pthread_create: %m");
		if (++retries > MAX_RETRIES) {
			error("running handle_accept without "
			      "starting a thread stepd will be "
			      "unresponsive until done");
			_handle_accept((void *)param);
			info("stepd should be responsive now");
			break;
		}
		usleep(10);	/* sleep and again */
	}

	slurm_attr_destroy(&attr);

	debug3("Leaving _msg_socket_accept");
	return SLURM_SUCCESS;
}
int pmixp_stepd_init(const stepd_step_rec_t *job, char ***env) { char *path; int fd, rc; if (SLURM_SUCCESS != (rc = pmixp_info_set(job, env))) { PMIXP_ERROR("pmixp_info_set(job, env) failed"); goto err_info; } /* Create UNIX socket for slurmd communication */ path = pmixp_info_nspace_usock(pmixp_info_namespace()); if (NULL == path) { PMIXP_ERROR("pmixp_info_nspace_usock: out-of-memory"); rc = SLURM_ERROR; goto err_path; } if ((fd = pmixp_usock_create_srv(path)) < 0) { PMIXP_ERROR("pmixp_usock_create_srv"); rc = SLURM_ERROR; goto err_usock; } fd_set_close_on_exec(fd); pmixp_info_srv_usock_set(path, fd); if (!pmixp_info_same_arch()){ _direct_proto.hdr_unpack_cb = _direct_hdr_unpack_portable; _direct_hdr_pack = _direct_hdr_pack_portable; } pmixp_conn_init(_slurm_proto, _direct_proto); if((rc = pmixp_dconn_init(pmixp_info_nodes_uni(), _direct_proto)) ){ PMIXP_ERROR("pmixp_dconn_init() failed"); goto err_dconn; } if ((rc = pmixp_nspaces_init())) { PMIXP_ERROR("pmixp_nspaces_init() failed"); goto err_nspaces; } if (SLURM_SUCCESS != (rc = pmixp_state_init())) { PMIXP_ERROR("pmixp_state_init() failed"); goto err_state; } if (SLURM_SUCCESS != (rc = pmixp_dmdx_init())) { PMIXP_ERROR("pmixp_dmdx_init() failed"); goto err_dmdx; } if (SLURM_SUCCESS != (rc = pmixp_libpmix_init())) { PMIXP_ERROR("pmixp_libpmix_init() failed"); goto err_lib; } if (SLURM_SUCCESS != (rc = pmixp_libpmix_job_set())) { PMIXP_ERROR("pmixp_libpmix_job_set() failed"); goto err_job; } pmixp_server_init_pp(env); pmixp_server_init_cperf(env); xfree(path); _was_initialized = 1; return SLURM_SUCCESS; err_job: pmixp_libpmix_finalize(); err_lib: pmixp_dmdx_finalize(); err_dmdx: pmixp_state_finalize(); err_state: pmixp_nspaces_finalize(); err_nspaces: pmixp_dconn_fini(); err_dconn: pmixp_conn_fini(); close(pmixp_info_srv_usock_fd()); err_usock: xfree(path); err_path: pmixp_info_free(); err_info: return rc; }
/** * Open file, returning file descriptor or -1 on error with errno set. * Errors are logged as a warning, unless `missing' is TRUE, in which * case no error is logged for ENOENT. * No errors from open() due to invalid permission are logged when `silent' * is TRUE. */ static int do_open(const char *path, int flags, int mode, bool missing, bool absolute, bool silent) { const char *what; int fd; if (absolute && !is_absolute_path(path)) { s_warning("%s(): can't open absolute \"%s\": relative path", G_STRFUNC, path); errno = EPERM; return -1; } #ifdef O_NOCTTY flags |= O_NOCTTY; #endif /* O_NOCTTY */ fd = open(path, flags, mode); if (fd < 0) { if (flags & O_CREAT) what = "create"; else if (O_RDONLY == (flags & O_ACCMODE)) what = "read"; else if (O_WRONLY == (flags & O_ACCMODE)) what = "write into"; else what = "open"; /* * If we ran out of file descriptors, try to reclaim one from the * banning pool and retry. */ if ( (errno == EMFILE || errno == ENFILE) && reclaim_fd != NULL && (*reclaim_fd)() ) { fd = open(path, flags, mode); if (fd >= 0) { s_message("%s(): had to reclaim an unused to %s file", G_STRFUNC, what); } } } if (fd >= 0) { fd = fd_get_non_stdio(fd); fd_set_close_on_exec(fd); /* Just in case */ return fd; } /* * Hack for broken libc, which can return -1 with errno = 0! * This happens when compiling with gcc-3.x and linking with -lpthread * on a Debian linux system. * --RAM, 15/02/2004 */ if (errno == 0) { s_warning("%s(): open() returned -1 with errno = 0, assuming ENOENT", G_STRFUNC); errno = ENOENT; } if (!missing || errno != ENOENT) { if (!silent || errno != EACCES) { s_warning("%s(): can't %s file \"%s\": %m", G_STRFUNC, what, path); } } return -1; }
/* Saves the state of all jobcomp data for further indexing retries */ static int _save_state(void) { int fd, rc = SLURM_SUCCESS; char *state_file, *new_file, *old_file; ListIterator iter; static int high_buffer_size = (1024 * 1024); Buf buffer = init_buf(high_buffer_size); uint32_t job_cnt; struct job_node *jnode; job_cnt = list_count(jobslist); pack32(job_cnt, buffer); iter = list_iterator_create(jobslist); while ((jnode = (struct job_node *)list_next(iter))) { packstr(jnode->serialized_job, buffer); } list_iterator_destroy(iter); state_file = slurm_get_state_save_location(); if (state_file == NULL || state_file[0] == '\0') { error("%s: Could not retrieve StateSaveLocation from conf", plugin_type); return SLURM_ERROR; } if (state_file[strlen(state_file) - 1] != '/') xstrcat(state_file, "/"); xstrcat(state_file, save_state_file); old_file = xstrdup(state_file); new_file = xstrdup(state_file); xstrcat(new_file, ".new"); xstrcat(old_file, ".old"); slurm_mutex_lock(&save_lock); fd = open(new_file, O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR); if (fd < 0) { error("%s: Can't save jobcomp state, open file %s error %m", plugin_type, new_file); rc = SLURM_ERROR; } else { int pos = 0, nwrite, amount, rc2; char *data; fd_set_close_on_exec(fd); nwrite = get_buf_offset(buffer); data = (char *) get_buf_data(buffer); high_buffer_size = MAX(nwrite, high_buffer_size); while (nwrite > 0) { amount = write(fd, &data[pos], nwrite); if ((amount < 0) && (errno != EINTR)) { error("%s: Error writing file %s, %m", plugin_type, new_file); rc = SLURM_ERROR; break; } nwrite -= amount; pos += amount; } if ((rc2 = fsync_and_close(fd, save_state_file))) rc = rc2; } if (rc == SLURM_ERROR) (void) unlink(new_file); else { (void) unlink(old_file); if (link(state_file, old_file)) { error("%s: Unable to create link for %s -> %s: %m", plugin_type, state_file, old_file); rc = SLURM_ERROR; } (void) unlink(state_file); if (link(new_file, state_file)) { error("%s: Unable to create link for %s -> %s: 
%m", plugin_type, new_file, state_file); rc = SLURM_ERROR; } (void) unlink(new_file); } xfree(old_file); xfree(state_file); xfree(new_file); slurm_mutex_unlock(&save_lock); free_buf(buffer); return rc; }
/*
 * slurmd entry point.
 *
 * Sequence, as implemented below: initialise logging, close inherited
 * descriptors 3..255, drop supplementary groups when running as root,
 * build the global "conf", verify the running uid matches the configured
 * slurmd user, install signal handlers, optionally daemonize, restore
 * saved state, initialise the job_container and switch plugins, open the
 * listening socket, create the pidfile, and run the message engine.
 * After _msg_engine() returns, the pidfile is removed and everything is
 * torn down.
 *
 * Returns 0 on normal shutdown, or SLURM_FAILURE if the credential state
 * cannot be restored. Several failures terminate the process through
 * fatal() or exit(1).
 */
int main (int argc, char *argv[])
{
	int i, pidfd;
	int blocked_signals[] = {SIGPIPE, 0};
	char *oom_value;
	uint32_t slurmd_uid = 0;
	uint32_t curr_uid = 0;
	char time_stamp[256];
	log_options_t lopts = LOG_OPTS_INITIALIZER;

	/* NOTE: logfile is NULL at this point */
	log_init(argv[0], lopts, LOG_DAEMON, NULL);

	/*
	 * Make sure we have no extra open files which
	 * would be propagated to spawned tasks.
	 */
	for (i=3; i<256; i++)
		(void) close(i);

	/*
	 * Drop supplementary groups.
	 */
	if (geteuid() == 0) {
		if (setgroups(0, NULL) != 0) {
			fatal("Failed to drop supplementary groups, "
			      "setgroups: %m");
		}
	} else {
		debug("Not running as root. Can't drop supplementary groups");
	}

	/*
	 * Create and set default values for the slurmd global
	 * config variable "conf"
	 */
	conf = xmalloc(sizeof(slurmd_conf_t));
	_init_conf();
	/* conf keeps pointers to main()'s own argc/argv variables */
	conf->argv = &argv;
	conf->argc = &argc;

	if (_slurmd_init() < 0) {
		error( "slurmd initialization failed" );
		fflush( NULL );
		exit(1);
	}

	/* Refuse to run as any user other than the configured slurmd user */
	slurmd_uid = slurm_get_slurmd_user_id();
	curr_uid = getuid();
	if (curr_uid != slurmd_uid) {
		struct passwd *pw = NULL;
		char *slurmd_user = NULL;
		char *curr_user = NULL;

		/* since when you do a getpwuid you get a pointer to a
		 * structure you have to do a xstrdup on the first
		 * call or your information will just get over
		 * written.  This is a memory leak, but a fatal is
		 * called right after so it isn't that big of a deal.
		 */
		if ((pw=getpwuid(slurmd_uid)))
			slurmd_user = xstrdup(pw->pw_name);
		if ((pw=getpwuid(curr_uid)))
			curr_user = pw->pw_name;

		/*
		 * NOTE(review): either name is still NULL here if its
		 * getpwuid() lookup failed - confirm fatal() tolerates a
		 * NULL "%s" argument.
		 */
		fatal("You are running slurmd as something "
		      "other than user %s(%d).  If you want to "
		      "run as this user add SlurmdUser=%s "
		      "to the slurm.conf file.",
		      slurmd_user, slurmd_uid, curr_user);
	}
	init_setproctitle(argc, argv);

	xsignal(SIGTERM, &_term_handler);
	xsignal(SIGINT,  &_term_handler);
	xsignal(SIGHUP,  &_hup_handler );
	xsignal_block(blocked_signals);

	debug3("slurmd initialization successful");

	/*
	 * Become a daemon if desired.
	 * Do not chdir("/") or close all fd's
	 */
	if (conf->daemonize) {
		if (daemon(1,1) == -1)
			error("Couldn't daemonize slurmd: %m");
	}
	test_core_limit();
	info("slurmd version %s started", SLURM_VERSION_STRING);
	debug3("finished daemonize");

	/* Optional OOM adjustment taken from the SLURMD_OOM_ADJ variable */
	if ((oom_value = getenv("SLURMD_OOM_ADJ"))) {
		i = atoi(oom_value);
		debug("Setting slurmd oom_adj to %d", i);
		set_oom_adj(i);
	}

	_kill_old_slurmd();

	if (conf->mlock_pages) {
		/*
		 * Call mlockall() if available to ensure slurmd
		 *  doesn't get swapped out
		 */
#ifdef _POSIX_MEMLOCK
		if (mlockall (MCL_FUTURE | MCL_CURRENT) < 0)
			error ("failed to mlock() slurmd pages: %m");
#else
		error ("mlockall() system call does not appear to be available");
#endif /* _POSIX_MEMLOCK */
	}


	/*
	 * Restore any saved revoked credential information
	 */
	if (!conf->cleanstart && (_restore_cred_state(conf->vctx) < 0))
		return SLURM_FAILURE;

	if (job_container_init() < 0)
		fatal("Unable to initialize job_container plugin.");
	if (container_g_restore(conf->spooldir, !conf->cleanstart))
		error("Unable to restore job_container state.");
	if (switch_g_node_init() < 0)
		fatal("Unable to initialize interconnect.");
	if (conf->cleanstart && switch_g_clear_node_state())
		fatal("Unable to clear interconnect state.");
	switch_g_slurmd_init();

	_create_msg_socket();

	conf->pid = getpid();
	/* This has to happen after daemon(), which closes all fd's,
	   so we keep the write lock of the pidfile.
	*/
	pidfd = create_pidfile(conf->pidfile, 0);
	if (pidfd >= 0)
		fd_set_close_on_exec(pidfd);

	rfc2822_timestamp(time_stamp, sizeof(time_stamp));
	info("%s started on %s", slurm_prog_name, time_stamp);

	_install_fork_handlers();
	list_install_fork_handlers();
	slurm_conf_install_fork_handlers();

	_spawn_registration_engine();
	/* Shutdown below starts only once _msg_engine() has returned */
	_msg_engine();

	/*
	 * Close fd here, otherwise we'll deadlock since create_pidfile()
	 * flocks the pidfile.
	 */
	if (pidfd >= 0)			/* valid pidfd, non-error */
		(void) close(pidfd);	/* Ignore errors */
	if (unlink(conf->pidfile) < 0)
		error("Unable to remove pidfile `%s': %m", conf->pidfile);

	_wait_for_all_threads();

	switch_g_node_fini();

	_slurmd_fini();
	_destroy_conf();
	slurm_crypto_fini();	/* must be after _destroy_conf() */

	info("Slurmd shutdown completing");
	log_fini();
	return 0;
}
/*
 * Initialize log with
 * prog = program name to tag error messages with
 * opt  = log_options_t specifying max log levels for syslog, stderr, and file
 * fac  = log facility for syslog (unused if syslog level == LOG_QUIET)
 * logfile = logfile name if logfile level > LOG_QUIET
 *
 * Returns 0 on success, or the errno value from the failed attempt to
 * open the logfile. On that failure a previously opened logfile stays in
 * use and log->initialized is not set by this call.
 */
static int
_log_init(char *prog, log_options_t opt, log_facility_t fac, char *logfile )
{
	int rc = 0;

	if (!log)  {
		log = (log_t *)xmalloc(sizeof(log_t));
		log->logfp = NULL;
		log->argv0 = NULL;
		log->buf   = NULL;
		log->fbuf  = NULL;
		log->fpfx  = NULL;
		atfork_install_handlers();
	}

	if (prog) {
		if (log->argv0)
			xfree(log->argv0);
		log->argv0 = xstrdup(xbasename(prog));
	} else if (!log->argv0) {
		const char *short_name = strrchr(default_name, '/');
		if (short_name)
			short_name++;
		else
			short_name = default_name;
		log->argv0 = xstrdup(short_name);
	}

	if (!log->fpfx)
		log->fpfx = xstrdup("");

	log->opt = opt;

	/* Drop any buffers left from a previous initialisation */
	if (log->buf) {
		cbuf_destroy(log->buf);
		log->buf = NULL;
	}
	if (log->fbuf) {
		cbuf_destroy(log->fbuf);
		log->fbuf = NULL;
	}

	if (log->opt.buffered) {
		log->buf  = cbuf_create(128, 8192);
		log->fbuf = cbuf_create(128, 8192);
	}

	if (log->opt.syslog_level > LOG_LEVEL_QUIET)
		log->facility = fac;

	if (logfile && (log->opt.logfile_level > LOG_LEVEL_QUIET)) {
		FILE *fp;

		fp = safeopen(logfile, "a", SAFEOPEN_LINK_OK);

		if (!fp) {
			/*
			 * Remember the failure code now: the calls below may
			 * overwrite errno before it is returned.
			 */
			int open_errno = errno;
			char *errmsg = NULL;

			xslurm_strerrorcat(errmsg);
			/* prog may be NULL; log->argv0 is always set by now */
			fprintf(stderr,
				"%s: log_init(): Unable to open logfile "
				"`%s': %s\n",
				prog ? prog : log->argv0, logfile, errmsg);
			xfree(errmsg);
			rc = open_errno;
			goto out;
		}

		if (log->logfp)
			fclose(log->logfp); /* Ignore errors */

		log->logfp = fp;
	}

	if (log->logfp) {
		int fd;
		if ((fd = fileno(log->logfp)) < 0)
			log->logfp = NULL;
		else
			fd_set_close_on_exec(fd);
	}

	log->initialized = 1;
 out:
	return rc;
}
int pmixp_stepd_init(const stepd_step_rec_t *job, char ***env) { char *path; int fd, rc; if (SLURM_SUCCESS != (rc = pmixp_info_set(job, env))) { PMIXP_ERROR("pmixp_info_set(job, env) failed"); return rc; } /* Create UNIX socket for slurmd communication */ path = pmixp_info_nspace_usock(pmixp_info_namespace()); if (NULL == path) { PMIXP_ERROR("Out-of-memory"); rc = SLURM_ERROR; goto err_path; } if ((fd = pmixp_usock_create_srv(path)) < 0) { rc = SLURM_ERROR; goto err_usock; } fd_set_close_on_exec(fd); pmixp_info_srv_contacts(path, fd); if (SLURM_SUCCESS != (rc = pmixp_nspaces_init())) { PMIXP_ERROR("pmixp_nspaces_init() failed"); goto err_usock; } if (SLURM_SUCCESS != (rc = pmixp_state_init())) { PMIXP_ERROR("pmixp_state_init() failed"); goto err_state; } if (SLURM_SUCCESS != (rc = pmixp_dmdx_init())) { PMIXP_ERROR("pmixp_dmdx_init() failed"); goto err_dmdx; } if (SLURM_SUCCESS != (rc = pmixp_libpmix_init())) { PMIXP_ERROR("pmixp_libpmix_init() failed"); goto err_lib; } if (SLURM_SUCCESS != (rc = pmixp_libpmix_job_set())) { PMIXP_ERROR("pmixp_libpmix_job_set() failed"); goto err_job; } xfree(path); _was_initialized = 1; return SLURM_SUCCESS; err_job: pmixp_libpmix_finalize(); err_lib: pmixp_dmdx_finalize(); err_dmdx: pmixp_state_finalize(); err_state: pmixp_nspaces_finalize(); err_usock: xfree(path); err_path: pmixp_info_free(); return rc; }
/*
 * Initialize scheduler log with
 * prog = program name to tag error messages with; when NULL, an already
 *        stored name is kept, else the last path component of default_name
 * opt = log_options_t specifying max log levels for syslog, stderr, and file
 * fac = log facility for syslog (unused if syslog level == LOG_QUIET)
 * logfile = logfile name; opened in append mode whenever it is non-NULL
 *
 * Returns 0 on success, or the errno value observed right after a failed
 * safeopen(); in that case sched_log->initialized is not set.
 */
static int _sched_log_init(char *prog, log_options_t opt, log_facility_t fac,
			   char *logfile)
{
	/* First call: create the global scheduler log state */
	if (!sched_log) {
		sched_log = xmalloc(sizeof(*sched_log));
		atfork_install_handlers();
	}

	if (prog) {
		xfree(sched_log->argv0);
		sched_log->argv0 = xstrdup(xbasename(prog));
	} else if (!sched_log->argv0) {
		/* Fall back to the part of default_name after the last '/' */
		const char *last_slash =
			strrchr((const char *) default_name, '/');

		sched_log->argv0 =
			xstrdup(last_slash ? last_slash + 1 : default_name);
	}

	if (!sched_log->fpfx)
		sched_log->fpfx = xstrdup("");

	sched_log->opt = opt;

	/* Throw away buffers left over from an earlier initialization */
	if (sched_log->buf) {
		cbuf_destroy(sched_log->buf);
		sched_log->buf = NULL;
	}
	if (sched_log->fbuf) {
		cbuf_destroy(sched_log->fbuf);
		sched_log->fbuf = NULL;
	}

	if (sched_log->opt.buffered) {
		sched_log->buf = cbuf_create(128, 8192);
		sched_log->fbuf = cbuf_create(128, 8192);
	}

	if (sched_log->opt.syslog_level > LOG_LEVEL_QUIET)
		sched_log->facility = fac;

	if (logfile) {
		FILE *new_fp = safeopen(logfile, "a", SAFEOPEN_LINK_OK);

		if (!new_fp)
			return errno;

		/* Swap streams only after the new one opened successfully */
		if (sched_log->logfp)
			fclose(sched_log->logfp); /* Ignore errors */
		sched_log->logfp = new_fp;
	}

	if (sched_log->logfp) {
		int log_fd = fileno(sched_log->logfp);

		if (log_fd < 0)
			sched_log->logfp = NULL;
		else
			fd_set_close_on_exec(log_fd);
	}

	sched_log->initialized = 1;
	return 0;
}
static int _slurmd_init(void) { struct rlimit rlim; slurm_ctl_conf_t *cf; struct stat stat_buf; uint32_t cpu_cnt; /* * Process commandline arguments first, since one option may be * an alternate location for the slurm config file. */ _process_cmdline(*conf->argc, *conf->argv); /* * Build nodes table like in slurmctld * This is required by the topology stack * Node tables setup must preceed _read_config() so that the * proper hostname is set. */ slurm_conf_init(conf->conffile); init_node_conf(); /* slurm_select_init() must be called before * build_all_nodeline_info() to be called with proper argument. */ if (slurm_select_init(1) != SLURM_SUCCESS ) return SLURM_FAILURE; build_all_nodeline_info(true); build_all_frontend_info(true); /* * Read global slurm config file, override necessary values from * defaults and command line. */ _read_config(); cpu_cnt = MAX(conf->conf_cpus, conf->block_map_size); if ((gres_plugin_init() != SLURM_SUCCESS) || (gres_plugin_node_config_load(cpu_cnt) != SLURM_SUCCESS)) return SLURM_FAILURE; if (slurm_topo_init() != SLURM_SUCCESS) return SLURM_FAILURE; /* * Get and set slurmd topology information * Build node hash table first to speed up the topo build */ rehash_node(); slurm_topo_build_config(); _set_topo_info(); /* * Check for cpu frequency set capabilities on this node */ cpu_freq_init(conf); _print_conf(); if (slurm_proctrack_init() != SLURM_SUCCESS) return SLURM_FAILURE; if (slurmd_task_init() != SLURM_SUCCESS) return SLURM_FAILURE; if (slurm_auth_init(NULL) != SLURM_SUCCESS) return SLURM_FAILURE; if (spank_slurmd_init() < 0) return SLURM_FAILURE; if (getrlimit(RLIMIT_CPU, &rlim) == 0) { rlim.rlim_cur = rlim.rlim_max; setrlimit(RLIMIT_CPU, &rlim); if (rlim.rlim_max != RLIM_INFINITY) { error("Slurmd process CPU time limit is %d seconds", (int) rlim.rlim_max); } } if (getrlimit(RLIMIT_NOFILE, &rlim) == 0) { rlim.rlim_cur = rlim.rlim_max; setrlimit(RLIMIT_NOFILE, &rlim); } #ifndef NDEBUG if (getrlimit(RLIMIT_CORE, &rlim) == 0) { 
rlim.rlim_cur = rlim.rlim_max; setrlimit(RLIMIT_CORE, &rlim); } #endif /* !NDEBUG */ /* * Create a context for verifying slurm job credentials */ if (!(conf->vctx = slurm_cred_verifier_ctx_create(conf->pubkey))) return SLURM_FAILURE; if (!strcmp(conf->select_type, "select/serial")) { /* Only cache credential for 5 seconds with select/serial * for shorter cache searches and higher throughput */ slurm_cred_ctx_set(conf->vctx, SLURM_CRED_OPT_EXPIRY_WINDOW, 5); } /* * Create slurmd spool directory if necessary. */ if (_set_slurmd_spooldir() < 0) { error("Unable to initialize slurmd spooldir"); return SLURM_FAILURE; } if (conf->cleanstart) { /* * Need to kill any running slurmd's here */ _kill_old_slurmd(); stepd_cleanup_sockets(conf->spooldir, conf->node_name); _stepd_cleanup_batch_dirs(conf->spooldir, conf->node_name); } if (conf->daemonize) { bool success = false; if (conf->logfile && (conf->logfile[0] == '/')) { char *slash_ptr, *work_dir; work_dir = xstrdup(conf->logfile); slash_ptr = strrchr(work_dir, '/'); if (slash_ptr == work_dir) work_dir[1] = '\0'; else slash_ptr[0] = '\0'; if ((access(work_dir, W_OK) != 0) || (chdir(work_dir) < 0)) { error("Unable to chdir to %s", work_dir); } else success = true; xfree(work_dir); } if (!success) { if ((access(conf->spooldir, W_OK) != 0) || (chdir(conf->spooldir) < 0)) { error("Unable to chdir to %s", conf->spooldir); } else success = true; } if (!success) { if ((access("/var/tmp", W_OK) != 0) || (chdir("/var/tmp") < 0)) { error("chdir(/var/tmp): %m"); return SLURM_FAILURE; } else info("chdir to /var/tmp"); } } /* * Cache the group access list */ cf = slurm_conf_lock(); if (cf->group_info & GROUP_CACHE) init_gids_cache(1); else init_gids_cache(0); slurm_conf_unlock(); if ((devnull = open("/dev/null", O_RDWR)) < 0) { error("Unable to open /dev/null: %m"); return SLURM_FAILURE; } fd_set_close_on_exec(devnull); /* make sure we have slurmstepd installed */ if (stat(conf->stepd_loc, &stat_buf)) fatal("Unable to find slurmstepd 
file at %s", conf->stepd_loc); if (!S_ISREG(stat_buf.st_mode)) fatal("slurmstepd not a file at %s", conf->stepd_loc); return SLURM_SUCCESS; }