/*
 * Send a NULL-terminated list of strings over 'fd'.
 *
 * What is written, in order:
 *   1. one int holding the value returned by HYDU_strlist_lastidx(strlist);
 *   2. for every string: one int with its length (terminating NUL
 *      included), followed by that many bytes of string data.
 *
 * Returns HYD_SUCCESS, or the error produced by a failed write / closed
 * socket check.
 */
HYD_status HYDU_send_strlist(int fd, char **strlist)
{
    char **entry;
    int num_entries, nbytes;
    int sent, closed;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    /* Announce the list size first so the receiver knows what to expect */
    num_entries = HYDU_strlist_lastidx(strlist);
    status = HYDU_sock_write(fd, &num_entries, sizeof(num_entries), &sent, &closed,
                             HYDU_SOCK_COMM_MSGWAIT);
    HYDU_ERR_POP(status, "unable to write data to proxy\n");
    HYDU_ASSERT(!closed, status);

    /* Each string goes out as a (length, bytes) pair */
    for (entry = strlist; *entry != NULL; entry++) {
        nbytes = (int) (strlen(*entry) + 1);

        status = HYDU_sock_write(fd, &nbytes, sizeof(nbytes), &sent, &closed,
                                 HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "unable to write data to proxy\n");
        HYDU_ASSERT(!closed, status);

        status = HYDU_sock_write(fd, *entry, nbytes, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "unable to write data to proxy\n");
        HYDU_ASSERT(!closed, status);
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status cmd_response(int fd, int pid, const char *cmd) { struct HYD_pmcd_hdr hdr; int sent, closed; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); HYD_pmcd_init_header(&hdr); hdr.cmd = PMI_RESPONSE; hdr.pid = pid; hdr.pmi_version = 1; hdr.buflen = strlen(cmd); status = HYDU_sock_write(fd, &hdr, sizeof(hdr), &sent, &closed); HYDU_ERR_POP(status, "unable to send PMI_RESPONSE header to proxy\n"); HYDU_ASSERT(!closed, status); if (HYD_server_info.user_global.debug) { HYDU_dump(stdout, "PMI response to fd %d pid %d: %s", fd, pid, cmd); } status = HYDU_sock_write(fd, cmd, strlen(cmd), &sent, &closed); HYDU_ERR_POP(status, "unable to send response to command\n"); HYDU_ASSERT(!closed, status); fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status send_cmd_downstream(int fd, const char *cmd) { char cmdlen[7]; int sent, closed; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); MPL_snprintf(cmdlen, 7, "%6u", (unsigned) strlen(cmd)); status = HYDU_sock_write(fd, cmdlen, 6, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "error writing PMI line\n"); /* FIXME: We cannot abort when we are not able to send data * downstream. The upper layer needs to handle this based on * whether we want to abort or not.*/ HYDU_ASSERT(!closed, status); if (HYD_pmcd_pmip.user_global.debug) { HYDU_dump(stdout, "PMI response: %s\n", cmd); } status = HYDU_sock_write(fd, cmd, strlen(cmd), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "error writing PMI line\n"); HYDU_ASSERT(!closed, status); fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static void signal_cb(int signum) { struct HYD_cmd cmd; static int sigint_count = 0; int sent, closed; HYDU_FUNC_ENTER(); /* SIGALRM is a special signal that indicates that a checkpoint * needs to be initiated */ if (signum == SIGALRM) { if (HYD_server_info.user_global.ckpoint_prefix == NULL) { HYDU_dump(stderr, "No checkpoint prefix provided\n"); return; } #if HAVE_ALARM if (HYD_ui_mpich_info.ckpoint_int != -1) alarm(HYD_ui_mpich_info.ckpoint_int); #endif /* HAVE_ALARM */ cmd.type = HYD_CKPOINT; HYDU_sock_write(HYD_server_info.cmd_pipe[1], &cmd, sizeof(cmd), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); goto fn_exit; } cmd.type = HYD_SIGNAL; cmd.signum = signum; /* SIGINT is a partially special signal. The first time we see it, * we will send it to the processes. The next time, we will treat * it as a SIGKILL (user convenience to force kill processes). */ if (signum == SIGINT && ++sigint_count > 1) cmd.type = HYD_CLEANUP; else if (signum == SIGINT) { /* First Ctrl-C */ HYDU_dump(stdout, "Sending Ctrl-C to processes as requested\n"); HYDU_dump(stdout, "Press Ctrl-C again to force abort\n"); } HYDU_sock_write(HYD_server_info.cmd_pipe[1], &cmd, sizeof(cmd), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); fn_exit: HYDU_FUNC_EXIT(); return; }
static HYD_status send_cmd_upstream(const char *start, int fd, char *args[]) { int i, sent, closed; struct HYD_string_stash stash; char *buf = NULL; struct HYD_pmcd_hdr hdr; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); HYD_STRING_STASH_INIT(stash); HYD_STRING_STASH(stash, MPL_strdup(start), status); for (i = 0; args[i]; i++) { HYD_STRING_STASH(stash, MPL_strdup(args[i]), status); if (args[i + 1]) HYD_STRING_STASH(stash, MPL_strdup(";"), status); } HYD_STRING_SPIT(stash, buf, status); HYD_pmcd_init_header(&hdr); hdr.cmd = PMI_CMD; hdr.pid = fd; hdr.buflen = strlen(buf); hdr.pmi_version = 2; status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, &hdr, sizeof(hdr), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to send PMI header upstream\n"); HYDU_ASSERT(!closed, status); if (HYD_pmcd_pmip.user_global.debug) { HYDU_dump(stdout, "forwarding command (%s) upstream\n", buf); } status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, buf, hdr.buflen, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to send PMI command upstream\n"); HYDU_ASSERT(!closed, status); fn_exit: if (buf) MPL_free(buf); HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
static HYD_status cmd_response(int fd, int pid, char *cmd) { char cmdlen[7]; struct HYD_pmcd_hdr hdr; int sent, closed; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); HYD_pmcd_init_header(&hdr); hdr.cmd = PMI_RESPONSE; hdr.pid = pid; hdr.pmi_version = 2; hdr.buflen = 6 + strlen(cmd); status = HYDU_sock_write(fd, &hdr, sizeof(hdr), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to send PMI_RESPONSE header to proxy\n"); HYDU_ASSERT(!closed, status); HYDU_snprintf(cmdlen, 7, "%6u", (unsigned) strlen(cmd)); status = HYDU_sock_write(fd, cmdlen, 6, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "error writing PMI line\n"); HYDU_ASSERT(!closed, status); if (HYD_server_info.user_global.debug) { HYDU_dump(stdout, "PMI response to fd %d pid %d: %s\n", fd, pid, cmd); } status = HYDU_sock_write(fd, cmd, strlen(cmd), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "error writing PMI line\n"); HYDU_ASSERT(!closed, status); fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
HYD_status HYD_pmcd_pmiserv_send_signal(struct HYD_proxy *proxy, int signum) { struct HYD_pmcd_hdr hdr; int sent, closed; HYD_status status = HYD_SUCCESS; HYD_pmcd_init_header(&hdr); hdr.cmd = SIGNAL; hdr.signum = signum; status = HYDU_sock_write(proxy->control_fd, &hdr, sizeof(hdr), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to write data to proxy\n"); HYDU_ASSERT(!closed, status); fn_exit: return status; fn_fail: goto fn_exit; }
/* This function does not provide any flow control. We just read from * the incoming socket as much as we can and push out to the outgoing * socket as much as we can. This can result in the process calling it * polling continuously waiting for events, but that's a rare case for * stdio (which is what this function is meant to provide * functionality for). */ HYD_status HYDU_sock_forward_stdio(int in, int out, int *closed) { struct fwd_hash *fwd_hash, *tmp; int count; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); /* find the fwd hash */ for (tmp = fwd_hash_list; tmp; tmp = tmp->next) if (out == tmp->out) break; if (tmp == NULL) { /* No hash found; create one */ status = alloc_fwd_hash(&fwd_hash, in, out); HYDU_ERR_POP(status, "unable to allocate forward hash\n"); if (fwd_hash_list == NULL) fwd_hash_list = fwd_hash; else { for (tmp = fwd_hash_list; tmp->next; tmp = tmp->next); tmp->next = fwd_hash; } } else { fwd_hash = tmp; } *closed = 0; if (fwd_hash->buf_count == 0) { /* there is no data in the buffer, read something into it */ status = HYDU_sock_read(in, fwd_hash->buf, HYD_TMPBUF_SIZE, &count, closed, HYDU_SOCK_COMM_NONE); HYDU_ERR_POP(status, "read error\n"); if (!*closed) { fwd_hash->buf_offset = 0; fwd_hash->buf_count += count; /* We should never get a zero count, as the upper-layer * should have waited for an event from the demux engine * before calling us. 
*/ HYDU_ASSERT(count, status); } } if (fwd_hash->buf_count) { /* there is data in the buffer, send it out first */ status = HYDU_sock_write(out, fwd_hash->buf + fwd_hash->buf_offset, fwd_hash->buf_count, &count, closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "write error\n"); if (!*closed) { fwd_hash->buf_offset += count; fwd_hash->buf_count -= count; } } /* If the incoming socket is closed, make sure we forward out all * of the buffered data */ while (*closed && fwd_hash->buf_count) { status = HYDU_sock_write(out, fwd_hash->buf + fwd_hash->buf_offset, fwd_hash->buf_count, &count, closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "write error\n"); if (!*closed) { fwd_hash->buf_offset += count; fwd_hash->buf_count -= count; } } fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
/*
 * Demux callback for a downstream process's stdout/stderr fd.
 *
 * 'userp' carries STDOUT_FILENO or another value (treated as stderr) and
 * selects which of HYD_pmcd_pmip.downstream.out / .err the fd is looked up
 * in. Any data read is forwarded on the upstream control socket as a
 * HYD_pmcd_hdr (cmd = STDOUT or STDERR, plus pgid, proxy id, rank and
 * length) followed by the raw bytes. When the read reports the fd closed,
 * it is deregistered from the demux engine, marked HYD_FD_CLOSED in the
 * matching table and closed.
 */
static HYD_status stdoe_cb(int fd, HYD_event_t events, void *userp)
{
    int closed, upstream_closed, idx, sent, recvd, stdfd;
    int from_stdout;
    char buf[HYD_TMPBUF_SIZE];
    struct HYD_pmcd_hdr hdr;
    HYD_status status = HYD_SUCCESS;

    HYDU_FUNC_ENTER();

    stdfd = (int) (size_t) userp;
    from_stdout = (stdfd == STDOUT_FILENO);

    status = HYDU_sock_read(fd, buf, HYD_TMPBUF_SIZE, &recvd, &closed, HYDU_SOCK_COMM_NONE);
    HYDU_ERR_POP(status, "sock read error\n");

    if (recvd) {
        HYD_pmcd_init_header(&hdr);

        /* Find which local process owns this fd */
        idx = 0;
        if (from_stdout) {
            hdr.cmd = STDOUT;
            while (idx < HYD_pmcd_pmip.local.proxy_process_count &&
                   HYD_pmcd_pmip.downstream.out[idx] != fd)
                idx++;
        } else {
            hdr.cmd = STDERR;
            while (idx < HYD_pmcd_pmip.local.proxy_process_count &&
                   HYD_pmcd_pmip.downstream.err[idx] != fd)
                idx++;
        }

        HYDU_ASSERT(idx < HYD_pmcd_pmip.local.proxy_process_count, status);

        hdr.pgid = HYD_pmcd_pmip.local.pgid;
        hdr.proxy_id = HYD_pmcd_pmip.local.id;
        hdr.rank = HYD_pmcd_pmip.downstream.pmi_rank[idx];
        hdr.buflen = recvd;

        /* Header, then payload, both on the upstream control socket */
        status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, &hdr, sizeof(hdr), &sent,
                                 &upstream_closed, HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "sock write error\n");
        HYDU_ASSERT(!upstream_closed, status);

        status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, buf, recvd, &sent,
                                 &upstream_closed, HYDU_SOCK_COMM_MSGWAIT);
        HYDU_ERR_POP(status, "sock write error\n");
        HYDU_ASSERT(!upstream_closed, status);
    }

    if (closed) {
        /* The connection has closed */
        status = HYDT_dmx_deregister_fd(fd);
        HYDU_ERR_POP(status, "unable to deregister fd\n");

        /* Mark every table slot that referred to this fd as closed */
        for (idx = 0; idx < HYD_pmcd_pmip.local.proxy_process_count; idx++) {
            if (from_stdout) {
                if (HYD_pmcd_pmip.downstream.out[idx] == fd)
                    HYD_pmcd_pmip.downstream.out[idx] = HYD_FD_CLOSED;
            } else {
                if (HYD_pmcd_pmip.downstream.err[idx] == fd)
                    HYD_pmcd_pmip.downstream.err[idx] = HYD_FD_CLOSED;
            }
        }

        close(fd);
    }

  fn_exit:
    HYDU_FUNC_EXIT();
    return status;

  fn_fail:
    goto fn_exit;
}
static HYD_status stdoe_cb(int _fd, int pgid, int proxy_id, int rank, void *_buf, int buflen) { int fd = _fd; char *pattern_resolve, *pattern = NULL; struct stdoe_fd *tmp, *run; int sent, closed, mark, i; char *buf = (char *) _buf, *prepend; HYD_status status = HYD_SUCCESS; HYDU_FUNC_ENTER(); pattern = (_fd == STDOUT_FILENO) ? HYD_ui_info.outfile_pattern : (_fd == STDERR_FILENO) ? HYD_ui_info.errfile_pattern : NULL; if (pattern) { /* See if the pattern already exists */ status = resolve_pattern_string(pattern, &pattern_resolve, pgid, proxy_id, rank); HYDU_ERR_POP(status, "error resolving pattern\n"); for (run = stdoe_fd_list; run; run = run->next) if (!strcmp(run->pattern, pattern_resolve)) break; if (run) { fd = run->fd; MPL_free(pattern_resolve); } else { HYDU_MALLOC_OR_JUMP(tmp, struct stdoe_fd *, sizeof(struct stdoe_fd), status); tmp->pattern = pattern_resolve; tmp->fd = open(tmp->pattern, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); HYDU_ASSERT(tmp->fd >= 0, status); tmp->next = NULL; if (stdoe_fd_list == NULL) stdoe_fd_list = tmp; else { for (run = stdoe_fd_list; run->next; run = run->next); run->next = tmp; } fd = tmp->fd; } } if (HYD_ui_info.prepend_pattern == NULL) { status = HYDU_sock_write(fd, buf, buflen, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to write data to stdout/stderr\n"); HYDU_ASSERT(!closed, status); } else { status = resolve_pattern_string(HYD_ui_info.prepend_pattern, &prepend, pgid, proxy_id, rank); HYDU_ERR_POP(status, "error resolving pattern\n"); mark = 0; for (i = 0; i < buflen; i++) { if (buf[i] == '\n' || i == buflen - 1) { if (prepend[0] != '\0') { /* sock_write barfs on maxlen==0 */ status = HYDU_sock_write(fd, (const void *) prepend, strlen(prepend), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to write data to stdout/stderr\n"); } status = HYDU_sock_write(fd, (const void *) &buf[mark], i - mark + 1, &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to write data to 
stdout/stderr\n"); HYDU_ASSERT(!closed, status); mark = i + 1; } } MPL_free(prepend); } fn_exit: HYDU_FUNC_EXIT(); return status; fn_fail: goto fn_exit; }
int main(int argc, char **argv) { int i, count, pid, ret_status, sent, closed, ret, done; struct HYD_pmcd_hdr hdr; HYD_status status = HYD_SUCCESS; status = HYDU_dbg_init("proxy:unset"); HYDU_ERR_POP(status, "unable to initialization debugging\n"); status = HYDU_set_signal(SIGPIPE, signal_cb); HYDU_ERR_POP(status, "unable to set SIGPIPE\n"); status = HYDU_set_signal(SIGTSTP, signal_cb); HYDU_ERR_POP(status, "unable to set SIGTSTP\n"); status = HYDU_set_common_signals(signal_cb); HYDU_ERR_POP(status, "unable to set common signals\n"); status = init_params(); HYDU_ERR_POP(status, "Error initializing proxy params\n"); status = HYD_pmcd_pmip_get_params(argv); HYDU_ERR_POP(status, "bad parameters passed to the proxy\n"); status = HYDT_dmx_init(&HYD_pmcd_pmip.user_global.demux); HYDU_ERR_POP(status, "unable to initialize the demux engine\n"); status = HYDT_ftb_init(); HYDU_ERR_POP(status, "unable to initialize FTB\n"); /* See if HYDI_CONTROL_FD is set before trying to connect upstream */ ret = MPL_env2int("HYDI_CONTROL_FD", &HYD_pmcd_pmip.upstream.control); if (ret < 0) { HYDU_ERR_POP(status, "error reading HYDI_CONTROL_FD environment\n"); } else if (ret == 0) { status = HYDU_sock_connect(HYD_pmcd_pmip.upstream.server_name, HYD_pmcd_pmip.upstream.server_port, &HYD_pmcd_pmip.upstream.control, HYD_pmcd_pmip.local.retries, HYD_CONNECT_DELAY); HYDU_ERR_POP(status, "unable to connect to server %s at port %d (check for firewalls!)\n", HYD_pmcd_pmip.upstream.server_name, HYD_pmcd_pmip.upstream.server_port); } status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, &HYD_pmcd_pmip.local.id, sizeof(HYD_pmcd_pmip.local.id), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to send the proxy ID to the server\n"); if (closed) goto fn_fail; status = HYDT_dmx_register_fd(1, &HYD_pmcd_pmip.upstream.control, HYD_POLLIN, NULL, HYD_pmcd_pmip_control_cmd_cb); HYDU_ERR_POP(status, "unable to register fd\n"); while (1) { /* Wait for some event to occur */ status = 
HYDT_dmx_wait_for_event(-1); HYDU_ERR_POP(status, "demux engine error waiting for event\n"); /* Check to see if there's any open read socket left; if there * are, we will just wait for more events. */ count = 0; for (i = 0; i < HYD_pmcd_pmip.local.proxy_process_count; i++) { if (HYD_pmcd_pmip.downstream.out[i] != HYD_FD_CLOSED) count++; if (HYD_pmcd_pmip.downstream.err[i] != HYD_FD_CLOSED) count++; if (count) break; } if (!count) break; } /* Now wait for the processes to finish */ done = 0; while (1) { pid = waitpid(-1, &ret_status, 0); /* Find the pid and mark it as complete. */ if (pid > 0) for (i = 0; i < HYD_pmcd_pmip.local.proxy_process_count; i++) if (HYD_pmcd_pmip.downstream.pid[i] == pid) { if (HYD_pmcd_pmip.downstream.forced_cleanup) { /* If it is a forced cleanup, the exit status * is either already set or we have to ignore * it */ if (HYD_pmcd_pmip.downstream.exit_status[i] == -1) HYD_pmcd_pmip.downstream.exit_status[i] = 0; else HYD_pmcd_pmip.downstream.exit_status[i] = ret_status; } else { HYD_pmcd_pmip.downstream.exit_status[i] = ret_status; } done++; } /* If no more processes are pending, break out */ if (done == HYD_pmcd_pmip.local.proxy_process_count) break; /* Check if there are any messages from the launcher */ status = HYDT_dmx_wait_for_event(0); HYDU_IGNORE_TIMEOUT(status); HYDU_ERR_POP(status, "demux engine error waiting for event\n"); } /* Send the exit status upstream */ HYD_pmcd_init_header(&hdr); hdr.cmd = EXIT_STATUS; status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, &hdr, sizeof(hdr), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to send EXIT_STATUS command upstream\n"); HYDU_ASSERT(!closed, status); status = HYDU_sock_write(HYD_pmcd_pmip.upstream.control, HYD_pmcd_pmip.downstream.exit_status, HYD_pmcd_pmip.local.proxy_process_count * sizeof(int), &sent, &closed, HYDU_SOCK_COMM_MSGWAIT); HYDU_ERR_POP(status, "unable to return exit status upstream\n"); HYDU_ASSERT(!closed, status); status = 
HYDT_dmx_deregister_fd(HYD_pmcd_pmip.upstream.control); HYDU_ERR_POP(status, "unable to deregister fd\n"); close(HYD_pmcd_pmip.upstream.control); status = HYDT_dmx_finalize(); HYDU_ERR_POP(status, "error returned from demux finalize\n"); status = HYDT_ftb_finalize(); HYDU_ERR_POP(status, "unable to finalize FTB\n"); status = HYDT_bsci_finalize(); HYDU_ERR_POP(status, "unable to finalize the bootstrap device\n"); /* cleanup the params structure */ cleanup_params(); fn_exit: HYDU_dbg_finalize(); return status; fn_fail: /* kill all processes */ HYD_pmcd_pmip_send_signal(SIGKILL); goto fn_exit; }