/*
 * Handler for the first message arriving on a direct connection; its header
 * identifies the initiating node.
 *
 * conn - connection the message arrived on; its I/O engine gives up the fd
 * _hdr - base header (pmixp_base_hdr_t *); only hdr->nodeid is read here
 * msg  - not used by this handler
 *
 * The fd is detached from the engine and offered to pmixp_dconn_accept() for
 * hdr->nodeid. If that fails the fd is closed and an error is logged;
 * otherwise a persistent PMIXP_PROTO_DIRECT connection is created on the
 * dconn's engine and an eio object for the fd is handed to eio_new_obj().
 */
static void _direct_conn_establish(pmixp_conn_t *conn, void *_hdr, void *msg)
{
	pmixp_base_hdr_t *hdr = (pmixp_base_hdr_t *)_hdr;
	pmixp_io_engine_t *eng = pmixp_conn_get_eng(conn);
	int fd = pmixp_io_detach(eng);
	pmixp_dconn_t *dconn = pmixp_dconn_accept(hdr->nodeid, fd);

	if (dconn == NULL) {
		/* The connection was refused because one is already
		 * established with this node; most likely both sides raced
		 * to connect to each other.
		 */
		char *nodename = pmixp_info_job_host(hdr->nodeid);

		close(fd);
		PMIXP_ERROR("Failed to accept direct connection from %s",
			    nodename);
		xfree(nodename);
		return;
	}

	/* NOTE(review): the result of pmixp_conn_new_persist() is not checked
	 * for NULL here, while _process_extended_hdr() does check it -
	 * confirm whether it can fail.
	 */
	pmixp_conn_t *persist_conn =
		pmixp_conn_new_persist(PMIXP_PROTO_DIRECT,
				       pmixp_dconn_engine(dconn),
				       _direct_new_msg_conn,
				       _direct_return_connection, dconn);

	/* dconn is presumably returned locked by pmixp_dconn_accept() */
	pmixp_dconn_unlock(dconn);

	eio_obj_t *obj = eio_obj_create(fd, &direct_peer_ops,
					(void *)persist_conn);
	eio_new_obj(pmixp_info_io(), obj);

	/* wake the eio handle up so the new object gets processed */
	eio_signal_wakeup(pmixp_info_io());
}
void pmixp_server_direct_conn(int fd) { eio_obj_t *obj; pmixp_conn_t *conn; PMIXP_DEBUG("Request from fd = %d", fd); /* Set nonblocking */ fd_set_nonblocking(fd); fd_set_close_on_exec(fd); pmixp_fd_set_nodelay(fd); conn = pmixp_conn_new_temp(PMIXP_PROTO_DIRECT, fd, _direct_conn_establish); /* try to process right here */ pmixp_conn_progress_rcv(conn); if (!pmixp_conn_is_alive(conn)) { /* success, don't need this connection anymore */ pmixp_conn_return(conn); return; } /* If it is a blocking operation: create AIO object to * handle it */ obj = eio_obj_create(fd, &direct_peer_ops, (void *)conn); eio_new_obj(pmixp_info_io(), obj); /* wakeup this connection to get processed */ eio_signal_wakeup(pmixp_info_io()); }
/* * TODO: we need to keep track of the "me" * structures created here, because we need to * free them in "pmixp_stepd_finalize" */ void pmixp_server_slurm_conn(int fd) { eio_obj_t *obj; pmixp_conn_t *conn = NULL; PMIXP_DEBUG("Request from fd = %d", fd); pmixp_debug_hang(0); /* Set nonblocking */ fd_set_nonblocking(fd); fd_set_close_on_exec(fd); conn = pmixp_conn_new_temp(PMIXP_PROTO_SLURM, fd, _slurm_new_msg); /* try to process right here */ pmixp_conn_progress_rcv(conn); if (!pmixp_conn_is_alive(conn)) { /* success, don't need this connection anymore */ pmixp_conn_return(conn); return; } /* If it is a blocking operation: create AIO object to * handle it */ obj = eio_obj_create(fd, &slurm_peer_ops, (void *)conn); eio_new_obj(pmixp_info_io(), obj); }
/* * TODO: we need to keep track of the "me" * structures created here, because we need to * free them in "pmixp_stepd_finalize" */ void pmix_server_new_conn(int fd) { eio_obj_t *obj; PMIXP_DEBUG("Request from fd = %d", fd); /* Set nonblocking */ fd_set_nonblocking(fd); fd_set_close_on_exec(fd); pmixp_io_engine_t *me = xmalloc(sizeof(pmixp_io_engine_t)); pmix_io_init(me, fd, srv_rcvd_header); /* We use slurm_forward_data to send message to stepd's * SLURM will put user ID there. We need to skip it. */ pmix_io_rcvd_padding(me, sizeof(uint32_t)); if( 2 == _process_message(me) ){ /* connection was fully processed here */ xfree(me); return; } /* If it is a blocking operation: create AIO object to * handle it */ obj = eio_obj_create(fd, &peer_ops, (void *)me); eio_new_obj(pmixp_info_io(), obj); }
/*
 * Process the extended part of a base header and, if required, start the
 * direct connection to the node the header came from.
 *
 * hdr - received base header; hdr->nodeid selects the direct connection
 * buf - buffer the endpoint information is unpacked from
 *
 * The dconn for hdr->nodeid is obtained with pmixp_dconn_lock() and released
 * with pmixp_dconn_unlock() on every return path. The process is aborted if
 * no dconn is returned.
 *
 * Returns SLURM_SUCCESS when no connect is required or everything succeeded,
 * the non-zero result of pmixp_dconn_connect() when connecting fails, or
 * SLURM_ERROR when the persistent connection object cannot be created.
 */
static int _process_extended_hdr(pmixp_base_hdr_t *hdr, Buf buf)
{
	char nhdr[PMIXP_BASE_HDR_MAX];
	_direct_proto_message_t *init_msg = NULL;
	char *ep_data = NULL;
	uint32_t ep_len = 0;
	size_t hsize = 0, dsize = 0;
	bool send_init = false;
	int rc = SLURM_SUCCESS;
	pmixp_dconn_t *dconn = pmixp_dconn_lock(hdr->nodeid);

	if (dconn == NULL) {
		/* Should not happen */
		xassert(dconn);
		abort();
	}

	/* Pull the endpoint information out of the buffer.
	 * NOTE(review): any result of this call is ignored and ep_data is
	 * never released in this function - confirm who owns it.
	 */
	_base_hdr_unpack_ext(buf, &ep_data, &ep_len);

	/* Nothing more to do unless a connect is required; the same call
	 * also tells whether an init message has to be sent with it.
	 */
	if (!pmixp_dconn_require_connect(dconn, &send_init)) {
		goto unlock;
	}

	if (send_init) {
		/* Build the PMIXP_MSG_INIT_DIRECT message handed to connect */
		Buf buf_init = pmixp_server_buf_new();
		pmixp_base_hdr_t bhdr;

		init_msg = xmalloc(sizeof(*init_msg));

		PMIXP_BASE_HDR_SETUP(bhdr, PMIXP_MSG_INIT_DIRECT, 0, buf_init);
		bhdr.ext_flag = 1;
		hsize = _direct_hdr_pack(&bhdr, nhdr);

		init_msg->hdr = bhdr;
		init_msg->sent_cb = pmixp_server_sent_buf_cb;
		init_msg->cbdata = buf_init;
		init_msg->buf_ptr = buf_init;
		init_msg->buffer = _buf_finalize(buf_init, nhdr, hsize,
						 &dsize);
	}

	rc = pmixp_dconn_connect(dconn, ep_data, ep_len, init_msg);
	if (rc != 0) {
		PMIXP_ERROR("Unable to connect to %d", dconn->nodeid);
		if (init_msg != NULL) {
			/* the init message was not consumed: release it */
			free_buf(init_msg->buf_ptr);
			xfree(init_msg);
		}
		goto unlock;
	}

	switch (pmixp_dconn_progress_type(dconn)) {
	case PMIXP_DCONN_PROGRESS_SW: {
		/* This direct connection has an fd that must be polled to
		 * make progress; use the connection interface for that.
		 */
		pmixp_io_engine_t *eng = pmixp_dconn_engine(dconn);
		pmixp_conn_t *conn =
			pmixp_conn_new_persist(PMIXP_PROTO_DIRECT, eng,
					       _direct_new_msg_conn,
					       _direct_return_connection,
					       dconn);

		if (conn == NULL) {
			/* TODO: handle this error */
			rc = SLURM_ERROR;
			goto unlock;
		}

		eio_obj_t *obj = eio_obj_create(pmixp_io_fd(eng),
						&direct_peer_ops,
						(void *)conn);
		eio_new_obj(pmixp_info_io(), obj);
		eio_signal_wakeup(pmixp_info_io());
		break;
	}
	case PMIXP_DCONN_PROGRESS_HW: {
		/* nothing is set up here for this progress type */
		break;
	}
	default:
		/* Should not happen */
		xassert(0 && pmixp_dconn_progress_type(dconn));
		/* TODO: handle this error */
	}

unlock:
	pmixp_dconn_unlock(dconn);
	return rc;
}