/*
 * MPID_nem_tcp_connect_to_root - set up new_vc from a business card and
 * start a TCP connection on it.
 *
 * business_card: string handed to MPID_nem_tcp_get_addr_port_from_bc() and
 *                MPIDI_GetTagFromPort().
 * new_vc:        VC whose TCP area (sock_id) and port_name_tag are filled
 *                in; the caller has already allocated it.
 *
 * Steps, in order:
 *   1. read address and port from the business card (the port is written
 *      straight into vc_tcp->sock_id.sin_port by the helper),
 *   2. read the port-name tag into new_vc->port_name_tag,
 *   3. call MPID_nem_tcp_connect() on the VC.
 *
 * Returns the value of mpi_errno: MPI_SUCCESS if no step reported an error,
 * otherwise the code left by the first failing step (failures are routed
 * through MPIR_ERR_POP, which presumably jumps to fn_fail).
 */
int MPID_nem_tcp_connect_to_root (const char *business_card, MPIDI_VC_t *new_vc)
{
    int mpi_errno = MPI_SUCCESS;
    struct in_addr addr;
    MPID_nem_tcp_vc_area *vc_tcp = VC_TCP(new_vc);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CONNECT_TO_ROOT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_CONNECT_TO_ROOT);

    /* vc is already allocated before reaching this point */

    mpi_errno = MPID_nem_tcp_get_addr_port_from_bc(business_card, &addr, &vc_tcp->sock_id.sin_port);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    /* Copy the address only after the helper has reported success: 'addr'
       is an uninitialized local, so reading it after a failed parse could
       store an indeterminate value in the VC. */
    vc_tcp->sock_id.sin_addr.s_addr = addr.s_addr;

    mpi_errno = MPIDI_GetTagFromPort(business_card, &new_vc->port_name_tag);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    mpi_errno = MPID_nem_tcp_connect(new_vc);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_CONNECT_TO_ROOT);
    return mpi_errno;

 fn_fail:
    goto fn_exit;
}
/*
 * MPID_nem_tcp_pkt_unpause_handler - packet handler that lifts the
 * "send paused" flag on a VC and drains its paused send queue.
 *
 * vc:     VC the packet arrived on.
 * pkt:    the received packet (not referenced in this function).
 * buflen: on return, set to sizeof(MPIDI_CH3_Pkt_t).
 * rreqp:  not referenced in this function.
 *
 * Returns MPI_SUCCESS, or whatever MPID_nem_tcp_send_queued() returned when
 * it was called.
 */
int MPID_nem_tcp_pkt_unpause_handler(MPIDI_VC_t *vc, MPIDI_CH3_Pkt_t *pkt, intptr_t *buflen, MPIR_Request **rreqp)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_nem_tcp_vc_area *vc_tcp = VC_TCP(vc);
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CKPT_UNPAUSE_HANDLER);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_CKPT_UNPAUSE_HANDLER);

    vc_tcp->send_paused = FALSE;

    /* The regular send queue may already hold an unpause message.  Only
       when it is empty do we try to push the paused requests out directly;
       otherwise they are simply appended to the send queue below. */
    if (MPIDI_CH3I_Sendq_empty(vc_tcp->send_queue)) {
        /* The status is not inspected here: it is carried to the return
           value while any leftover requests are still moved over. */
        mpi_errno = MPID_nem_tcp_send_queued(vc, &vc_tcp->paused_send_queue);
    }

    /* Whatever remains paused goes onto the send queue to wait for the
       reconnect; the paused queue is then reset to empty. */
    if (!MPIDI_CH3I_Sendq_empty(vc_tcp->paused_send_queue)) {
        MPIDI_CH3I_Sendq_enqueue_multiple_no_refcount(&vc_tcp->send_queue,
                                                      vc_tcp->paused_send_queue.head,
                                                      vc_tcp->paused_send_queue.tail);
        vc_tcp->paused_send_queue.tail = NULL;
        vc_tcp->paused_send_queue.head = NULL;
    }

 fn_exit:
    *buflen = sizeof(MPIDI_CH3_Pkt_t);
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_CKPT_UNPAUSE_HANDLER);
    return mpi_errno;

 fn_fail:
    goto fn_exit;
}
/*
 * MPID_nem_tcp_get_conninfo - report the connection identity stored on a VC.
 *
 * vc:      VC to query.
 * addr:    receives a copy of the VC's TCP sock_id.
 * pg_id:   receives vc->pg->id cast to char *; the pointer is handed out
 *          as-is, nothing is copied.
 * pg_rank: receives vc->pg_rank.
 *
 * Always returns MPI_SUCCESS.
 */
int MPID_nem_tcp_get_conninfo (struct MPIDI_VC *vc, struct sockaddr_in *addr, char **pg_id, int *pg_rank)
{
    MPID_nem_tcp_vc_area *vc_tcp = VC_TCP(vc);

    *addr = vc_tcp->sock_id;
    *pg_id = (char *)vc->pg->id;
    *pg_rank = vc->pg_rank;

    return MPI_SUCCESS;
}
/*
 * MPID_nem_tcp_ckpt_pause_send_vc - mark a VC's TCP area as "send paused".
 *
 * vc: VC whose send_paused flag is set to TRUE.
 *
 * Nothing else is touched.  Always returns MPI_SUCCESS.
 */
int MPID_nem_tcp_ckpt_pause_send_vc(MPIDI_VC_t *vc)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_TCP_CKPT_PAUSE_SEND_VC);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_TCP_CKPT_PAUSE_SEND_VC);

    VC_TCP(vc)->send_paused = TRUE;

    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_TCP_CKPT_PAUSE_SEND_VC);
    return mpi_errno;
}
/*
 * MPID_nem_tcp_vc_init - put a VC's TCP area and channel hooks into their
 * initial, disconnected state.
 *
 * vc: VC to initialize.
 *
 * Sets the TCP state to MPID_NEM_TCP_VC_STATE_DISCONNECTED, installs the TCP
 * send routines and the packet-handler table, zeroes sock_id (family
 * AF_INET), clears the list links, the socket-connection association, both
 * send queues, the paused flag and the counters.
 *
 * Always returns MPI_SUCCESS.
 */
int MPID_nem_tcp_vc_init (MPIDI_VC_t *vc)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_CH3I_VC *vc_ch = &vc->ch;
    MPID_nem_tcp_vc_area *vc_tcp = VC_TCP(vc);
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_VC_INIT);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_VC_INIT);

    vc_tcp->state = MPID_NEM_TCP_VC_STATE_DISCONNECTED;

    /* send entry points */
    vc->sendNoncontig_fn = MPID_nem_tcp_SendNoncontig;
    vc_ch->iStartContigMsg = MPID_nem_tcp_iStartContigMsg;
    vc_ch->iSendContig = MPID_nem_tcp_iSendContig;

#ifdef ENABLE_CHECKPOINTING
    /* checkpointing hooks; the unpause handler is registered in the
       pkt_handlers table, which is defined outside this function */
    vc_ch->ckpt_pause_send_vc = MPID_nem_tcp_ckpt_pause_send_vc;
    vc_ch->ckpt_continue_vc = MPID_nem_tcp_ckpt_continue_vc;
    vc_ch->ckpt_restart_vc = MPID_nem_tcp_ckpt_restart_vc;

    pkt_handlers[MPIDI_NEM_TCP_PKT_UNPAUSE] = MPID_nem_tcp_pkt_unpause_handler;
#endif

    /* packet-handler table */
    vc_ch->pkt_handler = pkt_handlers;
    vc_ch->num_pkt_handlers = MPIDI_NEM_TCP_PKT_NUM_TYPES;

    /* start from an all-zero socket address, then tag it as AF_INET */
    memset(&vc_tcp->sock_id, 0, sizeof vc_tcp->sock_id);
    vc_tcp->sock_id.sin_family = AF_INET;

    /* not linked into any list yet */
    vc_ch->next = NULL;
    vc_ch->prev = NULL;

    /* no socket connection associated with this VC yet */
    ASSIGN_SC_TO_VC(vc_tcp, NULL);

    /* both queues start out empty, sending is not paused */
    vc_tcp->send_queue.tail = NULL;
    vc_tcp->send_queue.head = NULL;
    vc_tcp->send_paused = FALSE;
    vc_tcp->paused_send_queue.tail = NULL;
    vc_tcp->paused_send_queue.head = NULL;

    vc_tcp->sc_ref_count = 0;
    vc_tcp->connect_retry_count = 0;

    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_VC_INIT);
    return mpi_errno;
}
/*
 * MPID_nem_tcp_vc_dbg_print_sendq - debugging aid that dumps a VC's TCP
 * connection summary and every request on its send queue.
 *
 * stream: destination for the output.
 * vc:     VC to inspect.
 *
 * The first line shows the socket-connection pointer, its fd (-1 when no
 * connection is attached) and the TCP state.  One line follows per queued
 * request with its index, pointer, context id, rank and tag.
 */
void MPID_nem_tcp_vc_dbg_print_sendq(FILE *stream, MPIDI_VC_t *vc)
{
    int i;
    MPID_Request *sreq;
    MPID_nem_tcp_vc_area *vc_tcp = VC_TCP(vc);

    /* %p requires a void * argument, hence the explicit casts. */
    fprintf(stream, ".. sc=%p fd=%d vc_tcp->state=%d\n",
            (void *)vc_tcp->sc, (vc_tcp->sc ? vc_tcp->sc->fd : -1), vc_tcp->state);

    /* This function violates any abstraction in the queues, since there's no
       good way to print them without inspecting the internals. */
    for (sreq = vc_tcp->send_queue.head, i = 0; sreq != NULL; sreq = sreq->dev.next, ++i) {
        fprintf(stream, "....[%d] sreq=%p ctx=%#x rank=%d tag=%d\n", i, (void *)sreq,
                sreq->dev.match.parts.context_id,
                sreq->dev.match.parts.rank,
                sreq->dev.match.parts.tag);
    }
}
/*
 * MPID_nem_tcp_vc_terminate - begin tearing down a VC's TCP connection.
 *
 * vc: VC being terminated.
 *
 * Two cases, selected by vc->state:
 *   - MPIDI_VC_STATE_CLOSED: termination comes from the close protocol.
 *     MPID_nem_tcp_vc_terminated() is called right away only when the send
 *     queue is already empty; otherwise this function just returns.
 *   - any other state: termination is treated as the result of a fault.
 *     The send queue is errored out with an MPIX_ERR_PROC_FAILED code and
 *     MPID_nem_tcp_vc_terminated() is called immediately.
 *
 * Returns MPI_SUCCESS, or the error code left by the first failing call.
 */
int MPID_nem_tcp_vc_terminate(MPIDI_VC_t *vc)
{
    int mpi_errno = MPI_SUCCESS;
    int req_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_TCP_VC_TERMINATE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_TCP_VC_TERMINATE);

    if (vc->state == MPIDI_VC_STATE_CLOSED) {
        /* Close protocol: let pending sends finish before terminating. */
        MPID_nem_tcp_vc_area *vc_tcp = VC_TCP(vc);

        if (MPIDI_CH3I_Sendq_empty(vc_tcp->send_queue)) {
            /* Nothing left to send, so terminate right now. */
            mpi_errno = MPID_nem_tcp_vc_terminated(vc);
            if (mpi_errno) {
                MPIR_ERR_POP(mpi_errno);
            }
        }
        /* Otherwise return; per the original note, vc_terminated() is
           called from the commrdy_handler once the sendq is empty. */
    } else {
        /* Fault: fail every outstanding send and terminate immediately. */
        MPIR_ERR_SET1(req_errno, MPIX_ERR_PROC_FAILED, "**comm_fail", "**comm_fail %d", vc->pg_rank);

        mpi_errno = MPID_nem_tcp_error_out_send_queue(vc, req_errno);
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }

        mpi_errno = MPID_nem_tcp_vc_terminated(vc);
        if (mpi_errno) {
            MPIR_ERR_POP(mpi_errno);
        }
    }

 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_TCP_VC_TERMINATE);
    return mpi_errno;

 fn_fail:
    goto fn_exit;
}