static void process_resend(int fd, short args, void *cbdata) { mca_oob_tcp_msg_error_t *op = (mca_oob_tcp_msg_error_t*)cbdata; mca_oob_tcp_peer_t *peer; opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:tcp processing resend to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&op->hop)); /* do we know this peer? */ if (NULL == (peer = mca_oob_tcp_peer_lookup(&op->hop))) { /* push this back to the component so it can try * another module within this transport. If no * module can be found, the component can push back * to the framework so another component can try */ opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] peer %s unknown", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(&op->hop)); ORTE_ACTIVATE_TCP_MSG_ERROR(op->snd, NULL, &op->hop, mca_oob_tcp_component_hop_unknown); goto cleanup; } /* add the msg to this peer's send queue */ if (MCA_OOB_TCP_CONNECTED == peer->state) { opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s tcp:resend: already connected to %s - queueing for send", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer->name)); MCA_OOB_TCP_QUEUE_MSG(peer, op->snd, true); goto cleanup; } if (MCA_OOB_TCP_CONNECTING != peer->state && MCA_OOB_TCP_CONNECT_ACK != peer->state) { /* add the message to the queue for sending after the * connection is formed */ MCA_OOB_TCP_QUEUE_MSG(peer, op->snd, false); /* we have to initiate the connection - again, we do not * want to block while the connection is created. * So throw us into an event that will create * the connection via a mini-state-machine :-) */ opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s tcp:send_nb: initiating connection to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer->name)); peer->state = MCA_OOB_TCP_CONNECTING; ORTE_ACTIVATE_TCP_CONN_STATE(peer, mca_oob_tcp_peer_try_connect); } cleanup: OBJ_RELEASE(op); }
/* API functions */ static void ping(const orte_process_name_t *proc) { mca_oob_tcp_peer_t *peer; opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] processing ping to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(proc)); /* do we know this peer? */ if (NULL == (peer = mca_oob_tcp_peer_lookup(proc))) { /* push this back to the component so it can try * another module within this transport. If no * module can be found, the component can push back * to the framework so another component can try */ opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] hop %s unknown", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(proc)); ORTE_ACTIVATE_TCP_MSG_ERROR(NULL, NULL, proc, mca_oob_tcp_component_hop_unknown); return; } /* has this peer had a progress thread assigned yet? */ if (NULL == peer->ev_base) { /* nope - assign one */ ORTE_OOB_TCP_NEXT_BASE(peer); } /* if we are already connected, there is nothing to do */ if (MCA_OOB_TCP_CONNECTED == peer->state) { opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] already connected to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(proc)); return; } /* if we are already connecting, there is nothing to do */ if (MCA_OOB_TCP_CONNECTING == peer->state || MCA_OOB_TCP_CONNECT_ACK == peer->state) { opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] already connecting to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(proc)); return; } /* attempt the connection */ peer->state = MCA_OOB_TCP_CONNECTING; ORTE_ACTIVATE_TCP_CONN_STATE(peer, mca_oob_tcp_peer_try_connect); }
/* API functions */ static void process_ping(int fd, short args, void *cbdata) { mca_oob_tcp_ping_t *op = (mca_oob_tcp_ping_t*)cbdata; mca_oob_tcp_peer_t *peer; opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] processing ping to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(&op->peer)); /* do we know this peer? */ if (NULL == (peer = mca_oob_tcp_peer_lookup(&op->peer))) { /* push this back to the component so it can try * another module within this transport. If no * module can be found, the component can push back * to the framework so another component can try */ opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] hop %s unknown", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(&op->peer)); ORTE_ACTIVATE_TCP_MSG_ERROR(NULL, NULL, &op->peer, mca_oob_tcp_component_hop_unknown); goto cleanup; } /* if we are already connected, there is nothing to do */ if (MCA_OOB_TCP_CONNECTED == peer->state) { opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] already connected to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(&op->peer)); goto cleanup; } /* if we are already connecting, there is nothing to do */ if (MCA_OOB_TCP_CONNECTING == peer->state && MCA_OOB_TCP_CONNECT_ACK == peer->state) { opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] already connecting to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(&op->peer)); goto cleanup; } /* attempt the connection */ peer->state = MCA_OOB_TCP_CONNECTING; ORTE_ACTIVATE_TCP_CONN_STATE(peer, mca_oob_tcp_peer_try_connect); cleanup: OBJ_RELEASE(op); }
void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) { mca_oob_tcp_peer_t* peer = (mca_oob_tcp_peer_t*)cbdata; int rc; orte_process_name_t hop; mca_oob_tcp_peer_t *relay; uint64_t ui64; if (orte_abnormal_term_ordered) { return; } opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:recv:handler called for peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer->name)); switch (peer->state) { case MCA_OOB_TCP_CONNECT_ACK: if (ORTE_SUCCESS == (rc = mca_oob_tcp_peer_recv_connect_ack(peer, peer->sd, NULL))) { opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:recv:handler starting send/recv events", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); /* we connected! Start the send/recv events */ if (!peer->recv_ev_active) { opal_event_add(&peer->recv_event, 0); peer->recv_ev_active = true; } if (peer->timer_ev_active) { opal_event_del(&peer->timer_event); peer->timer_ev_active = false; } /* if there is a message waiting to be sent, queue it */ if (NULL == peer->send_msg) { peer->send_msg = (mca_oob_tcp_send_t*)opal_list_remove_first(&peer->send_queue); } if (NULL != peer->send_msg && !peer->send_ev_active) { opal_event_add(&peer->send_event, 0); peer->send_ev_active = true; } /* update our state */ peer->state = MCA_OOB_TCP_CONNECTED; } else { opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s UNABLE TO COMPLETE CONNECT ACK WITH %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer->name)); opal_event_del(&peer->recv_event); ORTE_FORCED_TERMINATE(1); return; } break; case MCA_OOB_TCP_CONNECTED: opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:recv:handler CONNECTED", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); /* allocate a new message and setup for recv */ if (NULL == peer->recv_msg) { opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:recv:handler allocate new recv msg", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); peer->recv_msg = OBJ_NEW(mca_oob_tcp_recv_t); if (NULL == peer->recv_msg) { opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: unable to allocate recv message\n", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name))); return; } /* start by reading the header */ peer->recv_msg->rdptr = (char*)&peer->recv_msg->hdr; peer->recv_msg->rdbytes = sizeof(mca_oob_tcp_hdr_t); } /* if the header hasn't been completely read, read it */ if (!peer->recv_msg->hdr_recvd) { opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:recv:handler read hdr", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); if (ORTE_SUCCESS == (rc = read_bytes(peer))) { OPAL_TIMING_EVENT((&tm,"Header received from %s", ORTE_NAME_PRINT(&peer->name))); /* completed reading the header */ peer->recv_msg->hdr_recvd = true; /* convert the header */ MCA_OOB_TCP_HDR_NTOH(&peer->recv_msg->hdr); /* if this is a zero-byte message, then we are done */ if (0 == peer->recv_msg->hdr.nbytes) { opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s RECVD ZERO-BYTE MESSAGE FROM %s for tag %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer->name), peer->recv_msg->hdr.tag); peer->recv_msg->data = NULL; // make sure } else { opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:recv:handler allocate data region of size %lu", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)peer->recv_msg->hdr.nbytes); /* allocate the data region */ peer->recv_msg->data = (char*)malloc(peer->recv_msg->hdr.nbytes); /* point to it */ peer->recv_msg->rdptr = peer->recv_msg->data; peer->recv_msg->rdbytes = peer->recv_msg->hdr.nbytes; } /* fall thru and attempt to read the data */ } else if (ORTE_ERR_RESOURCE_BUSY == rc || ORTE_ERR_WOULD_BLOCK == rc) { /* exit this event and let the event lib progress */ return; } else { /* close the connection */ opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:recv:handler error reading bytes - closing connection", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); mca_oob_tcp_peer_close(peer); return; } } if (peer->recv_msg->hdr_recvd) { /* continue to read the data block - we start from * wherever we left off, which could be at the * beginning or somewhere in the message */ if (ORTE_SUCCESS == (rc = read_bytes(peer))) { OPAL_TIMING_EVENT((&tm,"Msg received from %s", ORTE_NAME_PRINT(&peer->name))); /* we recvd all of the message */ opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s RECVD COMPLETE MESSAGE FROM %s (ORIGIN %s) OF %d BYTES FOR DEST %s TAG %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer->name), ORTE_NAME_PRINT(&peer->recv_msg->hdr.origin), (int)peer->recv_msg->hdr.nbytes, ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst), peer->recv_msg->hdr.tag); /* am I the intended recipient (header was already converted back to host order)? */ if (peer->recv_msg->hdr.dst.jobid == ORTE_PROC_MY_NAME->jobid && peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) { /* yes - post it to the RML for delivery */ opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s DELIVERING TO RML", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin, peer->recv_msg->hdr.tag, peer->recv_msg->data, peer->recv_msg->hdr.nbytes); OBJ_RELEASE(peer->recv_msg); } else { /* no - find the next hop in the route */ hop = orte_routed.get_route(&peer->recv_msg->hdr.dst); if (hop.jobid == ORTE_JOBID_INVALID || hop.vpid == ORTE_VPID_INVALID) { /* no hop known - post the error to the component * and let the OOB see if there is another way * to get there from here */ opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s NO ROUTE TO %s FROM HERE", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer->name)); /* let the component know about the problem */ ORTE_ACTIVATE_TCP_MSG_ERROR(NULL, peer->recv_msg, &hop, mca_oob_tcp_component_no_route); /* cleanup */ OBJ_RELEASE(peer->recv_msg); return; } else { /* does we know how to reach the next hop? */ memcpy(&ui64, (char*)&hop, sizeof(uint64_t)); if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_tcp_module.peers, ui64, (void**)&relay)) { opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s ADDRESS OF NEXT HOP %s TO %s IS UNKNOWN", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&hop), ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst)); /* let the component know about the problem */ ORTE_ACTIVATE_TCP_MSG_ERROR(NULL, peer->recv_msg, &hop, mca_oob_tcp_component_hop_unknown); /* cleanup */ OBJ_RELEASE(peer->recv_msg); return; } opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s ROUTING TO %s FROM HERE", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&relay->name)); /* if this came from a different job family, then ensure * we know how to return */ if (ORTE_JOB_FAMILY(peer->recv_msg->hdr.origin.jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) { orte_routed.update_route(&(peer->recv_msg->hdr.origin), &peer->name); } /* post the message for retransmission */ MCA_OOB_TCP_QUEUE_RELAY(peer->recv_msg, relay); OBJ_RELEASE(peer->recv_msg); } } peer->recv_msg = NULL; return; } else if (ORTE_ERR_RESOURCE_BUSY == rc || ORTE_ERR_WOULD_BLOCK == rc) { /* exit this event and let the event lib progress */ return; } else { // report the error opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: unable to recv message", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name))); /* turn off the recv event */ opal_event_del(&peer->recv_event); ORTE_FORCED_TERMINATE(1); return; } } break; default: opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: invalid socket state(%d)", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name)), peer->state); // mca_oob_tcp_peer_close(peer); break; } }