/*
 * Tear down the connection to a peer: fail any in-flight and queued
 * sends once the retry limit is exceeded, close the socket, and
 * deregister all pending events.
 *
 * @param peer  the peer whose connection is being shut down; the
 *              caller is assumed to hold peer->peer_lock (all other
 *              peer-state transitions in this file occur under it —
 *              TODO confirm against callers).
 */
void mca_oob_tcp_peer_shutdown(mca_oob_tcp_peer_t* peer)
{
    /* giving up and cleanup any pending messages */
    if(peer->peer_retries++ > mca_oob_tcp_component.tcp_peer_retries) {
        mca_oob_tcp_msg_t *msg;

        opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_shutdown: retries exceeded",
            ORTE_NAME_ARGS(orte_process_info.my_name),
            ORTE_NAME_ARGS(&(peer->peer_name)));

        /* There are cases during the initial connection setup where
           the peer_send_msg is NULL but there are things in the
           queue -- handle that case */
        if (NULL != (msg = peer->peer_send_msg)) {
            msg->msg_complete = true;
            msg->msg_rc = ORTE_ERR_UNREACH;
            mca_oob_tcp_msg_complete(msg, &peer->peer_name);
        }
        peer->peer_send_msg = NULL;

        /* drain the send queue, completing each message with an
           unreachable error so blocked senders wake up */
        while (NULL != (msg = (mca_oob_tcp_msg_t*)opal_list_remove_first(&peer->peer_send_queue))) {
            msg->msg_complete = true;
            msg->msg_rc = ORTE_ERR_UNREACH;
            mca_oob_tcp_msg_complete(msg, &peer->peer_name);
        }

        /* We were unsuccessful in establishing a connection, and are
           not likely to suddenly become successful, so abort the
           whole thing */
        peer->peer_state = MCA_OOB_TCP_FAILED;
    }

    if (peer->peer_sd >= 0) {
        opal_event_del(&peer->peer_recv_event);
        opal_event_del(&peer->peer_send_event);
        CLOSE_THE_SOCKET(peer->peer_sd);
        peer->peer_sd = -1;
    }

    opal_event_del(&peer->peer_timer_event);

    /* BUGFIX: previously the state was unconditionally set to
       MCA_OOB_TCP_CLOSED here, silently clobbering the
       MCA_OOB_TCP_FAILED state assigned above.  Since
       mca_oob_tcp_peer_send() only reports ORTE_ERR_UNREACH for the
       FAILED state, senders would never observe the abort and would
       keep re-queuing and re-resolving.  Preserve FAILED; only mark
       the peer CLOSED when the shutdown is retryable. */
    if (MCA_OOB_TCP_FAILED != peer->peer_state) {
        peer->peer_state = MCA_OOB_TCP_CLOSED;
    }
}
/*
 * Dispatch a send request according to the current connection state:
 * deliver immediately when connected and idle, queue it when a
 * connection is pending or a send is already in progress, kick off
 * address resolution on a closed connection, and report unreachable
 * for a failed peer.
 */
int mca_oob_tcp_peer_send(mca_oob_tcp_peer_t* peer, mca_oob_tcp_msg_t* msg)
{
    int ret = ORTE_SUCCESS;

    OPAL_THREAD_LOCK(&peer->peer_lock);

    if (MCA_OOB_TCP_CONNECTED == peer->peer_state) {
        if (NULL != peer->peer_send_msg) {
            /* a send is already in flight - queue behind it */
            opal_list_append(&peer->peer_send_queue, (opal_list_item_t*)msg);
        } else if (mca_oob_tcp_msg_send_handler(msg, peer)) {
            /* the whole message went out in one shot - complete it now */
            mca_oob_tcp_msg_complete(msg, &peer->peer_name);
        } else {
            /* partial send - park the message and arm the send event
               so the remainder goes out when the socket is writable */
            peer->peer_send_msg = msg;
            opal_event_add(&peer->peer_send_event, 0);
        }
    } else if (MCA_OOB_TCP_FAILED == peer->peer_state) {
        /* connection attempts were abandoned - nothing will get through */
        ret = ORTE_ERR_UNREACH;
    } else if (MCA_OOB_TCP_CONNECTING == peer->peer_state ||
               MCA_OOB_TCP_CONNECT_ACK == peer->peer_state ||
               MCA_OOB_TCP_CLOSED == peer->peer_state ||
               MCA_OOB_TCP_RESOLVE == peer->peer_state) {
        /* no usable connection yet - queue the message, and if the
           connection is closed begin resolving the peer address */
        opal_list_append(&peer->peer_send_queue, (opal_list_item_t*)msg);
        if (MCA_OOB_TCP_CLOSED == peer->peer_state) {
            peer->peer_state = MCA_OOB_TCP_RESOLVE;
            /* resolve is invoked without the lock held */
            OPAL_THREAD_UNLOCK(&peer->peer_lock);
            return mca_oob_tcp_resolve(peer);
        }
    }
    /* any other state: no action, report success (matches the
       original switch, which had no default case) */

    OPAL_THREAD_UNLOCK(&peer->peer_lock);
    return ret;
}
/* * A file descriptor is available/ready for send. Check the state * of the socket and take the appropriate action. */ static void mca_oob_tcp_peer_send_handler(int sd, short flags, void* user) { mca_oob_tcp_peer_t* peer = (mca_oob_tcp_peer_t *)user; OPAL_THREAD_LOCK(&peer->peer_lock); switch(peer->peer_state) { case MCA_OOB_TCP_CONNECTING: mca_oob_tcp_peer_complete_connect(peer); break; case MCA_OOB_TCP_CONNECTED: { while(peer->peer_send_msg != NULL) { /* complete the current send */ mca_oob_tcp_msg_t* msg = peer->peer_send_msg; if(mca_oob_tcp_msg_send_handler(msg, peer)) { mca_oob_tcp_msg_complete(msg, &peer->peer_name); } else { break; } /* if current completed - progress any pending sends */ peer->peer_send_msg = (mca_oob_tcp_msg_t*) opal_list_remove_first(&peer->peer_send_queue); } /* if nothing else to do unregister for send event notifications */ if(NULL == peer->peer_send_msg) { opal_event_del(&peer->peer_send_event); } break; } default: opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_send_handler: invalid connection state (%d)", ORTE_NAME_ARGS(orte_process_info.my_name), ORTE_NAME_ARGS(&(peer->peer_name)), peer->peer_state); opal_event_del(&peer->peer_send_event); break; } OPAL_THREAD_UNLOCK(&peer->peer_lock); }
/*
 * Progress a completed recv:
 * (1) signal a posted recv as complete
 * (2) queue an unexpected message in the recv list
 *
 * Note the locking protocol: tcp_match_lock is dropped around the
 * user callback / completion (which may re-enter the OOB), and
 * tcp_match_count tracks callbacks in flight so that waiters on
 * tcp_match_cond are signalled only when the last one finishes.
 * Message ownership: on the non-PEEK match path the incoming msg is
 * returned to the free list; on the PEEK and no-match paths it is
 * appended to tcp_msg_recv and remains owned by the component.
 */
static void mca_oob_tcp_msg_data(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* peer)
{
    /* attempt to match unexpected message to a posted recv */
    mca_oob_tcp_msg_t* post;
    int rc;
    OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock);

    /* if I'm not a proc, check if this message came from
     * another job family - procs dont' need to do this because
     * they always route through their daemons anyway
     */
    if (!ORTE_PROC_IS_MPI) {
        if ((ORTE_JOB_FAMILY(msg->msg_hdr.msg_origin.jobid) !=
             ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) &&
            (0 != ORTE_JOB_FAMILY(msg->msg_hdr.msg_origin.jobid))) {
            /* this message came from a different job family that is not
             * a local slave, so we may
             * not know how to route any reply back to the originator. Update
             * our route so we can dynamically build the routing table
             */
            if (ORTE_SUCCESS != (rc = orte_routed.update_route(&(msg->msg_hdr.msg_origin),
                                                               &(msg->msg_hdr.msg_src)))) {
                /* Nothing we can do about errors here as we definitely want
                 * the receive to complete, but at least bark loudly
                 */
                ORTE_ERROR_LOG(rc);
            }
        }
    }

    /* match msg against posted receives */
    post = mca_oob_tcp_msg_match_post(&msg->msg_hdr.msg_origin, msg->msg_hdr.msg_tag);
    if(NULL != post) {
        if(NULL == post->msg_uiov || 0 == post->msg_ucnt) {
            /* posted recv supplied no user buffer - fail it */
            opal_output(0, "msg_data returning bad param");
            post->msg_rc = ORTE_ERR_BAD_PARAM;
        } else {
            /* copy msg data into posted recv */
            if (post->msg_flags & ORTE_RML_ALLOC) msg->msg_flags |= ORTE_RML_ALLOC;
            post->msg_rc = mca_oob_tcp_msg_copy(msg, post->msg_uiov, post->msg_ucnt);
            if(post->msg_flags & ORTE_RML_TRUNC) {
                /* TRUNC: report the full wire size of the payload
                 * (iovecs 1..msg_rwcnt; entry 0 presumably holds the
                 * header -- TODO confirm) rather than the bytes copied */
                int i, size = 0;
                for(i=1; i<msg->msg_rwcnt+1; i++)
                    size += msg->msg_rwiov[i].iov_len;
                post->msg_rc = size;
            }
        }
        if(post->msg_flags & ORTE_RML_PEEK) {
            /* will need message for actual receive */
            opal_list_append(&mca_oob_tcp_component.tcp_msg_recv, &msg->super.super);
        } else {
            /* consumed: hand the message back to the free list */
            MCA_OOB_TCP_MSG_RETURN(msg);
        }

        /* count this callback as in flight, then drop the match lock
         * before invoking user code */
        mca_oob_tcp_component.tcp_match_count++;
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);

        if(post->msg_flags & ORTE_RML_PERSISTENT) {
            /* persistent recv: deliver via callback, leave it posted */
            post->msg_cbfunc(
                post->msg_rc,
                &peer->peer_name,
                post->msg_uiov,
                post->msg_ucnt,
                post->msg_hdr.msg_tag,
                post->msg_cbdata);
        } else {
            /* one-shot recv: complete it (wakes/calls back the poster) */
            mca_oob_tcp_msg_complete(post, &msg->msg_hdr.msg_origin);
        }

        /* re-acquire the lock to retire this callback; signal anyone
         * waiting for all matches to drain */
        OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock);
        if(--mca_oob_tcp_component.tcp_match_count == 0)
            opal_condition_signal(&mca_oob_tcp_component.tcp_match_cond);
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
    } else {
        /* no posted recv matched - park the message on the unexpected
         * list until one is posted */
        opal_list_append(&mca_oob_tcp_component.tcp_msg_recv, (opal_list_item_t*)msg);
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
    }
}