static void process_resend(int fd, short args, void *cbdata) { mca_oob_tcp_msg_error_t *op = (mca_oob_tcp_msg_error_t*)cbdata; mca_oob_tcp_peer_t *peer; opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:tcp processing resend to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&op->hop)); /* do we know this peer? */ if (NULL == (peer = mca_oob_tcp_peer_lookup(&op->hop))) { /* push this back to the component so it can try * another module within this transport. If no * module can be found, the component can push back * to the framework so another component can try */ opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] peer %s unknown", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(&op->hop)); ORTE_ACTIVATE_TCP_MSG_ERROR(op->snd, NULL, &op->hop, mca_oob_tcp_component_hop_unknown); goto cleanup; } /* add the msg to this peer's send queue */ if (MCA_OOB_TCP_CONNECTED == peer->state) { opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s tcp:resend: already connected to %s - queueing for send", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer->name)); MCA_OOB_TCP_QUEUE_MSG(peer, op->snd, true); goto cleanup; } if (MCA_OOB_TCP_CONNECTING != peer->state && MCA_OOB_TCP_CONNECT_ACK != peer->state) { /* add the message to the queue for sending after the * connection is formed */ MCA_OOB_TCP_QUEUE_MSG(peer, op->snd, false); /* we have to initiate the connection - again, we do not * want to block while the connection is created. * So throw us into an event that will create * the connection via a mini-state-machine :-) */ opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s tcp:send_nb: initiating connection to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer->name)); peer->state = MCA_OOB_TCP_CONNECTING; ORTE_ACTIVATE_TCP_CONN_STATE(peer, mca_oob_tcp_peer_try_connect); } cleanup: OBJ_RELEASE(op); }
/* API functions */ static void ping(const orte_process_name_t *proc) { mca_oob_tcp_peer_t *peer; opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] processing ping to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(proc)); /* do we know this peer? */ if (NULL == (peer = mca_oob_tcp_peer_lookup(proc))) { /* push this back to the component so it can try * another module within this transport. If no * module can be found, the component can push back * to the framework so another component can try */ opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] hop %s unknown", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(proc)); ORTE_ACTIVATE_TCP_MSG_ERROR(NULL, NULL, proc, mca_oob_tcp_component_hop_unknown); return; } /* has this peer had a progress thread assigned yet? */ if (NULL == peer->ev_base) { /* nope - assign one */ ORTE_OOB_TCP_NEXT_BASE(peer); } /* if we are already connected, there is nothing to do */ if (MCA_OOB_TCP_CONNECTED == peer->state) { opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] already connected to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(proc)); return; } /* if we are already connecting, there is nothing to do */ if (MCA_OOB_TCP_CONNECTING == peer->state || MCA_OOB_TCP_CONNECT_ACK == peer->state) { opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] already connecting to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(proc)); return; } /* attempt the connection */ peer->state = MCA_OOB_TCP_CONNECTING; ORTE_ACTIVATE_TCP_CONN_STATE(peer, mca_oob_tcp_peer_try_connect); }
/* * Event callback when there is data available on the registered * socket to recv. This is called for the listen sockets to accept an * incoming connection, on new sockets trying to complete the software * connection process, and for probes. Data on an established * connection is handled elsewhere. */ static void recv_handler(int sd, short flg, void *cbdata) { mca_oob_tcp_conn_op_t *op = (mca_oob_tcp_conn_op_t*)cbdata; int flags; uint64_t *ui64; mca_oob_tcp_hdr_t hdr; mca_oob_tcp_peer_t *peer; opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:recv:handler called", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); /* get the handshake */ if (ORTE_SUCCESS != mca_oob_tcp_peer_recv_connect_ack(NULL, sd, &hdr)) { goto cleanup; } /* finish processing ident */ if (MCA_OOB_TCP_IDENT == hdr.type) { if (NULL == (peer = mca_oob_tcp_peer_lookup(&hdr.origin))) { /* should never happen */ mca_oob_tcp_peer_close(peer); goto cleanup; } /* set socket up to be non-blocking */ if ((flags = fcntl(sd, F_GETFL, 0)) < 0) { opal_output(0, "%s mca_oob_tcp_recv_connect: fcntl(F_GETFL) failed: %s (%d)", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno); } else { flags |= O_NONBLOCK; if (fcntl(sd, F_SETFL, flags) < 0) { opal_output(0, "%s mca_oob_tcp_recv_connect: fcntl(F_SETFL) failed: %s (%d)", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno); } } /* is the peer instance willing to accept this connection */ peer->sd = sd; if (mca_oob_tcp_peer_accept(peer) == false) { if (OOB_TCP_DEBUG_CONNECT <= opal_output_get_verbosity(orte_oob_base_framework.framework_output)) { opal_output(0, "%s-%s mca_oob_tcp_recv_connect: " "rejected connection from %s connection state %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&(peer->name)), ORTE_NAME_PRINT(&(hdr.origin)), peer->state); } CLOSE_THE_SOCKET(sd); ui64 = (uint64_t*)(&peer->name); opal_hash_table_set_value_uint64(&mca_oob_tcp_module.peers, 
(*ui64), NULL); OBJ_RELEASE(peer); } } cleanup: OBJ_RELEASE(op); }
/* API functions */ static void process_ping(int fd, short args, void *cbdata) { mca_oob_tcp_ping_t *op = (mca_oob_tcp_ping_t*)cbdata; mca_oob_tcp_peer_t *peer; opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] processing ping to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(&op->peer)); /* do we know this peer? */ if (NULL == (peer = mca_oob_tcp_peer_lookup(&op->peer))) { /* push this back to the component so it can try * another module within this transport. If no * module can be found, the component can push back * to the framework so another component can try */ opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] hop %s unknown", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(&op->peer)); ORTE_ACTIVATE_TCP_MSG_ERROR(NULL, NULL, &op->peer, mca_oob_tcp_component_hop_unknown); goto cleanup; } /* if we are already connected, there is nothing to do */ if (MCA_OOB_TCP_CONNECTED == peer->state) { opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] already connected to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(&op->peer)); goto cleanup; } /* if we are already connecting, there is nothing to do */ if (MCA_OOB_TCP_CONNECTING == peer->state && MCA_OOB_TCP_CONNECT_ACK == peer->state) { opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] already connecting to peer %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(&op->peer)); goto cleanup; } /* attempt the connection */ peer->state = MCA_OOB_TCP_CONNECTING; ORTE_ACTIVATE_TCP_CONN_STATE(peer, mca_oob_tcp_peer_try_connect); cleanup: OBJ_RELEASE(op); }
/* * Record listening address for this peer - the connection * is created on first-send */ static void process_set_peer(int fd, short args, void *cbdata) { mca_oob_tcp_peer_op_t *pop = (mca_oob_tcp_peer_op_t*)cbdata; struct sockaddr inaddr; mca_oob_tcp_peer_t *peer; int rc=ORTE_SUCCESS; uint64_t *ui64 = (uint64_t*)(&pop->peer); mca_oob_tcp_addr_t *maddr; opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, "%s:tcp:processing set_peer cmd", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); if (AF_INET != pop->af_family) { opal_output_verbose(20, orte_oob_base_framework.framework_output, "%s NOT AF_INET", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); goto cleanup; } if (NULL == (peer = mca_oob_tcp_peer_lookup(&pop->peer))) { peer = OBJ_NEW(mca_oob_tcp_peer_t); peer->name.jobid = pop->peer.jobid; peer->name.vpid = pop->peer.vpid; opal_output_verbose(20, orte_oob_base_framework.framework_output, "%s SET_PEER ADDING PEER %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&pop->peer)); if (OPAL_SUCCESS != opal_hash_table_set_value_uint64(&mca_oob_tcp_module.peers, (*ui64), peer)) { OBJ_RELEASE(peer); return; } } if ((rc = parse_uri(pop->af_family, pop->net, pop->port, (struct sockaddr*) &inaddr)) != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); goto cleanup; } opal_output_verbose(20, orte_oob_base_framework.framework_output, "%s set_peer: peer %s is listening on net %s port %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&pop->peer), (NULL == pop->net) ? "NULL" : pop->net, (NULL == pop->port) ? "NULL" : pop->port); maddr = OBJ_NEW(mca_oob_tcp_addr_t); memcpy(&maddr->addr, &inaddr, sizeof(inaddr)); opal_list_append(&peer->addrs, &maddr->super); cleanup: OBJ_RELEASE(pop); }
static void process_send(int fd, short args, void *cbdata) { mca_oob_tcp_msg_op_t *op = (mca_oob_tcp_msg_op_t*)cbdata; mca_oob_tcp_peer_t *peer; orte_process_name_t hop; opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] processing send to peer %s:%d to channel =%d seq_num = %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(&op->msg->dst), op->msg->tag, op->msg->dst_channel, op->msg->seq_num); /* do we have a route to this peer (could be direct)? */ hop = orte_routed.get_route(&op->msg->dst); /* do we know this hop? */ if (NULL == (peer = mca_oob_tcp_peer_lookup(&hop))) { /* push this back to the component so it can try * another module within this transport. If no * module can be found, the component can push back * to the framework so another component can try */ opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s:[%s:%d] hop %s unknown", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), __FILE__, __LINE__, ORTE_NAME_PRINT(&hop)); ORTE_ACTIVATE_TCP_NO_ROUTE(op->msg, &hop, mca_oob_tcp_component_no_route); goto cleanup; } /* add the msg to the hop's send queue */ if (MCA_OOB_TCP_CONNECTED == peer->state) { opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s tcp:send_nb: already connected to %s - queueing for send", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer->name)); MCA_OOB_TCP_QUEUE_SEND(op->msg, peer); goto cleanup; } /* add the message to the queue for sending after the * connection is formed */ MCA_OOB_TCP_QUEUE_PENDING(op->msg, peer); if (MCA_OOB_TCP_CONNECTING != peer->state && MCA_OOB_TCP_CONNECT_ACK != peer->state) { /* we have to initiate the connection - again, we do not * want to block while the connection is created. 
* So throw us into an event that will create * the connection via a mini-state-machine :-) */ opal_output_verbose(2, orte_oob_base_framework.framework_output, "%s tcp:send_nb: initiating connection to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&peer->name)); peer->state = MCA_OOB_TCP_CONNECTING; ORTE_ACTIVATE_TCP_CONN_STATE(peer, mca_oob_tcp_peer_try_connect); } cleanup: OBJ_RELEASE(op); }
/*
 * Non-blocking version of mca_oob_send().
 *
 * @param target (IN)  Name of the destination process; used for the peer
 *                     lookup and copied into the header as msg_dst.
 * @param origin (IN)  Name copied into the header as msg_origin.
 * @param iov    (IN)  Array of iovecs describing user buffers and lengths.
 * @param count  (IN)  Number of elements in iovec array.
 * @param tag    (IN)  Tag copied into the header as msg_tag.
 * @param flags  (IN)  Stored on the message as msg_flags.
 * @param cbfunc (IN)  Callback function stored on the message.
 * @param cbdata (IN)  User data stored alongside the callback.
 * @return ORTE_SUCCESS when the message was handed to the peer (or
 *         fully delivered locally); ORTE_ERR_UNREACH when the target
 *         has no peer entry; the code set by MCA_OOB_TCP_MSG_ALLOC when
 *         no message could be allocated; ORTE_ERROR when the iovec
 *         array could not be allocated or a local delivery moved a
 *         different number of bytes than requested; otherwise the
 *         error returned by mca_oob_tcp_send_self / mca_oob_tcp_peer_send.
 */
int mca_oob_tcp_send_nb(
    orte_process_name_t* target,
    orte_process_name_t* origin,
    struct iovec* iov,
    int count,
    int tag,
    int flags,
    orte_rml_callback_fn_t cbfunc,
    void* cbdata)
{
    mca_oob_tcp_peer_t* peer = mca_oob_tcp_peer_lookup(target);
    mca_oob_tcp_msg_t* msg;
    int size;
    int rc;

    if(NULL == peer)
        return ORTE_ERR_UNREACH;

    MCA_OOB_TCP_MSG_ALLOC(msg, rc);
    if(NULL == msg) {
        return rc;
    }

    /* calculate the size of the message */
    size = 0;
    for(rc = 0; rc < count; rc++) {
        size += iov[rc].iov_len;
    }

    if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) {
        opal_output(0, "%s-%s mca_oob_tcp_send_nb: tag %d size %lu\n",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->peer_name)),
                    tag, (unsigned long)size );
    }

    /* fill in the header; it is converted with MCA_OOB_TCP_HDR_HTON
     * further down, just before a remote send
     */
    msg->msg_hdr.msg_type = MCA_OOB_TCP_DATA;
    msg->msg_hdr.msg_size = size;
    msg->msg_hdr.msg_tag = tag;
    msg->msg_hdr.msg_origin = *origin;
    msg->msg_hdr.msg_src = *ORTE_PROC_MY_NAME;
    msg->msg_hdr.msg_dst = *target;

    msg->msg_type = MCA_OOB_TCP_POSTED;
    msg->msg_rc = 0;
    msg->msg_flags = flags;
    msg->msg_uiov = iov;
    msg->msg_ucnt = count;
    msg->msg_rwbuf = NULL;

    /* one iovec more than the caller supplied: slot 0 carries the
     * header, the caller's iovecs follow it
     */
    msg->msg_rwiov = mca_oob_tcp_msg_iov_alloc(msg,count+1);
    if (NULL == msg->msg_rwiov) {
        /* allocation failed: give the message back instead of
         * writing through a NULL iovec array
         */
        MCA_OOB_TCP_MSG_RETURN(msg);
        return ORTE_ERROR;
    }
    msg->msg_rwiov[0].iov_base = (ompi_iov_base_ptr_t)(&msg->msg_hdr);
    msg->msg_rwiov[0].iov_len = sizeof(msg->msg_hdr);
    msg->msg_rwptr = msg->msg_rwiov;
    msg->msg_rwcnt = msg->msg_rwnum = count + 1;
    memcpy(msg->msg_rwiov+1, msg->msg_uiov, sizeof(struct iovec)*msg->msg_ucnt);

    msg->msg_cbfunc = cbfunc;
    msg->msg_cbdata = cbdata;
    msg->msg_complete = false;
    msg->msg_peer = peer->peer_name;

    if (OPAL_EQUAL == mca_oob_tcp_process_name_compare(target, ORTE_PROC_MY_NAME)) {
        /* local delivery: success only if every byte was delivered */
        rc = mca_oob_tcp_send_self(peer,msg,iov,count);
        if (rc < 0 ) {
            return rc;
        } else if (size == rc) {
            return ORTE_SUCCESS;
        } else {
            return ORTE_ERROR;
        }
    }

    MCA_OOB_TCP_HDR_HTON(&msg->msg_hdr);
    rc = mca_oob_tcp_peer_send(peer, msg);
    if(rc != ORTE_SUCCESS) {
        /* the message is returned here for every failure except
         * ORTE_ERR_ADDRESSEE_UNKNOWN
         */
        if (rc != ORTE_ERR_ADDRESSEE_UNKNOWN) {
            MCA_OOB_TCP_MSG_RETURN(msg);
        }
        return rc;
    }

    return ORTE_SUCCESS;
}
/*
 * Route a message to its next hop and queue it on that hop's peer,
 * starting a connection to the hop if none exists or is in progress.
 *
 * msg - the message to send; msg->dst is the final destination
 */
static void send_nb(orte_rml_send_t *msg)
{
    mca_oob_tcp_peer_t *peer;
    orte_process_name_t hop;

    /* ask the routed framework for the next hop (could be the
     * destination itself), then see whether we know that hop
     */
    hop = orte_routed.get_route(&msg->dst);
    peer = mca_oob_tcp_peer_lookup(&hop);

    if (NULL == peer) {
        /* unknown hop: push this back to the component so it can
         * try another module within this transport. If no module
         * can be found, the component can push back to the
         * framework so another component can try
         */
        opal_output_verbose(2, orte_oob_base_framework.framework_output,
                            "%s:[%s:%d] processing send to peer %s:%d seq_num = %d hop %s unknown",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            __FILE__, __LINE__,
                            ORTE_NAME_PRINT(&msg->dst), msg->tag, msg->seq_num,
                            ORTE_NAME_PRINT(&hop));
        ORTE_ACTIVATE_TCP_NO_ROUTE(msg, &hop, mca_oob_tcp_component_no_route);
        return;
    }

    opal_output_verbose(2, orte_oob_base_framework.framework_output,
                        "%s:[%s:%d] processing send to peer %s:%d seq_num = %d via %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        __FILE__, __LINE__,
                        ORTE_NAME_PRINT(&msg->dst), msg->tag, msg->seq_num,
                        ORTE_NAME_PRINT(&peer->name));

    /* give the peer an event base if it does not have one yet */
    if (NULL == peer->ev_base) {
        ORTE_OOB_TCP_NEXT_BASE(peer);
    }

    if (MCA_OOB_TCP_CONNECTED == peer->state) {
        /* connection is up: the msg goes on the hop's send queue */
        opal_output_verbose(2, orte_oob_base_framework.framework_output,
                            "%s tcp:send_nb: already connected to %s - queueing for send",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&peer->name));
        MCA_OOB_TCP_QUEUE_SEND(msg, peer);
    } else {
        /* not connected: hold the message until the connection
         * is formed
         */
        MCA_OOB_TCP_QUEUE_PENDING(msg, peer);

        if (!(MCA_OOB_TCP_CONNECTING == peer->state ||
              MCA_OOB_TCP_CONNECT_ACK == peer->state)) {
            /* nobody has started the connection yet, so we do it.
             * We do not want to block while it is created, so hand
             * off to an event that drives the connection through a
             * mini-state-machine
             */
            opal_output_verbose(2, orte_oob_base_framework.framework_output,
                                "%s tcp:send_nb: initiating connection to %s",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&peer->name));
            peer->state = MCA_OOB_TCP_CONNECTING;
            ORTE_ACTIVATE_TCP_CONN_STATE(peer, mca_oob_tcp_peer_try_connect);
        }
    }
}
/*
 * Send a message to a peer and wait for the send to finish.
 *
 * @param name  (IN)  Name of the destination process.
 * @param iov   (IN)  Array of iovecs describing user buffers and lengths.
 * @param count (IN)  Number of elements in iovec array.
 * @param tag   (IN)  Tag copied into the header as msg_tag.
 * @param flags (IN)  Stored on the message as msg_flags.
 * @return ORTE_ERR_UNREACH when name is NULL or has no peer entry; the
 *         code set by MCA_OOB_TCP_MSG_ALLOC when no message could be
 *         allocated; the result of mca_oob_tcp_send_self for a send to
 *         ourselves; the error from mca_oob_tcp_peer_send or
 *         mca_oob_tcp_msg_wait on failure; otherwise the size reported
 *         by mca_oob_tcp_msg_wait minus sizeof(mca_oob_tcp_hdr_t).
 */
int mca_oob_tcp_send(
    orte_process_name_t* name,
    struct iovec *iov,
    int count,
    int tag,
    int flags)
{
    mca_oob_tcp_peer_t* peer;
    mca_oob_tcp_msg_t* msg;
    int size;
    int rc;

    /* name is dereferenced unconditionally below (header fill), so a
     * NULL name has to be rejected here rather than tested later
     */
    if(NULL == name)
        return ORTE_ERR_UNREACH;

    peer = mca_oob_tcp_peer_lookup(name);
    if(NULL == peer)
        return ORTE_ERR_UNREACH;

    if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) {
        opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_send: tag %d\n",
            ORTE_NAME_ARGS(orte_process_info.my_name),
            ORTE_NAME_ARGS(&(peer->peer_name)),
            tag);
    }

    MCA_OOB_TCP_MSG_ALLOC(msg, rc);
    if(NULL == msg)
        return rc;

    /* calculate the size of the message */
    size = 0;
    for(rc = 0; rc < count; rc++) {
        size += iov[rc].iov_len;
    }

    /* fill in the header; it is converted with MCA_OOB_TCP_HDR_HTON
     * further down, just before a remote send
     */
    msg->msg_hdr.msg_type = MCA_OOB_TCP_DATA;
    msg->msg_hdr.msg_size = size;
    msg->msg_hdr.msg_tag = tag;
    if (NULL == orte_process_info.my_name) {
        msg->msg_hdr.msg_src = *ORTE_NAME_INVALID;
    } else {
        msg->msg_hdr.msg_src = *orte_process_info.my_name;
    }
    msg->msg_hdr.msg_dst = *name;

    /* create one additional iovect that will hold the header */
    msg->msg_type = MCA_OOB_TCP_POSTED;
    msg->msg_rc = 0;
    msg->msg_flags = flags;
    msg->msg_uiov = iov;
    msg->msg_ucnt = count;
    /* NOTE(review): the result of mca_oob_tcp_msg_iov_alloc is used
     * without a NULL check - confirm whether it can fail.
     */
    msg->msg_rwiov = mca_oob_tcp_msg_iov_alloc(msg, count+1);
    msg->msg_rwiov[0].iov_base = (ompi_iov_base_ptr_t)(&msg->msg_hdr);
    msg->msg_rwiov[0].iov_len = sizeof(msg->msg_hdr);
    msg->msg_rwptr = msg->msg_rwiov;
    msg->msg_rwcnt = msg->msg_rwnum = count + 1;
    memcpy(msg->msg_rwiov+1, msg->msg_uiov, sizeof(struct iovec)*msg->msg_ucnt);
    msg->msg_rwbuf = NULL;
    msg->msg_cbfunc = NULL;
    msg->msg_cbdata = NULL;
    msg->msg_complete = false;
    msg->msg_peer = peer->peer_name;

    if (NULL != orte_process_info.my_name &&
        ORTE_EQUAL == mca_oob_tcp_process_name_compare(name, orte_process_info.my_name)) {
        /* local delivery */
        return mca_oob_tcp_send_self(peer,msg,iov,count);
    }

    MCA_OOB_TCP_HDR_HTON(&msg->msg_hdr);
    rc = mca_oob_tcp_peer_send(peer, msg);
    if(rc != ORTE_SUCCESS) {
        MCA_OOB_TCP_MSG_RETURN(msg);
        return rc;
    }

    /* block until the send has finished, then give the message back */
    rc = mca_oob_tcp_msg_wait(msg, &size);
    MCA_OOB_TCP_MSG_RETURN(msg);
    if(rc != ORTE_SUCCESS)
        return rc;

    /* the reported size includes the header; callers get the payload size */
    size -= sizeof(mca_oob_tcp_hdr_t);
    return size;
}