void mca_oob_usock_component_cannot_send(int fd, short args, void *cbdata)
{
    mca_oob_usock_msg_error_t *pop = (mca_oob_usock_msg_error_t*)cbdata;
    uint64_t ui64;
    int rc;

    opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                        "%s usock:unable to send to peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&pop->hop));

    /* retrieve the peer's name */
    memcpy(&ui64, (char*)&(pop->hop), sizeof(uint64_t));

    /* mark the OOB's table that we can't reach it any more - for now, we don't
     * worry about shifting to another component. Eventually, we will want to push
     * this decision to the OOB so it can try other components and eventually error out
     */
    if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
                                                               ui64, NULL))) {
        ORTE_ERROR_LOG(rc);
    }

    /* have the OOB base try to send it again */
    ORTE_OOB_SEND(pop->rmsg);

    OBJ_RELEASE(pop);
}
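/*
 * The memcpy above packs the peer's orte_process_name_t (a jobid/vpid pair
 * of 32-bit values in ORTE) into a raw 8-byte key for the uint64 hash table.
 * A minimal, self-contained sketch of that keying scheme - the struct and
 * helper names below are illustrative, not part of the ORTE API:
 */
#include <stdint.h>
#include <string.h>

typedef struct {
    uint32_t jobid;
    uint32_t vpid;
} example_proc_name_t;

static uint64_t example_name_to_key(const example_proc_name_t *name)
{
    uint64_t key;
    /* relies on the struct being exactly 8 bytes with no padding */
    memcpy(&key, name, sizeof(key));
    return key;
}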
void mca_oob_ud_req_complete (mca_oob_ud_req_t *req, int rc)
{
    int i;

    opal_output_verbose(10, orte_oob_base_framework.framework_output,
                        "%s oob:ud:req_complete %s request %p completed with status %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        (req->type == MCA_OOB_UD_REQ_SEND) ? "SEND" : "RECV",
                        (void *) req, rc);

    if (NULL != req->req_qp) {
        (void) mca_oob_ud_qp_data_release (req->req_qp);
        req->req_qp = NULL;
    }

    /* deregister memory *before* handing it to the callback */
    MCA_OOB_UD_REQ_DEREG_MR(req);

    switch (req->type) {
    case MCA_OOB_UD_REQ_SEND:
        if (req->req_data_type != MCA_OOB_UD_REQ_TR) {
            req->rml_msg->status = rc;
        }
        break;
    case MCA_OOB_UD_REQ_RECV:
        if ((req->req_target.jobid == ORTE_PROC_MY_NAME->jobid) &&
            (req->req_target.vpid == ORTE_PROC_MY_NAME->vpid)) {
            opal_output_verbose(1, orte_oob_base_framework.framework_output,
                                "%s DELIVERING TO RML",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            if (MCA_OOB_UD_REQ_IOV == req->req_data_type) {
                /* size the buffer from the total payload length, not the
                 * iovec descriptor size, to avoid overrunning the allocation */
                int datalen = 0;
                char *data;
                for (i = 0 ; i < req->req_data.iov.count ; ++i) {
                    datalen += req->req_data.iov.uiov[i].iov_len;
                }
                data = (char *)calloc(datalen, sizeof(char));
                datalen = 0;
                for (i = 0 ; i < req->req_data.iov.count ; ++i) {
                    memcpy (&data[datalen], req->req_data.iov.uiov[i].iov_base,
                            req->req_data.iov.uiov[i].iov_len);
                    datalen += req->req_data.iov.uiov[i].iov_len;
                }
                ORTE_RML_POST_MESSAGE(&req->req_origin, req->req_tag, req->req_seq_num,
                                      data, datalen);
                free(data);
            } else {
                ORTE_RML_POST_MESSAGE(&req->req_origin, req->req_tag, req->req_seq_num,
                                      req->req_data.buf.p, req->req_data.buf.size);
            }
        } else {
            opal_output_verbose(1, orte_oob_base_framework.framework_output,
                                "%s UD PROMOTING ROUTED MESSAGE FOR %s TO OOB",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&req->req_target));

            orte_rml_send_t *snd = OBJ_NEW(orte_rml_send_t);
            snd->dst = req->req_target;
            snd->origin = req->req_origin;
            snd->tag = req->req_tag;
            snd->seq_num = req->req_seq_num;
            if (MCA_OOB_UD_REQ_IOV == req->req_data_type) {
                /* same sizing fix as above: allocate the summed payload length */
                int datalen = 0;
                char *data;
                for (i = 0 ; i < req->req_data.iov.count ; ++i) {
                    datalen += req->req_data.iov.uiov[i].iov_len;
                }
                data = (char *)calloc(datalen, sizeof(char));
                datalen = 0;
                for (i = 0 ; i < req->req_data.iov.count ; ++i) {
                    memcpy (&data[datalen], req->req_data.iov.uiov[i].iov_base,
                            req->req_data.iov.uiov[i].iov_len);
                    datalen += req->req_data.iov.uiov[i].iov_len;
                }
                snd->data = data;
                snd->count = datalen;
            } else {
                char *data = (char *)calloc(req->req_data.buf.size, sizeof(char));
                memcpy (data, req->req_data.buf.p, req->req_data.buf.size);
                snd->data = data;
                snd->count = req->req_data.buf.size;
            }
            snd->cbfunc.iov = NULL;
            snd->cbdata = NULL;
            /* activate the OOB send state */
            ORTE_OOB_SEND(snd);
        }
        break;
    default:
        break;
    }

    mca_oob_ud_req_return (req);
}
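/*
 * Both delivery branches above flatten a struct iovec array into one
 * contiguous buffer before handing it off: sum the lengths, allocate once,
 * then copy the segments in order. A minimal, self-contained sketch of the
 * same pattern - the function name is illustrative only:
 */
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>

static char *example_flatten_iov(const struct iovec *iov, int count, size_t *total)
{
    size_t len = 0, off = 0;
    char *buf;
    int i;

    for (i = 0; i < count; ++i) {
        len += iov[i].iov_len;
    }
    if (NULL == (buf = malloc(len))) {
        return NULL;
    }
    for (i = 0; i < count; ++i) {
        memcpy(buf + off, iov[i].iov_base, iov[i].iov_len);
        off += iov[i].iov_len;
    }
    *total = len;
    return buf;
}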
void orte_qos_ack_channel_process_ack (int status, orte_process_name_t* sender,
                                       opal_buffer_t *buffer,
                                       orte_rml_tag_t tag, void *cbdata)
{
    /* process ack received for the msg */
    uint32_t num_msgs_acked, channel_num, i;
    int32_t num_values, room_num;
    orte_rml_send_t *msg, *missed_msg;
    void *occupant = NULL;
    orte_rml_channel_t *channel;
    orte_qos_ack_channel_t *ack_chan;
    uint32_t *seq_num_array;
    uint32_t ack_type;
    uint32_t missed_msg_seq_num = 0;

    num_values = 1;
    /* unpack channel number first */
    opal_dss.unpack(buffer, (void*) &channel_num, &num_values, OPAL_UINT32);
    OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output,
                         "orte_qos_ack_channel_process_ack received ack on channel = %d",
                         channel_num));
    channel = orte_rml_base_get_channel (channel_num);
    /* note: this must be &&, not || - dereferencing channel->qos_channel_ptr
     * when channel is NULL would crash */
    if ((NULL != channel) && (NULL != channel->qos_channel_ptr)) {
        ack_chan = (orte_qos_ack_channel_t *) (channel->qos_channel_ptr);
        seq_num_array = malloc (sizeof(uint32_t) * ack_chan->window);
        num_values = 1;
        /* unpack ack type */
        opal_dss.unpack(buffer, (void*) &ack_type, &num_values, OPAL_UINT32);
        num_values = 1;
        /* unpack num messages acked */
        opal_dss.unpack(buffer, (void*) &num_msgs_acked, &num_values, OPAL_UINT32);
        OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output,
                             "orte_qos_ack_channel_process_ack received ack type %d for %d msgs on channel = %d",
                             ack_type, num_msgs_acked, channel_num));
        if (ACK_OUT_OF_ORDER != ack_type) {
            /* handle normal ACK */
            for (i = 0; i < num_msgs_acked; i++) {
                opal_dss.unpack(buffer, (void*) &seq_num_array[i], &num_values, OPAL_UINT32);
                room_num = orte_qos_ack_channel_get_msg_room (ack_chan, seq_num_array[i]);
                opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, &occupant);
                orte_qos_ack_channel_set_msg_room(ack_chan, seq_num_array[i], -1);
                if ((occupant != NULL) && (room_num != -1)) {
                    msg = (orte_rml_send_t*) occupant;
                    OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output,
                                         "Releasing sent message with tag %d and seq_num %d after receiving Ack from dest",
                                         msg->tag, msg->seq_num));
                    msg->status = ORTE_SUCCESS;
                    ORTE_RML_SEND_COMPLETE(msg);
                } else {
                    OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output,
                                         "OOPS received an ACK for already completed seq_num = %d",
                                         seq_num_array[i]));
                }
            }
        } else {
            /* handle out-of-order ACK - complete msgs received in order,
             * retry the lost msg */
            for (i = 0; i < num_msgs_acked; i++) {
                opal_dss.unpack(buffer, (void*) &seq_num_array[i], &num_values, OPAL_UINT32);
                room_num = orte_qos_ack_channel_get_msg_room (ack_chan, seq_num_array[i]);
                opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, &occupant);
                orte_qos_ack_channel_set_msg_room(ack_chan, seq_num_array[i], -1);
                if ((NULL != occupant) &&
                    ((i == 0) || (seq_num_array[i] == seq_num_array[i-1] + 1))) {
                    msg = (orte_rml_send_t*) occupant;
                    msg->status = ORTE_SUCCESS;
                    ORTE_RML_SEND_COMPLETE(msg);
                } else {
                    if (NULL != occupant) {
                        /* a gap precedes this seq num, so it must be the
                         * last one acked */
                        assert(i == num_msgs_acked - 1);
                        /* recheck the ith msg */
                        opal_hotel_checkin(&ack_chan->outstanding_msgs, (void*)occupant, &room_num);
                        orte_qos_ack_channel_set_msg_room (ack_chan, seq_num_array[i], room_num);
                        /* resend and recheck all the missed msgs */
                        missed_msg_seq_num = seq_num_array[i-1] + 1;
                        for (; missed_msg_seq_num < seq_num_array[i]; missed_msg_seq_num++) {
                            room_num = orte_qos_ack_channel_get_msg_room (ack_chan, missed_msg_seq_num);
                            opal_hotel_checkout_and_return_occupant (&ack_chan->outstanding_msgs, room_num, &occupant);
                            assert (NULL != occupant);
                            missed_msg = (orte_rml_send_t*) occupant;
                            missed_msg->status = ORTE_ERR_LOST_MSG_IN_WINDOW;
                            opal_hotel_checkin(&ack_chan->outstanding_msgs, (void*)missed_msg, &room_num);
                            orte_qos_ack_channel_set_msg_room (ack_chan, missed_msg_seq_num, room_num);
                            /* send this out on the wire directly */
                            ORTE_OOB_SEND (missed_msg);
                        }
                    } else {
                        OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output,
                                             "OOPS received an ACK for already completed seq_num = %d",
                                             seq_num_array[i]));
                    }
                }
            }
        }
        free(seq_num_array);
    } else {
        OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output,
                             "orte_qos_ack_channel_msg_ack_recv_callback received ack on non-existent channel = %d",
                             channel_num));
    }
}
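/*
 * The out-of-order branch above completes each acked message whose sequence
 * number immediately follows its predecessor, and treats any jump as a run
 * of lost messages that must be resent. A minimal, self-contained sketch of
 * that gap-detection rule over a sorted array of acked sequence numbers -
 * names are illustrative only:
 */
#include <stdint.h>
#include <stdio.h>

static void example_find_gaps(const uint32_t *acked, int n)
{
    int i;
    uint32_t missing;

    for (i = 1; i < n; ++i) {
        /* every seq num skipped between consecutive acks was lost */
        for (missing = acked[i-1] + 1; missing < acked[i]; ++missing) {
            printf("seq %u lost - schedule retransmission\n", missing);
        }
    }
}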
int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod,
                         orte_process_name_t* peer,
                         struct iovec* iov,
                         int count,
                         orte_rml_tag_t tag,
                         orte_rml_callback_fn_t cbfunc,
                         void* cbdata)
{
    orte_rml_recv_t *rcv;
    orte_rml_send_t *snd;
    int bytes;
    orte_self_send_xfer_t *xfer;
    int i;
    char* ptr;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_send to peer %s at tag %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer), tag));

    if (ORTE_RML_TAG_INVALID == tag) {
        /* cannot send to an invalid tag */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }
    if (NULL == peer ||
        OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer)) {
        /* cannot send to an invalid peer */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* if this is a message to myself, then just post the message
     * for receipt - no need to dive into the oob
     */
    if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, peer, ORTE_PROC_MY_NAME)) {
        /* local delivery */
        OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                             "%s rml_send_iovec_to_self at tag %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tag));
        /* send to self is a tad tricky - we really don't want
         * to track the send callback function throughout the recv
         * process and execute it upon receipt as this would provide
         * very different timing from a non-self message. Specifically,
         * if we just retain a pointer to the incoming data
         * and then execute the send callback prior to the receive,
         * then the caller will think we are done with the data and
         * can release it. So we have to copy the data in order to
         * execute the send callback prior to receiving the message.
         *
         * In truth, this really is a better mimic of the non-self
         * message behavior. If we actually pushed the message out
         * on the wire and had it loop back, then we would receive
         * a new block of data anyway.
         */
        /* setup the send callback */
        xfer = OBJ_NEW(orte_self_send_xfer_t);
        xfer->iov = iov;
        xfer->count = count;
        xfer->cbfunc.iov = cbfunc;
        xfer->tag = tag;
        xfer->cbdata = cbdata;
        /* setup the event for the send callback */
        ORTE_THREADSHIFT(xfer, orte_event_base, send_self_exe, ORTE_MSG_PRI);

        /* copy the message for the recv */
        rcv = OBJ_NEW(orte_rml_recv_t);
        rcv->sender = *peer;
        rcv->tag = tag;
        /* get the total number of bytes in the iovec array */
        bytes = 0;
        for (i = 0 ; i < count ; ++i) {
            bytes += iov[i].iov_len;
        }
        /* get the required memory allocation */
        if (0 < bytes) {
            rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(bytes);
            rcv->iov.iov_len = bytes;
            /* transfer the bytes */
            ptr = (char*)rcv->iov.iov_base;
            for (i = 0 ; i < count ; ++i) {
                memcpy(ptr, iov[i].iov_base, iov[i].iov_len);
                ptr += iov[i].iov_len;
            }
        }
        /* post the message for receipt - since the send callback was posted
         * first and has the same priority, it will execute first
         */
        ORTE_RML_ACTIVATE_MESSAGE(rcv);
        return ORTE_SUCCESS;
    }

    snd = OBJ_NEW(orte_rml_send_t);
    snd->dst = *peer;
    snd->origin = *ORTE_PROC_MY_NAME;
    snd->tag = tag;
    snd->iov = iov;
    snd->count = count;
    snd->cbfunc.iov = cbfunc;
    snd->cbdata = cbdata;
    snd->routed = strdup(mod->routed);

    /* activate the OOB send state */
    ORTE_OOB_SEND(snd);

    return ORTE_SUCCESS;
}
void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata)
{
    mca_oob_tcp_msg_error_t *mop = (mca_oob_tcp_msg_error_t*)cbdata;
    uint64_t ui64;
    orte_rml_send_t *snd;
    orte_oob_base_peer_t *bpr;

    opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                        "%s tcp:unknown hop called for peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&mop->hop));

    if (orte_finalizing || orte_abnormal_term_ordered) {
        /* just ignore the problem */
        OBJ_RELEASE(mop);
        return;
    }

    /* mark that this component cannot reach this hop */
    memcpy(&ui64, (char*)&(mop->hop), sizeof(uint64_t));
    if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
                                                         ui64, (void**)&bpr) ||
        NULL == bpr) {
        /* the overall OOB has no knowledge of this hop. Only
         * way this could happen is if the peer contacted us
         * via this component, and it wasn't entered into the
         * OOB framework hash table. We have no way of knowing
         * what to do next, so just output an error message and
         * abort */
        opal_output(0, "%s ERROR: message to %s requires routing and the OOB has no knowledge of the reqd hop %s",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&mop->snd->hdr.dst),
                    ORTE_NAME_PRINT(&mop->hop));
        ORTE_ACTIVATE_PROC_STATE(&mop->hop, ORTE_PROC_STATE_COMM_FAILED);
        OBJ_RELEASE(mop);
        return;
    }
    opal_bitmap_clear_bit(&bpr->addressable, mca_oob_tcp_component.super.idx);

    /* mark that this component cannot reach this destination either */
    memcpy(&ui64, (char*)&(mop->snd->hdr.dst), sizeof(uint64_t));
    if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
                                                         ui64, (void**)&bpr) ||
        NULL == bpr) {
        opal_output(0, "%s ERROR: message to %s requires routing and the OOB has no knowledge of this process",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&mop->snd->hdr.dst));
        ORTE_ACTIVATE_PROC_STATE(&mop->hop, ORTE_PROC_STATE_COMM_FAILED);
        OBJ_RELEASE(mop);
        return;
    }
    opal_bitmap_clear_bit(&bpr->addressable, mca_oob_tcp_component.super.idx);

    /* post the message to the OOB so it can see
     * if another component can transfer it */
    MCA_OOB_TCP_HDR_NTOH(&mop->snd->hdr);
    snd = OBJ_NEW(orte_rml_send_t);
    snd->dst = mop->snd->hdr.dst;
    snd->origin = mop->snd->hdr.origin;
    snd->tag = mop->snd->hdr.tag;
    snd->data = mop->snd->data;
    snd->count = mop->snd->hdr.nbytes;
    snd->cbfunc.iov = NULL;
    snd->cbdata = NULL;
    /* activate the OOB send state */
    ORTE_OOB_SEND(snd);

    /* protect the data */
    mop->snd->data = NULL;

    OBJ_RELEASE(mop);
}
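/*
 * MCA_OOB_TCP_HDR_NTOH above converts the header's multi-byte fields back to
 * host byte order before the message re-enters the OOB base. A minimal,
 * self-contained sketch of that idiom on a hypothetical two-field header -
 * this is not the real mca_oob_tcp_hdr_t layout:
 */
#include <stdint.h>
#include <arpa/inet.h>

typedef struct {
    uint32_t tag;     /* message tag */
    uint32_t nbytes;  /* payload length */
} example_wire_hdr_t;

/* network -> host, applied once when a header arrives off the wire */
static void example_hdr_ntoh(example_wire_hdr_t *hdr)
{
    hdr->tag = ntohl(hdr->tag);
    hdr->nbytes = ntohl(hdr->nbytes);
}

/* host -> network, applied once before a header goes on the wire */
static void example_hdr_hton(example_wire_hdr_t *hdr)
{
    hdr->tag = htonl(hdr->tag);
    hdr->nbytes = htonl(hdr->nbytes);
}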
static void send_msg(int fd, short args, void *cbdata)
{
    orte_rml_send_request_t *req = (orte_rml_send_request_t*)cbdata;
    orte_process_name_t *peer = &(req->post.dst);
    orte_rml_tag_t tag = req->post.tag;
    orte_rml_recv_t *rcv;
    orte_rml_send_t *snd;
    int bytes;
    orte_self_send_xfer_t *xfer;
    int i;
    char* ptr;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_send_msg to peer %s at tag %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer), tag));
    OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(peer)));

    /* if this is a message to myself, then just post the message
     * for receipt - no need to dive into the oob
     */
    if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, peer, ORTE_PROC_MY_NAME)) {
        /* local delivery */
        OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                             "%s rml_send_iovec_to_self at tag %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tag));
        /* send to self is a tad tricky - we really don't want
         * to track the send callback function throughout the recv
         * process and execute it upon receipt as this would provide
         * very different timing from a non-self message. Specifically,
         * if we just retain a pointer to the incoming data
         * and then execute the send callback prior to the receive,
         * then the caller will think we are done with the data and
         * can release it. So we have to copy the data in order to
         * execute the send callback prior to receiving the message.
         *
         * In truth, this really is a better mimic of the non-self
         * message behavior. If we actually pushed the message out
         * on the wire and had it loop back, then we would receive
         * a new block of data anyway.
         */
        /* setup the send callback */
        xfer = OBJ_NEW(orte_self_send_xfer_t);
        if (NULL != req->post.iov) {
            xfer->iov = req->post.iov;
            xfer->count = req->post.count;
            xfer->cbfunc.iov = req->post.cbfunc.iov;
        } else {
            xfer->buffer = req->post.buffer;
            xfer->cbfunc.buffer = req->post.cbfunc.buffer;
        }
        xfer->tag = tag;
        xfer->cbdata = req->post.cbdata;
        /* setup the event for the send callback */
        opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer);
        opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI);
        opal_event_active(&xfer->ev, OPAL_EV_WRITE, 1);

        /* copy the message for the recv */
        rcv = OBJ_NEW(orte_rml_recv_t);
        rcv->sender = *peer;
        rcv->tag = tag;
        if (NULL != req->post.iov) {
            /* get the total number of bytes in the iovec array */
            bytes = 0;
            for (i = 0 ; i < req->post.count ; ++i) {
                bytes += req->post.iov[i].iov_len;
            }
            /* get the required memory allocation */
            if (0 < bytes) {
                rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(bytes);
                rcv->iov.iov_len = bytes;
                /* transfer the bytes */
                ptr = (char*)rcv->iov.iov_base;
                for (i = 0 ; i < req->post.count ; ++i) {
                    memcpy(ptr, req->post.iov[i].iov_base, req->post.iov[i].iov_len);
                    ptr += req->post.iov[i].iov_len;
                }
            }
        } else if (0 < req->post.buffer->bytes_used) {
            rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(req->post.buffer->bytes_used);
            memcpy(rcv->iov.iov_base, req->post.buffer->base_ptr, req->post.buffer->bytes_used);
            rcv->iov.iov_len = req->post.buffer->bytes_used;
        }
        /* post the message for receipt - since the send callback was posted
         * first and has the same priority, it will execute first
         */
        ORTE_RML_ACTIVATE_MESSAGE(rcv);
        OBJ_RELEASE(req);
        return;
    }

    snd = OBJ_NEW(orte_rml_send_t);
    snd->dst = *peer;
    snd->origin = *ORTE_PROC_MY_NAME;
    snd->tag = tag;
    if (NULL != req->post.iov) {
        snd->iov = req->post.iov;
        snd->count = req->post.count;
        snd->cbfunc.iov = req->post.cbfunc.iov;
    } else {
        snd->buffer = req->post.buffer;
        snd->cbfunc.buffer = req->post.cbfunc.buffer;
    }
    snd->cbdata = req->post.cbdata;

    /* activate the OOB send state */
    ORTE_OOB_SEND(snd);

    OBJ_RELEASE(req);
}
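/*
 * send_msg above picks between an iovec completion callback and a buffer
 * completion callback through the cbfunc union, discriminated by whether
 * req->post.iov is NULL. A minimal, self-contained sketch of that
 * discriminated-union idiom - all names here are illustrative:
 */
#include <stddef.h>

typedef void (*example_iov_cb_t)(int status, void *cbdata);
typedef void (*example_buf_cb_t)(int status, void *buffer, void *cbdata);

typedef struct {
    void *iov;     /* non-NULL => iovec-style post */
    void *buffer;  /* used when iov is NULL */
    union {
        example_iov_cb_t iov;
        example_buf_cb_t buffer;
    } cbfunc;
} example_post_t;

static void example_complete(example_post_t *post, int status, void *cbdata)
{
    /* dispatch on the same discriminator the post was filled with */
    if (NULL != post->iov) {
        post->cbfunc.iov(status, cbdata);
    } else {
        post->cbfunc.buffer(status, post->buffer, cbdata);
    }
}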
int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod,
                                orte_process_name_t* peer,
                                opal_buffer_t* buffer,
                                orte_rml_tag_t tag,
                                orte_rml_buffer_callback_fn_t cbfunc,
                                void* cbdata)
{
    orte_rml_recv_t *rcv;
    orte_rml_send_t *snd;
    orte_self_send_xfer_t *xfer;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_send_buffer to peer %s at tag %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer), tag));

    if (ORTE_RML_TAG_INVALID == tag) {
        /* cannot send to an invalid tag */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }
    if (NULL == peer ||
        OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer)) {
        /* cannot send to an invalid peer */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(peer)));

    /* if this is a message to myself, then just post the message
     * for receipt - no need to dive into the oob
     */
    if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, peer, ORTE_PROC_MY_NAME)) {
        /* local delivery */
        OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                             "%s rml_send_iovec_to_self at tag %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tag));
        /* send to self is a tad tricky - we really don't want
         * to track the send callback function throughout the recv
         * process and execute it upon receipt as this would provide
         * very different timing from a non-self message. Specifically,
         * if we just retain a pointer to the incoming data
         * and then execute the send callback prior to the receive,
         * then the caller will think we are done with the data and
         * can release it. So we have to copy the data in order to
         * execute the send callback prior to receiving the message.
         *
         * In truth, this really is a better mimic of the non-self
         * message behavior. If we actually pushed the message out
         * on the wire and had it loop back, then we would receive
         * a new block of data anyway.
         */
        /* setup the send callback */
        xfer = OBJ_NEW(orte_self_send_xfer_t);
        xfer->buffer = buffer;
        xfer->cbfunc.buffer = cbfunc;
        xfer->tag = tag;
        xfer->cbdata = cbdata;
        /* setup the event for the send callback */
        opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer);
        opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI);
        opal_event_active(&xfer->ev, OPAL_EV_WRITE, 1);

        /* copy the message for the recv */
        rcv = OBJ_NEW(orte_rml_recv_t);
        rcv->sender = *peer;
        rcv->tag = tag;
        rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(buffer->bytes_used);
        memcpy(rcv->iov.iov_base, buffer->base_ptr, buffer->bytes_used);
        rcv->iov.iov_len = buffer->bytes_used;
        /* post the message for receipt - since the send callback was posted
         * first and has the same priority, it will execute first
         */
        ORTE_RML_ACTIVATE_MESSAGE(rcv);
        return ORTE_SUCCESS;
    }

    snd = OBJ_NEW(orte_rml_send_t);
    snd->dst = *peer;
    snd->origin = *ORTE_PROC_MY_NAME;
    snd->tag = tag;
    snd->buffer = buffer;
    snd->cbfunc.buffer = cbfunc;
    snd->cbdata = cbdata;
    snd->routed = strdup(mod->routed);

    /* activate the OOB send state */
    ORTE_OOB_SEND(snd);

    return ORTE_SUCCESS;
}
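/*
 * The self-send path above arms a one-shot event and activates it
 * immediately so the completion callback runs from the event loop rather
 * than inline. OPAL's event calls wrap libevent; a minimal, self-contained
 * sketch of the same pattern written directly against the libevent 2.x API
 * (assumes linking with -levent; names are illustrative):
 */
#include <stdio.h>
#include <event2/event.h>

static void example_send_complete(evutil_socket_t fd, short what, void *arg)
{
    printf("send completion fired for %s\n", (const char *)arg);
}

int main(void)
{
    struct event_base *base = event_base_new();
    struct event ev;

    /* fd of -1: the event is only ever triggered manually */
    event_assign(&ev, base, -1, EV_WRITE, example_send_complete, "msg-1");
    /* activate now - the callback runs on the next pass of the loop */
    event_active(&ev, EV_WRITE, 1);
    event_base_dispatch(base);
    event_base_free(base);
    return 0;
}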
void mca_oob_usock_recv_handler(int sd, short flags, void *cbdata)
{
    mca_oob_usock_peer_t* peer = (mca_oob_usock_peer_t*)cbdata;
    int rc;
    orte_rml_send_t *snd;

    if (orte_abnormal_term_ordered) {
        return;
    }

    opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                        "%s:usock:recv:handler called for peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&peer->name));

    switch (peer->state) {
    case MCA_OOB_USOCK_CONNECT_ACK:
        if (ORTE_SUCCESS == (rc = mca_oob_usock_peer_recv_connect_ack(peer, peer->sd, NULL))) {
            opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                "%s:usock:recv:handler starting send/recv events",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            /* we connected! Start the send/recv events */
            if (!peer->recv_ev_active) {
                opal_event_add(&peer->recv_event, 0);
                peer->recv_ev_active = true;
            }
            if (peer->timer_ev_active) {
                opal_event_del(&peer->timer_event);
                peer->timer_ev_active = false;
            }
            /* if there is a message waiting to be sent, queue it */
            if (NULL == peer->send_msg) {
                peer->send_msg = (mca_oob_usock_send_t*)opal_list_remove_first(&peer->send_queue);
            }
            if (NULL != peer->send_msg && !peer->send_ev_active) {
                opal_event_add(&peer->send_event, 0);
                peer->send_ev_active = true;
            }
            /* update our state */
            peer->state = MCA_OOB_USOCK_CONNECTED;
        } else {
            opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                "%s UNABLE TO COMPLETE CONNECT ACK WITH %s",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&peer->name));
            opal_event_del(&peer->recv_event);
            peer->recv_ev_active = false;
            ORTE_FORCED_TERMINATE(1);
            return;
        }
        break;
    case MCA_OOB_USOCK_CONNECTED:
        opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                            "%s:usock:recv:handler CONNECTED",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        /* allocate a new message and setup for recv */
        if (NULL == peer->recv_msg) {
            opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                "%s:usock:recv:handler allocate new recv msg",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            peer->recv_msg = OBJ_NEW(mca_oob_usock_recv_t);
            if (NULL == peer->recv_msg) {
                opal_output(0, "%s-%s mca_oob_usock_peer_recv_handler: unable to allocate recv message\n",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&(peer->name)));
                return;
            }
            /* start by reading the header */
            peer->recv_msg->rdptr = (char*)&peer->recv_msg->hdr;
            peer->recv_msg->rdbytes = sizeof(mca_oob_usock_hdr_t);
        }
        /* if the header hasn't been completely read, read it */
        if (!peer->recv_msg->hdr_recvd) {
            opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                "%s:usock:recv:handler read hdr",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            if (ORTE_SUCCESS == (rc = read_bytes(peer))) {
                /* completed reading the header */
                peer->recv_msg->hdr_recvd = true;
                /* if this is a zero-byte message, then we are done */
                if (0 == peer->recv_msg->hdr.nbytes) {
                    opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                        "%s RECVD ZERO-BYTE MESSAGE FROM %s for tag %d",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                        ORTE_NAME_PRINT(&peer->name),
                                        peer->recv_msg->hdr.tag);
                    peer->recv_msg->data = NULL;  // make sure
                    peer->recv_msg->rdptr = NULL;
                    peer->recv_msg->rdbytes = 0;
                } else {
                    opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                        "%s:usock:recv:handler allocate data region of size %lu",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                        (unsigned long)peer->recv_msg->hdr.nbytes);
                    /* allocate the data region */
                    peer->recv_msg->data = (char*)malloc(peer->recv_msg->hdr.nbytes);
                    /* point to it */
                    peer->recv_msg->rdptr = peer->recv_msg->data;
                    peer->recv_msg->rdbytes = peer->recv_msg->hdr.nbytes;
                }
                /* fall thru and attempt to read the data */
            } else if (ORTE_ERR_RESOURCE_BUSY == rc ||
                       ORTE_ERR_WOULD_BLOCK == rc) {
                /* exit this event and let the event lib progress */
                return;
            } else {
                /* close the connection */
                opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                    "%s:usock:recv:handler error reading bytes - closing connection",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
                mca_oob_usock_peer_close(peer);
                return;
            }
        }
        if (peer->recv_msg->hdr_recvd) {
            /* continue to read the data block - we start from
             * wherever we left off, which could be at the
             * beginning or somewhere in the message
             */
            if (ORTE_SUCCESS == (rc = read_bytes(peer))) {
                /* we recvd all of the message */
                opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                    "%s RECVD COMPLETE MESSAGE FROM %s OF %d BYTES FOR DEST %s TAG %d",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                    ORTE_NAME_PRINT(&peer->recv_msg->hdr.origin),
                                    (int)peer->recv_msg->hdr.nbytes,
                                    ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst),
                                    peer->recv_msg->hdr.tag);
                /* am I the intended recipient? */
                if (peer->recv_msg->hdr.dst.jobid == ORTE_PROC_MY_NAME->jobid &&
                    peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) {
                    /* yes - post it to the RML for delivery */
                    opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                        "%s DELIVERING TO RML",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
                    ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin,
                                          peer->recv_msg->hdr.tag,
                                          peer->recv_msg->hdr.seq_num,
                                          peer->recv_msg->data,
                                          peer->recv_msg->hdr.nbytes);
                    OBJ_RELEASE(peer->recv_msg);
                } else {
                    /* no - we don't route things, so we promote this
                     * back to the OOB and let another transport move
                     * it along. If we are a daemon and it is intended
                     * for another of our local procs, it will just come
                     * back to us and be handled then
                     */
                    snd = OBJ_NEW(orte_rml_send_t);
                    snd->dst = peer->recv_msg->hdr.dst;
                    snd->origin = peer->recv_msg->hdr.origin;
                    snd->tag = peer->recv_msg->hdr.tag;
                    snd->data = peer->recv_msg->data;
                    snd->seq_num = peer->recv_msg->hdr.seq_num;
                    snd->count = peer->recv_msg->hdr.nbytes;
                    snd->cbfunc.iov = NULL;
                    snd->cbdata = NULL;
                    /* activate the OOB send state */
                    ORTE_OOB_SEND(snd);
                    /* protect the data */
                    peer->recv_msg->data = NULL;
                    /* cleanup */
                    OBJ_RELEASE(peer->recv_msg);
                    return;
                }
            } else if (ORTE_ERR_RESOURCE_BUSY == rc ||
                       ORTE_ERR_WOULD_BLOCK == rc) {
                /* exit this event and let the event lib progress */
                return;
            } else {
                /* report the error */
                opal_output(0, "%s-%s mca_oob_usock_peer_recv_handler: unable to recv message",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&(peer->name)));
                /* turn off the recv event */
                opal_event_del(&peer->recv_event);
                peer->recv_ev_active = false;
                ORTE_FORCED_TERMINATE(1);
                return;
            }
        }
        break;
    default:
        opal_output(0, "%s-%s mca_oob_usock_peer_recv_handler: invalid socket state(%d)",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->name)),
                    peer->state);
        // mca_oob_usock_peer_close(peer);
        break;
    }
}
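/*
 * The CONNECTED branch above implements a two-phase nonblocking read -
 * fixed-size header first, then the payload the header describes - resuming
 * wherever the previous partial read stopped (rdptr/rdbytes). A minimal,
 * self-contained sketch of that resumable-read pattern on a plain POSIX
 * socket; the struct and helper names are illustrative:
 */
#include <errno.h>
#include <unistd.h>

typedef struct {
    char  *rdptr;    /* next byte to fill */
    size_t rdbytes;  /* bytes still needed */
} example_read_state_t;

/* returns 1 when the current region is complete, 0 to wait for more
 * readability, -1 on EOF or a hard error (close the connection) */
static int example_read_some(int sd, example_read_state_t *st)
{
    while (st->rdbytes > 0) {
        ssize_t n = read(sd, st->rdptr, st->rdbytes);
        if (n > 0) {
            st->rdptr += n;
            st->rdbytes -= (size_t)n;
        } else if (n < 0 && (EAGAIN == errno || EWOULDBLOCK == errno)) {
            return 0;   /* come back when the event lib says readable */
        } else if (n < 0 && EINTR == errno) {
            continue;   /* interrupted - retry */
        } else {
            return -1;  /* EOF or hard error */
        }
    }
    return 1;
}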