void mca_oob_usock_component_cannot_send(int fd, short args, void *cbdata)
{
    mca_oob_usock_msg_error_t *pop = (mca_oob_usock_msg_error_t*)cbdata;
    uint64_t ui64;
    int rc;

    opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                        "%s usock:unable to send to peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&pop->hop));

    /* pack the peer's name into the hash key used by the OOB peer table */
    memcpy(&ui64, (char*)&(pop->hop), sizeof(uint64_t));

    /* mark in the OOB's table that we can no longer reach this peer - for now,
     * we don't worry about shifting to another component. Eventually, we will
     * want to push this decision up to the OOB so it can try other components
     * and eventually error out
     */
    if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
                                                               ui64, NULL))) {
        ORTE_ERROR_LOG(rc);
    }

    /* have the OOB base try to send it again */
    ORTE_OOB_SEND(pop->rmsg);

    OBJ_RELEASE(pop);
}
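The memcpy above packs the two 32-bit fields of the peer's orte_process_name_t into a single uint64_t so it can serve as the OOB hash-table key. A minimal standalone sketch of that packing idiom, using a hypothetical name_t struct in place of the real ORTE type:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* hypothetical stand-in for orte_process_name_t: two 32-bit fields */
typedef struct {
    uint32_t jobid;
    uint32_t vpid;
} name_t;

/* pack both fields into one uint64_t hash key, as the memcpy in the
 * handler above does; assumes sizeof(name_t) == 8 with no padding */
static uint64_t name_to_key(const name_t *name)
{
    uint64_t key;
    memcpy(&key, name, sizeof(key));
    return key;
}

int main(void)
{
    name_t peer = { .jobid = 42, .vpid = 7 };
    printf("key = 0x%016" PRIx64 "\n", name_to_key(&peer));
    return 0;
}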
Example #2
File: oob_ud_req.c, Project: 00datman/ompi
void mca_oob_ud_req_complete (mca_oob_ud_req_t *req, int rc)
{
    int i;
    opal_output_verbose(10, orte_oob_base_framework.framework_output,
                         "%s oob:ud:req_complete %s request %p completed with status %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (req->type == MCA_OOB_UD_REQ_SEND) ? "SEND":"RECV", (void *) req, rc);

    if (NULL != req->req_qp) {
        (void) mca_oob_ud_qp_data_release (req->req_qp);
        req->req_qp = NULL;
    }

    /* deregister memory *before* handing it to the callback */
    MCA_OOB_UD_REQ_DEREG_MR(req);

    switch (req->type) {
    case MCA_OOB_UD_REQ_SEND:
        if (req->req_data_type != MCA_OOB_UD_REQ_TR) {
            req->rml_msg->status = rc;
        }
        break;
    case MCA_OOB_UD_REQ_RECV:
        if ((req->req_target.jobid == ORTE_PROC_MY_NAME->jobid) &&
            (req->req_target.vpid == ORTE_PROC_MY_NAME->vpid)) {
            opal_output_verbose(1, orte_oob_base_framework.framework_output,
                "%s DELIVERING TO RML",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            if (MCA_OOB_UD_REQ_IOV == req->req_data_type) {
                int datalen = 0;
                /* size the gather buffer from the summed payload lengths -
                 * sizing it from the iovec count alone could under-allocate */
                for (i = 0 ; i < req->req_data.iov.count; ++i) {
                    datalen += req->req_data.iov.uiov[i].iov_len;
                }
                char *data = (char *)malloc(datalen);
                datalen = 0;
                for (i = 0 ; i < req->req_data.iov.count; ++i) {
                    memcpy (&data[datalen], req->req_data.iov.uiov[i].iov_base,
                            req->req_data.iov.uiov[i].iov_len);
                    datalen += req->req_data.iov.uiov[i].iov_len;
                }
                ORTE_RML_POST_MESSAGE(&req->req_origin, req->req_tag, req->req_seq_num, data, datalen);
                free(data);
            } else {
                ORTE_RML_POST_MESSAGE(&req->req_origin, req->req_tag, req->req_seq_num,
                                      req->req_data.buf.p, req->req_data.buf.size);
            }
        } else {
            opal_output_verbose(1, orte_oob_base_framework.framework_output,
                                "%s UD PROMOTING ROUTED MESSAGE FOR %s TO OOB",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&req->req_target));

            orte_rml_send_t *snd = OBJ_NEW(orte_rml_send_t);
            snd->dst = req->req_target;
            snd->origin = req->req_origin;
            snd->tag = req->req_tag;
            snd->seq_num = req->req_seq_num;
            if (MCA_OOB_UD_REQ_IOV == req->req_data_type) {
                int datalen = 0;
                /* same fix as above: size from the summed payload lengths */
                for (i = 0 ; i < req->req_data.iov.count; ++i) {
                    datalen += req->req_data.iov.uiov[i].iov_len;
                }
                char *data = (char *)malloc(datalen);
                datalen = 0;
                for (i = 0 ; i < req->req_data.iov.count; ++i) {
                    memcpy (&data[datalen], req->req_data.iov.uiov[i].iov_base,
                            req->req_data.iov.uiov[i].iov_len);
                    datalen += req->req_data.iov.uiov[i].iov_len;
                }
                snd->data = data;
                snd->count = datalen;
            } else {
                char *data = (char *)calloc(req->req_data.buf.size, sizeof(char));
                memcpy (data, req->req_data.buf.p, req->req_data.buf.size);
                snd->data = data;
                snd->count = req->req_data.buf.size;
            }
            snd->cbfunc.iov = NULL;
            snd->cbdata = NULL;
            /* activate the OOB send state */
            ORTE_OOB_SEND(snd);
        }
        break;
    default:
        break;
    }

    mca_oob_ud_req_return (req);
}
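Both IOV branches above flatten a scattered iovec array into one contiguous buffer before handing it off, sizing the buffer from the summed payload lengths. A standalone sketch of that gather step (gather_iov is an illustrative helper, not part of the OOB/UD code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>

/* gather an iovec array into one contiguous malloc'd buffer, sized
 * from the summed payload lengths; caller frees; NULL on failure */
static char *gather_iov(const struct iovec *iov, int count, size_t *lenp)
{
    size_t total = 0;
    char *buf, *ptr;
    int i;

    for (i = 0; i < count; ++i) {
        total += iov[i].iov_len;
    }
    if (NULL == (buf = malloc(total > 0 ? total : 1))) {
        return NULL;
    }
    ptr = buf;
    for (i = 0; i < count; ++i) {
        memcpy(ptr, iov[i].iov_base, iov[i].iov_len);
        ptr += iov[i].iov_len;
    }
    *lenp = total;
    return buf;
}

int main(void)
{
    struct iovec iov[2] = { { .iov_base = (void *)"hello ", .iov_len = 6 },
                            { .iov_base = (void *)"world",  .iov_len = 5 } };
    size_t len;
    char *buf = gather_iov(iov, 2, &len);
    printf("%.*s (%zu bytes)\n", (int)len, buf, len);
    free(buf);
    return 0;
}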
Example #3
void orte_qos_ack_channel_process_ack (int status, orte_process_name_t* sender,
                                       opal_buffer_t *buffer,
                                       orte_rml_tag_t tag, void *cbdata)
{
    /*  process ack received for the msg */
    uint32_t num_msgs_acked, channel_num, i;
    int32_t num_values, room_num;
    orte_rml_send_t *msg, *missed_msg;
    void *occupant = NULL;
    orte_rml_channel_t *channel;
    orte_qos_ack_channel_t *ack_chan;
    uint32_t *seq_num_array;
    uint32_t ack_type;
    uint32_t missed_msg_seq_num = 0;
    num_values = 1;
    /* unpack channel number first */
    opal_dss.unpack(buffer, (void*) &channel_num, &num_values, OPAL_UINT32);
    OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output,
                         "orte_qos_ack_channel_process_ack recieved ack on channel = %d",
                         channel_num));
    channel = orte_rml_base_get_channel (channel_num);
    if ((NULL != channel) && (NULL != channel->qos_channel_ptr)) {
        ack_chan = (orte_qos_ack_channel_t *) (channel->qos_channel_ptr);
        seq_num_array = malloc (sizeof(uint32_t) * ack_chan->window);
        num_values = 1;
        /* unpack ack type */
        opal_dss.unpack(buffer, (void*) &ack_type, &num_values, OPAL_UINT32);
        num_values = 1;
        /* unpack num messages acked */
        opal_dss.unpack(buffer, (void*) &num_msgs_acked, &num_values, OPAL_UINT32);
        OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output,
                             "orte_qos_ack_channel_process_ack recieved ack type %d for %d msgs on channel = %d",
                             ack_type, num_msgs_acked, channel_num));
        if (ACK_OUT_OF_ORDER != ack_type) {
            /* handle a normal ACK */
            for (i = 0; i < num_msgs_acked; i++) {
                opal_dss.unpack(buffer, (void*) &seq_num_array[i], &num_values, OPAL_UINT32);
                room_num = orte_qos_ack_channel_get_msg_room (ack_chan, seq_num_array[i]);
                opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, &occupant);
                orte_qos_ack_channel_set_msg_room(ack_chan, seq_num_array[i], -1);
                if ((NULL != occupant) && (-1 != room_num)) {
                    msg = (orte_rml_send_t*) occupant;
                    OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output,
                                         "Releasing sent message with tag %d and seq_num %d after receiving ack from dest",
                                         msg->tag, msg->seq_num));
                    msg->status = ORTE_SUCCESS;
                    ORTE_RML_SEND_COMPLETE(msg);
                } else {
                    OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output,
                                         "OOPS received an ACK for already completed seq_num = %d",
                                         seq_num_array[i]));
                }
            }
        } else {
            /* handle an out-of-order ACK: complete the msgs received in order,
             * then retry the lost ones */
            for (i = 0; i < num_msgs_acked; i++) {
                opal_dss.unpack(buffer, (void*) &seq_num_array[i], &num_values, OPAL_UINT32);
                room_num = orte_qos_ack_channel_get_msg_room (ack_chan, seq_num_array[i]);
                opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, &occupant);
                orte_qos_ack_channel_set_msg_room(ack_chan, seq_num_array[i], -1);
                if ((NULL != occupant) && ((0 == i) || (seq_num_array[i] == seq_num_array[i-1] + 1))) {
                    msg = (orte_rml_send_t*) occupant;
                    msg->status = ORTE_SUCCESS;
                    ORTE_RML_SEND_COMPLETE(msg);
                } else if (NULL != occupant) {
                    /* a gap in the acked seq_nums marks lost msgs; it can
                     * only appear at the end of the acked list */
                    assert(i == num_msgs_acked - 1);
                    /* recheck the ith msg */
                    opal_hotel_checkin(&ack_chan->outstanding_msgs, (void*)occupant, &room_num);
                    orte_qos_ack_channel_set_msg_room (ack_chan, seq_num_array[i], room_num);
                    /* resend and recheck all the missed msgs */
                    for (missed_msg_seq_num = seq_num_array[i-1] + 1;
                         missed_msg_seq_num < seq_num_array[i]; missed_msg_seq_num++) {
                        room_num = orte_qos_ack_channel_get_msg_room (ack_chan, missed_msg_seq_num);
                        opal_hotel_checkout_and_return_occupant (&ack_chan->outstanding_msgs, room_num, &occupant);
                        assert(NULL != occupant);
                        missed_msg = (orte_rml_send_t*) occupant;
                        missed_msg->status = ORTE_ERR_LOST_MSG_IN_WINDOW;
                        opal_hotel_checkin(&ack_chan->outstanding_msgs, (void*)missed_msg, &room_num);
                        orte_qos_ack_channel_set_msg_room (ack_chan, missed_msg_seq_num, room_num);
                        /* send this out on the wire directly */
                        ORTE_OOB_SEND(missed_msg);
                    }
                } else {
                    OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output,
                                         "OOPS received an ACK for already completed seq_num = %d",
                                         seq_num_array[i]));
                }
            }
        }
        free(seq_num_array);
    } else {
        OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output,
                             "orte_qos_ack_channel_msg_ack_recv_callback received ack on non-existent channel = %d",
                             channel_num));
    }
}
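The out-of-order branch applies a simple windowed rule: an acked sequence number that extends the consecutive run completes its message, while a jump exposes a lost range that must be resent and a jumped-to message that must be rechecked. A minimal standalone model of just that classification rule, without the opal_hotel bookkeeping:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* classify an out-of-order ACK list: seq numbers extending the
 * consecutive run are complete; a jump exposes the lost range
 * (prev+1 .. cur-1) to resend, and the jumped-to msg is rechecked */
static void classify_acks(const uint32_t *acked, int n)
{
    int i;
    uint32_t s;

    for (i = 0; i < n; ++i) {
        if (0 == i || acked[i] == acked[i - 1] + 1) {
            printf("complete seq %" PRIu32 "\n", acked[i]);
        } else {
            for (s = acked[i - 1] + 1; s < acked[i]; ++s) {
                printf("resend   seq %" PRIu32 "\n", s);
            }
            printf("recheck  seq %" PRIu32 "\n", acked[i]);
        }
    }
}

int main(void)
{
    /* 4, 5, 6 extend the run; the jump to 9 means 7 and 8 were lost */
    const uint32_t acked[] = { 4, 5, 6, 9 };
    classify_acks(acked, 4);
    return 0;
}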
Example #4
int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod,
                         orte_process_name_t* peer,
                         struct iovec* iov,
                         int count,
                         orte_rml_tag_t tag,
                         orte_rml_callback_fn_t cbfunc,
                         void* cbdata)
{
    orte_rml_recv_t *rcv;
    orte_rml_send_t *snd;
    int bytes;
    orte_self_send_xfer_t *xfer;
    int i;
    char* ptr;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_send to peer %s at tag %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer), tag));

    if (ORTE_RML_TAG_INVALID == tag) {
        /* cannot send to an invalid tag */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }
    if (NULL == peer ||
        OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer)) {
        /* cannot send to an invalid peer */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* if this is a message to myself, then just post the message
     * for receipt - no need to dive into the oob
     */
    if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, peer, ORTE_PROC_MY_NAME)) {  /* local delivery */
        OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                             "%s rml_send_iovec_to_self at tag %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tag));
        /* send to self is a tad tricky - we really don't want
         * to track the send callback function throughout the recv
         * process and execute it upon receipt as this would provide
         * very different timing from a non-self message. Specifically,
         * if we just retain a pointer to the incoming data
         * and then execute the send callback prior to the receive,
         * then the caller will think we are done with the data and
         * can release it. So we have to copy the data in order to
         * execute the send callback prior to receiving the message.
         *
         * In truth, this really is a better mimic of the non-self
         * message behavior. If we actually pushed the message out
         * on the wire and had it loop back, then we would receive
         * a new block of data anyway.
         */

        /* setup the send callback */
        xfer = OBJ_NEW(orte_self_send_xfer_t);
        xfer->iov = iov;
        xfer->count = count;
        xfer->cbfunc.iov = cbfunc;
        xfer->tag = tag;
        xfer->cbdata = cbdata;
        /* setup the event for the send callback */
        ORTE_THREADSHIFT(xfer, orte_event_base, send_self_exe, ORTE_MSG_PRI);

        /* copy the message for the recv */
        rcv = OBJ_NEW(orte_rml_recv_t);
        rcv->sender = *peer;
        rcv->tag = tag;
        /* get the total number of bytes in the iovec array */
        bytes = 0;
        for (i = 0 ; i < count ; ++i) {
            bytes += iov[i].iov_len;
        }
        /* get the required memory allocation */
        if (0 < bytes) {
            rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(bytes);
            rcv->iov.iov_len = bytes;
            /* transfer the bytes */
            ptr =  (char*)rcv->iov.iov_base;
            for (i = 0 ; i < count ; ++i) {
                memcpy(ptr, iov[i].iov_base, iov[i].iov_len);
                ptr += iov[i].iov_len;
            }
        }
        /* post the message for receipt - since the send callback was posted
         * first and has the same priority, it will execute first
         */
        ORTE_RML_ACTIVATE_MESSAGE(rcv);
        return ORTE_SUCCESS;
    }

    snd = OBJ_NEW(orte_rml_send_t);
    snd->dst = *peer;
    snd->origin = *ORTE_PROC_MY_NAME;
    snd->tag = tag;
    snd->iov = iov;
    snd->count = count;
    snd->cbfunc.iov = cbfunc;
    snd->cbdata = cbdata;
    snd->routed = strdup(mod->routed);

    /* activate the OOB send state */
    ORTE_OOB_SEND(snd);

    return ORTE_SUCCESS;
}
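For context, a hypothetical caller sketch for the function above. The key contract is buffer ownership: the iovec array and its payloads must stay valid until the completion callback fires. MY_EXAMPLE_TAG and both helper names are illustrative assumptions, and the callback follows the orte_rml_callback_fn_t shape used above:

/* hypothetical caller of orte_rml_oob_send_nb() above. MY_EXAMPLE_TAG
 * and both helper names are illustrative, not real ORTE definitions. */
#define MY_EXAMPLE_TAG ((orte_rml_tag_t)999)

static void my_send_complete_cb(int status, orte_process_name_t *peer,
                                struct iovec *iov, int count,
                                orte_rml_tag_t tag, void *cbdata)
{
    /* only here may the iovec array and its buffers be released -
     * the send owns them until this callback fires */
}

static int send_two_fragments(struct orte_rml_base_module_t *mod,
                              orte_process_name_t *peer,
                              struct iovec iov[2])
{
    /* iov[] must remain valid until my_send_complete_cb runs */
    return orte_rml_oob_send_nb(mod, peer, iov, 2, MY_EXAMPLE_TAG,
                                my_send_complete_cb, NULL);
}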
Example #5
void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata)
{
    mca_oob_tcp_msg_error_t *mop = (mca_oob_tcp_msg_error_t*)cbdata;
    uint64_t ui64;
    orte_rml_send_t *snd;
    orte_oob_base_peer_t *bpr;

    opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                        "%s tcp:unknown hop called for peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&mop->hop));

    if (orte_finalizing || orte_abnormal_term_ordered) {
        /* just ignore the problem */
        OBJ_RELEASE(mop);
        return;
    }

    /* mark that this component cannot reach this hop */
    memcpy(&ui64, (char*)&(mop->hop), sizeof(uint64_t));
    if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
                                                         ui64, (void**)&bpr) ||
        NULL == bpr) {
        /* the overall OOB has no knowledge of this hop. Only
         * way this could happen is if the peer contacted us
         * via this component, and it wasn't entered into the
         * OOB framework hash table. We have no way of knowing
         * what to do next, so just output an error message and
         * abort */
        opal_output(0, "%s ERROR: message to %s requires routing and the OOB has no knowledge of the reqd hop %s",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&mop->snd->hdr.dst),
                    ORTE_NAME_PRINT(&mop->hop));
        ORTE_ACTIVATE_PROC_STATE(&mop->hop, ORTE_PROC_STATE_COMM_FAILED);
        OBJ_RELEASE(mop);
        return;
    }
    opal_bitmap_clear_bit(&bpr->addressable, mca_oob_tcp_component.super.idx);

    /* mark that this component cannot reach this destination either */
    memcpy(&ui64, (char*)&(mop->snd->hdr.dst), sizeof(uint64_t));
    if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
                                                         ui64, (void**)&bpr) ||
        NULL == bpr) {
        opal_output(0, "%s ERROR: message to %s requires routing and the OOB has no knowledge of this process",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&mop->snd->hdr.dst));
        ORTE_ACTIVATE_PROC_STATE(&mop->hop, ORTE_PROC_STATE_COMM_FAILED);
        OBJ_RELEASE(mop);
        return;
    }
    opal_bitmap_clear_bit(&bpr->addressable, mca_oob_tcp_component.super.idx);

    /* post the message to the OOB so it can see
     * if another component can transfer it
     */
    MCA_OOB_TCP_HDR_NTOH(&mop->snd->hdr);
    snd = OBJ_NEW(orte_rml_send_t);
    snd->dst = mop->snd->hdr.dst;
    snd->origin = mop->snd->hdr.origin;
    snd->tag = mop->snd->hdr.tag;
    snd->data = mop->snd->data;
    snd->count = mop->snd->hdr.nbytes;
    snd->cbfunc.iov = NULL;
    snd->cbdata = NULL;
    /* activate the OOB send state */
    ORTE_OOB_SEND(snd);
    /* protect the data */
    mop->snd->data = NULL;

    OBJ_RELEASE(mop);
}
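The MCA_OOB_TCP_HDR_NTOH call matters here: the header arrived in network byte order, and its fields must be converted before they are copied into the new orte_rml_send_t. A sketch of that in-place conversion idiom with a hypothetical two-field header (the real mca_oob_tcp_hdr_t layout differs):

#include <arpa/inet.h>
#include <stdint.h>

/* hypothetical wire header standing in for mca_oob_tcp_hdr_t;
 * the field set is illustrative, not the real ORTE layout */
typedef struct {
    uint32_t tag;
    uint32_t nbytes;
} wire_hdr_t;

/* convert the header in place from network to host byte order,
 * the role MCA_OOB_TCP_HDR_NTOH plays above before the fields
 * are copied into the new orte_rml_send_t */
static void wire_hdr_ntoh(wire_hdr_t *hdr)
{
    hdr->tag    = ntohl(hdr->tag);
    hdr->nbytes = ntohl(hdr->nbytes);
}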
Example #6
static void send_msg(int fd, short args, void *cbdata)
{
    orte_rml_send_request_t *req = (orte_rml_send_request_t*)cbdata;
    orte_process_name_t *peer = &(req->post.dst);
    orte_rml_tag_t tag = req->post.tag;
    orte_rml_recv_t *rcv;
    orte_rml_send_t *snd;
    int bytes;
    orte_self_send_xfer_t *xfer;
    int i;
    char* ptr;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_send_msg to peer %s at tag %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer), tag));
    OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(peer)));

    /* if this is a message to myself, then just post the message
     * for receipt - no need to dive into the oob
     */
    if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, peer, ORTE_PROC_MY_NAME)) {  /* local delivery */
        OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                             "%s rml_send_iovec_to_self at tag %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tag));
        /* send to self is a tad tricky - we really don't want
         * to track the send callback function throughout the recv
         * process and execute it upon receipt as this would provide
         * very different timing from a non-self message. Specifically,
         * if we just retain a pointer to the incoming data
         * and then execute the send callback prior to the receive,
         * then the caller will think we are done with the data and
         * can release it. So we have to copy the data in order to
         * execute the send callback prior to receiving the message.
         *
         * In truth, this really is a better mimic of the non-self
         * message behavior. If we actually pushed the message out
         * on the wire and had it loop back, then we would receive
         * a new block of data anyway.
         */

        /* setup the send callback */
        xfer = OBJ_NEW(orte_self_send_xfer_t);
        if (NULL != req->post.iov) {
            xfer->iov = req->post.iov;
            xfer->count = req->post.count;
            xfer->cbfunc.iov = req->post.cbfunc.iov;
        } else {
            xfer->buffer = req->post.buffer;
            xfer->cbfunc.buffer = req->post.cbfunc.buffer;
        }
        xfer->tag = tag;
        xfer->cbdata = req->post.cbdata;
        /* setup the event for the send callback */
        opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer);
        opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI);
        opal_event_active(&xfer->ev, OPAL_EV_WRITE, 1);

        /* copy the message for the recv */
        rcv = OBJ_NEW(orte_rml_recv_t);
        rcv->sender = *peer;
        rcv->tag = tag;
        if (NULL != req->post.iov) {
            /* get the total number of bytes in the iovec array */
            bytes = 0;
            for (i = 0 ; i < req->post.count ; ++i) {
                bytes += req->post.iov[i].iov_len;
            }
            /* get the required memory allocation */
            if (0 < bytes) {
                rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(bytes);
                rcv->iov.iov_len = bytes;
                /* transfer the bytes */
                ptr =  (char*)rcv->iov.iov_base;
                for (i = 0 ; i < req->post.count ; ++i) {
                    memcpy(ptr, req->post.iov[i].iov_base, req->post.iov[i].iov_len);
                    ptr += req->post.iov[i].iov_len;
                }
            }
        } else if (0 < req->post.buffer->bytes_used) {
            rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(req->post.buffer->bytes_used);
            memcpy(rcv->iov.iov_base, req->post.buffer->base_ptr, req->post.buffer->bytes_used);
            rcv->iov.iov_len = req->post.buffer->bytes_used;
        }
        /* post the message for receipt - since the send callback was posted
         * first and has the same priority, it will execute first
         */
        ORTE_RML_ACTIVATE_MESSAGE(rcv);
        OBJ_RELEASE(req);
        return;
    }

    snd = OBJ_NEW(orte_rml_send_t);
    snd->dst = *peer;
    snd->origin = *ORTE_PROC_MY_NAME;
    snd->tag = tag;
    if (NULL != req->post.iov) {
        snd->iov = req->post.iov;
        snd->count = req->post.count;
        snd->cbfunc.iov = req->post.cbfunc.iov;
    } else {
        snd->buffer = req->post.buffer;
        snd->cbfunc.buffer = req->post.cbfunc.buffer;
    }
    snd->cbdata = req->post.cbdata;

    /* activate the OOB send state */
    ORTE_OOB_SEND(snd);

    OBJ_RELEASE(req);
}
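The opal_event_set/opal_event_set_priority/opal_event_active triplet above is the "activate immediately" idiom: a non-fd event is created and fired by hand, so the send callback runs from inside the event loop rather than inline in the caller. A standalone sketch of the same idiom in plain libevent2, which the opal_event_* wrappers are built on:

#include <event2/event.h>
#include <stdio.h>

/* fires from inside the event loop, never inline in the caller */
static void fire_cb(evutil_socket_t fd, short what, void *arg)
{
    printf("ran inside the event loop: %s\n", (const char *)arg);
}

int main(void)
{
    struct event_base *base = event_base_new();
    /* fd of -1: this event is never armed on a descriptor */
    struct event *ev = event_new(base, -1, EV_WRITE, fire_cb,
                                 (void *)"send-self");
    event_active(ev, EV_WRITE, 1);       /* queue it, like opal_event_active */
    event_base_loop(base, EVLOOP_ONCE);  /* fire_cb executes here */
    event_free(ev);
    event_base_free(base);
    return 0;
}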
Example #7
int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod,
                                orte_process_name_t* peer,
                                opal_buffer_t* buffer,
                                orte_rml_tag_t tag,
                                orte_rml_buffer_callback_fn_t cbfunc,
                                void* cbdata)
{
    orte_rml_recv_t *rcv;
    orte_rml_send_t *snd;
    orte_self_send_xfer_t *xfer;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_send_buffer to peer %s at tag %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer), tag));

    if (ORTE_RML_TAG_INVALID == tag) {
        /* cannot send to an invalid tag */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }
    if (NULL == peer ||
        OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer)) {
        /* cannot send to an invalid peer */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(peer)));

    /* if this is a message to myself, then just post the message
     * for receipt - no need to dive into the oob
     */
    if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, peer, ORTE_PROC_MY_NAME)) {  /* local delivery */
        OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                             "%s rml_send_iovec_to_self at tag %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tag));
        /* send to self is a tad tricky - we really don't want
         * to track the send callback function throughout the recv
         * process and execute it upon receipt as this would provide
         * very different timing from a non-self message. Specifically,
         * if we just retain a pointer to the incoming data
         * and then execute the send callback prior to the receive,
         * then the caller will think we are done with the data and
         * can release it. So we have to copy the data in order to
         * execute the send callback prior to receiving the message.
         *
         * In truth, this really is a better mimic of the non-self
         * message behavior. If we actually pushed the message out
         * on the wire and had it loop back, then we would receive
         * a new block of data anyway.
         */

        /* setup the send callback */
        xfer = OBJ_NEW(orte_self_send_xfer_t);
        xfer->buffer = buffer;
        xfer->cbfunc.buffer = cbfunc;
        xfer->tag = tag;
        xfer->cbdata = cbdata;
        /* setup the event for the send callback */
        opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer);
        opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI);
        opal_event_active(&xfer->ev, OPAL_EV_WRITE, 1);

        /* copy the message for the recv */
        rcv = OBJ_NEW(orte_rml_recv_t);
        rcv->sender = *peer;
        rcv->tag = tag;
        rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(buffer->bytes_used);
        memcpy(rcv->iov.iov_base, buffer->base_ptr, buffer->bytes_used);
        rcv->iov.iov_len = buffer->bytes_used;
        /* post the message for receipt - since the send callback was posted
         * first and has the same priority, it will execute first
         */
        ORTE_RML_ACTIVATE_MESSAGE(rcv);
        return ORTE_SUCCESS;
    }

    snd = OBJ_NEW(orte_rml_send_t);
    snd->dst = *peer;
    snd->origin = *ORTE_PROC_MY_NAME;
    snd->tag = tag;
    snd->buffer = buffer;
    snd->cbfunc.buffer = cbfunc;
    snd->cbdata = cbdata;
    snd->routed = strdup(mod->routed);

    /* activate the OOB send state */
    ORTE_OOB_SEND(snd);

    return ORTE_SUCCESS;
}
Example #8
void mca_oob_usock_recv_handler(int sd, short flags, void *cbdata)
{
    mca_oob_usock_peer_t* peer = (mca_oob_usock_peer_t*)cbdata;
    int rc;
    orte_rml_send_t *snd;

    if (orte_abnormal_term_ordered) {
        return;
    }

    opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                        "%s:usock:recv:handler called for peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&peer->name));

    switch (peer->state) {
    case MCA_OOB_USOCK_CONNECT_ACK:
        if (ORTE_SUCCESS == (rc = mca_oob_usock_peer_recv_connect_ack(peer, peer->sd, NULL))) {
            opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                "%s:usock:recv:handler starting send/recv events",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            /* we connected! Start the send/recv events */
            if (!peer->recv_ev_active) {
                opal_event_add(&peer->recv_event, 0);
                peer->recv_ev_active = true;
            }
            if (peer->timer_ev_active) {
                opal_event_del(&peer->timer_event);
                peer->timer_ev_active = false;
            }
            /* if there is a message waiting to be sent, queue it */
            if (NULL == peer->send_msg) {
                peer->send_msg = (mca_oob_usock_send_t*)opal_list_remove_first(&peer->send_queue);
            }
            if (NULL != peer->send_msg && !peer->send_ev_active) {
                opal_event_add(&peer->send_event, 0);
                peer->send_ev_active = true;
            }
            /* update our state */
            peer->state = MCA_OOB_USOCK_CONNECTED;
        } else {
            opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                "%s UNABLE TO COMPLETE CONNECT ACK WITH %s",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&peer->name));
            opal_event_del(&peer->recv_event);
            peer->recv_ev_active = false;
            ORTE_FORCED_TERMINATE(1);
            return;
        }
        break;
    case MCA_OOB_USOCK_CONNECTED:
        opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                            "%s:usock:recv:handler CONNECTED",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        /* allocate a new message and setup for recv */
        if (NULL == peer->recv_msg) {
            opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                "%s:usock:recv:handler allocate new recv msg",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            peer->recv_msg = OBJ_NEW(mca_oob_usock_recv_t);
            if (NULL == peer->recv_msg) {
                opal_output(0, "%s-%s mca_oob_usock_peer_recv_handler: unable to allocate recv message\n",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&(peer->name)));
                return;
            }
            /* start by reading the header */
            peer->recv_msg->rdptr = (char*)&peer->recv_msg->hdr;
            peer->recv_msg->rdbytes = sizeof(mca_oob_usock_hdr_t);
        }
        /* if the header hasn't been completely read, read it */
        if (!peer->recv_msg->hdr_recvd) {
            opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                "%s:usock:recv:handler read hdr",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            if (ORTE_SUCCESS == (rc = read_bytes(peer))) {
                /* completed reading the header */
                peer->recv_msg->hdr_recvd = true;
                /* if this is a zero-byte message, then we are done */
                if (0 == peer->recv_msg->hdr.nbytes) {
                    opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                        "%s RECVD ZERO-BYTE MESSAGE FROM %s for tag %d",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                        ORTE_NAME_PRINT(&peer->name), peer->recv_msg->hdr.tag);
                    peer->recv_msg->data = NULL;  // make sure
                    peer->recv_msg->rdptr = NULL;
                    peer->recv_msg->rdbytes = 0;
                } else {
                    opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                        "%s:usock:recv:handler allocate data region of size %lu",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)peer->recv_msg->hdr.nbytes);
                    /* allocate the data region */
                    peer->recv_msg->data = (char*)malloc(peer->recv_msg->hdr.nbytes);
                    /* point to it */
                    peer->recv_msg->rdptr = peer->recv_msg->data;
                    peer->recv_msg->rdbytes = peer->recv_msg->hdr.nbytes;
                }
                /* fall thru and attempt to read the data */
            } else if (ORTE_ERR_RESOURCE_BUSY == rc ||
                       ORTE_ERR_WOULD_BLOCK == rc) {
                /* exit this event and let the event lib progress */
                return;
            } else {
                /* close the connection */
                opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                    "%s:usock:recv:handler error reading bytes - closing connection",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
                mca_oob_usock_peer_close(peer);
                return;
            }
        }

        if (peer->recv_msg->hdr_recvd) {
            /* continue to read the data block - we start from
             * wherever we left off, which could be at the
             * beginning or somewhere in the message
             */
            if (ORTE_SUCCESS == (rc = read_bytes(peer))) {
                /* we recvd all of the message */
                opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                    "%s RECVD COMPLETE MESSAGE FROM %s OF %d BYTES FOR DEST %s TAG %d",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                    ORTE_NAME_PRINT(&peer->recv_msg->hdr.origin),
                                    (int)peer->recv_msg->hdr.nbytes,
                                    ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst),
                                    peer->recv_msg->hdr.tag);
                /* am I the intended recipient? */
                if (peer->recv_msg->hdr.dst.jobid == ORTE_PROC_MY_NAME->jobid &&
                    peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) {
                    /* yes - post it to the RML for delivery */
                    opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                       "%s DELIVERING TO RML",
                                       ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
                    ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin, peer->recv_msg->hdr.tag,
                                          peer->recv_msg->hdr.seq_num,
                                          peer->recv_msg->data,
                                          peer->recv_msg->hdr.nbytes);
                    OBJ_RELEASE(peer->recv_msg);
                } else {
                    /* no - we don't route things, so we promote this
                     * back to the OOB and let another transport move
                     * it along. If we are a daemon and it is intended
                     * for another of our local procs, it will just come
                     * back to us and be handled then
                     */
                    snd = OBJ_NEW(orte_rml_send_t);
                    snd->dst = peer->recv_msg->hdr.dst;
                    snd->origin = peer->recv_msg->hdr.origin;
                    snd->tag = peer->recv_msg->hdr.tag;
                    snd->data = peer->recv_msg->data;
                    snd->seq_num = peer->recv_msg->hdr.seq_num;
                    snd->count = peer->recv_msg->hdr.nbytes;
                    snd->cbfunc.iov = NULL;
                    snd->cbdata = NULL;
                    /* activate the OOB send state */
                    ORTE_OOB_SEND(snd);
                    /* protect the data */
                    peer->recv_msg->data = NULL;
                    /* cleanup */
                    OBJ_RELEASE(peer->recv_msg);
                    return;
                }
            } else if (ORTE_ERR_RESOURCE_BUSY == rc ||
                       ORTE_ERR_WOULD_BLOCK == rc) {
                /* exit this event and let the event lib progress */
                return;
            } else {
                // report the error
                opal_output(0, "%s-%s mca_oob_usock_peer_recv_handler: unable to recv message",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&(peer->name)));
                /* turn off the recv event */
                opal_event_del(&peer->recv_event);
                peer->recv_ev_active = false;
                ORTE_FORCED_TERMINATE(1);
                return;
            }
        }
        break;
    default:
        opal_output(0, "%s-%s mca_oob_usock_peer_recv_handler: invalid socket state(%d)",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->name)),
                    peer->state);
        // mca_oob_usock_peer_close(peer);
        break;
    }
}
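The rdptr/rdbytes pair this handler maintains is classic nonblocking-read bookkeeping: read_bytes() (not shown here) consumes rdbytes at rdptr and reports would-block so the event loop can re-arm the recv event and resume later from exactly where it left off. A standalone sketch of such a loop, with illustrative return codes standing in for the ORTE constants:

#include <errno.h>
#include <unistd.h>

/* illustrative return codes standing in for the ORTE constants */
enum { RD_DONE = 0, RD_WOULD_BLOCK = 1, RD_ERROR = -1 };

/* keep reading into the cursor until the remaining count hits zero;
 * on EAGAIN, return so the event loop can re-arm the recv event */
static int read_pending(int sd, char **rdptr, size_t *rdbytes)
{
    while (*rdbytes > 0) {
        ssize_t n = read(sd, *rdptr, *rdbytes);
        if (n > 0) {
            *rdptr   += n;
            *rdbytes -= (size_t)n;
        } else if (n < 0 && (EAGAIN == errno || EWOULDBLOCK == errno)) {
            return RD_WOULD_BLOCK;   /* exit the event, try again later */
        } else {
            return RD_ERROR;         /* EOF or hard error: close the peer */
        }
    }
    return RD_DONE;
}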