Esempio n. 1
0
/*
 * Receives message data.
 *
 * While the read pointer still sits at the start of the iovec array the
 * header is being received; once it is complete it is converted to host
 * byte order and a payload buffer of msg_hdr.msg_size bytes is set up in
 * msg_rwiov[1]. What happens next depends on the message type.
 *
 * @param msg the message to be received into
 * @param peer the peer to receive from
 * @retval true if the whole message was received
 * @retval false if the whole message was not received; this includes a
 *         failure to allocate the payload buffer, in which case the peer
 *         connection is closed
 */
bool mca_oob_tcp_msg_recv_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_peer_t * peer)
{
    /* has entire header been received */
    if(msg->msg_rwptr == msg->msg_rwiov) {
        if(mca_oob_tcp_msg_recv(msg, peer) == false) {
            return false;
        }

        /* allocate a buffer for the receive */
        MCA_OOB_TCP_HDR_NTOH(&msg->msg_hdr);
        if(msg->msg_hdr.msg_size > 0) {
             msg->msg_rwbuf = malloc(msg->msg_hdr.msg_size);
             if(NULL == msg->msg_rwbuf) {
                 /* print the size the same way the debug output below does
                  * (cast + %lu) so the specifier always matches the argument */
                 opal_output(0, "%s-%s mca_oob_tcp_msg_recv_handler: malloc(%lu) failed\n", 
                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                     ORTE_NAME_PRINT(&(peer->peer_name)),
                     (unsigned long)(msg->msg_hdr.msg_size));
                 mca_oob_tcp_peer_close(peer);
                 return false;
             }
             msg->msg_rwiov[1].iov_base = (ompi_iov_base_ptr_t)msg->msg_rwbuf;
             msg->msg_rwiov[1].iov_len = msg->msg_hdr.msg_size;
             msg->msg_rwnum = 1;
        } else {
             /* zero-length payload: nothing further to read */
             msg->msg_rwiov[1].iov_base = NULL;
             msg->msg_rwiov[1].iov_len = 0;
             msg->msg_rwnum = 0;
        }
        if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) {
            opal_output(0, "%s-%s (origin: %s) mca_oob_tcp_msg_recv_handler: size %lu\n",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&(peer->peer_name)),
                        ORTE_NAME_PRINT(&(msg->msg_hdr.msg_origin)),
                        (unsigned long)(msg->msg_hdr.msg_size) );
        }
    }

    /* do the right thing based on the message type */
    switch(msg->msg_hdr.msg_type)  {
        case MCA_OOB_TCP_IDENT:
            /* done - there is nothing else to receive */
            return true; 
        case MCA_OOB_TCP_PING:
            /* done - there is nothing else to receive */
            return true;
        case MCA_OOB_TCP_DATA:
            /* finish receiving message */
            return mca_oob_tcp_msg_recv(msg, peer);
        default:
            /* unrecognized types are treated as complete */
            return true;
    }
}
Esempio n. 2
0
/*
 *  Receive the peers globally unique process identification from a newly
 *  connected socket and verify the expected response. If so, move the
 *  socket to a connected state.
 *
 *  @param peer the peer whose connect acknowledgement is being read
 *  @retval ORTE_SUCCESS     the peer was verified and marked connected, or
 *                           the receive failed while still in CONNECT_ACK
 *                           state and a retry was scheduled via the peer
 *                           timer event
 *  @retval ORTE_ERR_UNREACH the receive failed in another state, the header
 *                           type was not MCA_OOB_TCP_CONNECT, or the peer
 *                           reported an unexpected name; the peer is closed
 */
static int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* peer)
{
    mca_oob_tcp_hdr_t hdr;
    if((mca_oob_tcp_peer_recv_blocking(peer, &hdr, sizeof(hdr))) != sizeof(hdr)) {
        /* If the peer state is still CONNECT_ACK, that indicates that
           the error was a reset from the remote host because the
           connection was not able to be fully established.  In that
           case, Clean up the connection and give it another go.  */
        if (peer->peer_state == MCA_OOB_TCP_CONNECT_ACK) {
            struct timeval tv = { 1,0 };
            if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) {
                opal_output(0,
                            "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_recv_connect_ack "
                            "connect failed during receive.  Restarting (%s).",
                            ORTE_NAME_ARGS(orte_process_info.my_name),
                            ORTE_NAME_ARGS(&(peer->peer_name)),
                            strerror(opal_socket_errno));
            }
            opal_event_del(&peer->peer_recv_event);
            mca_oob_tcp_peer_shutdown(peer);
            /* retry after the one-second delay held in tv */
            opal_evtimer_add(&peer->peer_timer_event, &tv);
            return ORTE_SUCCESS;
        } else {
            mca_oob_tcp_peer_close(peer);
            return ORTE_ERR_UNREACH;
        }
    }
    MCA_OOB_TCP_HDR_NTOH(&hdr);
    if(hdr.msg_type != MCA_OOB_TCP_CONNECT) {
        opal_output(0, "mca_oob_tcp_peer_recv_connect_ack: invalid header type: %d\n", 
                    hdr.msg_type);
        mca_oob_tcp_peer_close(peer);
        return ORTE_ERR_UNREACH;
    }

    /* compare the peers name to the expected value */
    if(memcmp(&peer->peer_name, &hdr.msg_src, sizeof(orte_process_name_t)) != 0) {
        /* all three names go through ORTE_NAME_ARGS, so all three use the
         * same %lu triple as every other ORTE_NAME_ARGS call in this file */
        opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_recv_connect_ack: "
            "received unexpected process identifier [%lu,%lu,%lu]\n",
            ORTE_NAME_ARGS(orte_process_info.my_name),
            ORTE_NAME_ARGS(&(peer->peer_name)),
            ORTE_NAME_ARGS(&(hdr.msg_src)));
        mca_oob_tcp_peer_close(peer);
        return ORTE_ERR_UNREACH;
    }

    /* if we have an invalid name or do not have one assigned at all -
     * use the name returned by the peer.  This needs to be a LITERAL
     * comparison - we do NOT want wildcard values to return EQUAL
     */
    if(orte_process_info.my_name == NULL) {
        orte_ns.create_process_name(&orte_process_info.my_name, 
            hdr.msg_dst.cellid, hdr.msg_dst.jobid, hdr.msg_dst.vpid);
    } else if (orte_ns.compare_fields(ORTE_NS_CMP_ALL, orte_process_info.my_name, ORTE_NAME_INVALID) == ORTE_EQUAL) {
        *orte_process_info.my_name = hdr.msg_dst;
    }

    /* connected */
    mca_oob_tcp_peer_connected(peer);
    if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) {
        mca_oob_tcp_peer_dump(peer, "connected");
    }
    return ORTE_SUCCESS;
}
Esempio n. 3
0
void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata)
{
    mca_oob_tcp_msg_error_t *mop = (mca_oob_tcp_msg_error_t*)cbdata;
    uint64_t ui64;
    orte_rml_send_t *snd;
    orte_oob_base_peer_t *bpr;

    opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                        "%s tcp:unknown hop called for peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&mop->hop));

    if (orte_finalizing || orte_abnormal_term_ordered) {
        /* just ignore the problem */
        OBJ_RELEASE(mop);
        return;
    }

   /* mark that this component cannot reach this hop */
    memcpy(&ui64, (char*)&(mop->hop), sizeof(uint64_t));
    if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
                                                         ui64, (void**)&bpr) ||
        NULL == bpr) {
        /* the overall OOB has no knowledge of this hop. Only
         * way this could happen is if the peer contacted us
         * via this component, and it wasn't entered into the
         * OOB framework hash table. We have no way of knowing
         * what to do next, so just output an error message and
         * abort */
        opal_output(0, "%s ERROR: message to %s requires routing and the OOB has no knowledge of the reqd hop %s",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&mop->snd->hdr.dst),
                    ORTE_NAME_PRINT(&mop->hop));
        ORTE_ACTIVATE_PROC_STATE(&mop->hop, ORTE_PROC_STATE_COMM_FAILED);
        OBJ_RELEASE(mop);
        return;
    }
    opal_bitmap_clear_bit(&bpr->addressable, mca_oob_tcp_component.super.idx);

    /* mark that this component cannot reach this destination either */
    memcpy(&ui64, (char*)&(mop->snd->hdr.dst), sizeof(uint64_t));
    if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
                                                         ui64, (void**)&bpr) ||
        NULL == bpr) {
        opal_output(0, "%s ERROR: message to %s requires routing and the OOB has no knowledge of this process",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&mop->snd->hdr.dst));
        ORTE_ACTIVATE_PROC_STATE(&mop->hop, ORTE_PROC_STATE_COMM_FAILED);
        OBJ_RELEASE(mop);
        return;
    }
    opal_bitmap_clear_bit(&bpr->addressable, mca_oob_tcp_component.super.idx);

    /* post the message to the OOB so it can see
     * if another component can transfer it
     */
    MCA_OOB_TCP_HDR_NTOH(&mop->snd->hdr);
    snd = OBJ_NEW(orte_rml_send_t);
    snd->dst = mop->snd->hdr.dst;
    snd->origin = mop->snd->hdr.origin;
    snd->tag = mop->snd->hdr.tag;
    snd->data = mop->snd->data;
    snd->count = mop->snd->hdr.nbytes;
    snd->cbfunc.iov = NULL;
    snd->cbdata = NULL;
    /* activate the OOB send state */
    ORTE_OOB_SEND(snd);
    /* protect the data */
    mop->snd->data = NULL;

    OBJ_RELEASE(mop);
}
Esempio n. 4
0
void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata)
{
    mca_oob_tcp_peer_t* peer = (mca_oob_tcp_peer_t*)cbdata;
    int rc;
    orte_process_name_t hop;
    mca_oob_tcp_peer_t *relay;
    uint64_t ui64;

    if (orte_abnormal_term_ordered) {
        return;
    }

    opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                        "%s:tcp:recv:handler called for peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&peer->name));

    switch (peer->state) {
    case MCA_OOB_TCP_CONNECT_ACK:
        if (ORTE_SUCCESS == (rc = mca_oob_tcp_peer_recv_connect_ack(peer, peer->sd, NULL))) {
            opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                "%s:tcp:recv:handler starting send/recv events",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            /* we connected! Start the send/recv events */
            if (!peer->recv_ev_active) {
                opal_event_add(&peer->recv_event, 0);
                peer->recv_ev_active = true;
            }
            if (peer->timer_ev_active) {
                opal_event_del(&peer->timer_event);
                peer->timer_ev_active = false;
            }
            /* if there is a message waiting to be sent, queue it */
            if (NULL == peer->send_msg) {
                peer->send_msg = (mca_oob_tcp_send_t*)opal_list_remove_first(&peer->send_queue);
            }
            if (NULL != peer->send_msg && !peer->send_ev_active) {
                opal_event_add(&peer->send_event, 0);
                peer->send_ev_active = true;
            }
            /* update our state */
            peer->state = MCA_OOB_TCP_CONNECTED;
        } else {
            opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                "%s UNABLE TO COMPLETE CONNECT ACK WITH %s",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&peer->name));
            opal_event_del(&peer->recv_event);
            ORTE_FORCED_TERMINATE(1);
            return;
        }
        break;
    case MCA_OOB_TCP_CONNECTED:
        opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                            "%s:tcp:recv:handler CONNECTED",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        /* allocate a new message and setup for recv */
        if (NULL == peer->recv_msg) {
            opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                "%s:tcp:recv:handler allocate new recv msg",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            peer->recv_msg = OBJ_NEW(mca_oob_tcp_recv_t);
            if (NULL == peer->recv_msg) {
                opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: unable to allocate recv message\n",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&(peer->name)));
                return;
            }
            /* start by reading the header */
            peer->recv_msg->rdptr = (char*)&peer->recv_msg->hdr;
            peer->recv_msg->rdbytes = sizeof(mca_oob_tcp_hdr_t);
        }
        /* if the header hasn't been completely read, read it */
        if (!peer->recv_msg->hdr_recvd) {
            opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                "%s:tcp:recv:handler read hdr",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            if (ORTE_SUCCESS == (rc = read_bytes(peer))) {
                OPAL_TIMING_EVENT((&tm,"Header received from %s",
                                   ORTE_NAME_PRINT(&peer->name)));
                /* completed reading the header */
                peer->recv_msg->hdr_recvd = true;
                /* convert the header */
                MCA_OOB_TCP_HDR_NTOH(&peer->recv_msg->hdr);
                /* if this is a zero-byte message, then we are done */
                if (0 == peer->recv_msg->hdr.nbytes) {
                    opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                        "%s RECVD ZERO-BYTE MESSAGE FROM %s for tag %d",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                        ORTE_NAME_PRINT(&peer->name), peer->recv_msg->hdr.tag);
                    peer->recv_msg->data = NULL;  // make sure
                } else {
                    opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                        "%s:tcp:recv:handler allocate data region of size %lu",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)peer->recv_msg->hdr.nbytes);
                    /* allocate the data region */
                    peer->recv_msg->data = (char*)malloc(peer->recv_msg->hdr.nbytes);
                    /* point to it */
                    peer->recv_msg->rdptr = peer->recv_msg->data;
                    peer->recv_msg->rdbytes = peer->recv_msg->hdr.nbytes;
                }
                /* fall thru and attempt to read the data */
            } else if (ORTE_ERR_RESOURCE_BUSY == rc ||
                       ORTE_ERR_WOULD_BLOCK == rc) {
                /* exit this event and let the event lib progress */
                return;
            } else {
                /* close the connection */
                opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                    "%s:tcp:recv:handler error reading bytes - closing connection",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
                mca_oob_tcp_peer_close(peer);
                return;
            }
        }

        if (peer->recv_msg->hdr_recvd) {
            /* continue to read the data block - we start from
             * wherever we left off, which could be at the
             * beginning or somewhere in the message
             */
            if (ORTE_SUCCESS == (rc = read_bytes(peer))) {

                OPAL_TIMING_EVENT((&tm,"Msg received from %s",
                                   ORTE_NAME_PRINT(&peer->name)));


                /* we recvd all of the message */
                opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                    "%s RECVD COMPLETE MESSAGE FROM %s (ORIGIN %s) OF %d BYTES FOR DEST %s TAG %d",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                    ORTE_NAME_PRINT(&peer->name),
                                    ORTE_NAME_PRINT(&peer->recv_msg->hdr.origin),
                                    (int)peer->recv_msg->hdr.nbytes,
                                    ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst),
                                    peer->recv_msg->hdr.tag);
                /* am I the intended recipient (header was already converted back to host order)? */
                if (peer->recv_msg->hdr.dst.jobid == ORTE_PROC_MY_NAME->jobid &&
                    peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) {
                    /* yes - post it to the RML for delivery */
                    opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                        "%s DELIVERING TO RML",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
                    ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin, peer->recv_msg->hdr.tag,
                                          peer->recv_msg->data,
                                          peer->recv_msg->hdr.nbytes);
                    OBJ_RELEASE(peer->recv_msg);
                } else {
                    /* no - find the next hop in the route */
                    hop = orte_routed.get_route(&peer->recv_msg->hdr.dst);
                    if (hop.jobid == ORTE_JOBID_INVALID ||
                        hop.vpid == ORTE_VPID_INVALID) {
                        /* no hop known - post the error to the component
                         * and let the OOB see if there is another way
                         * to get there from here
                         */
                        opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                            "%s NO ROUTE TO %s FROM HERE",
                                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                            ORTE_NAME_PRINT(&peer->name));
                        /* let the component know about the problem */
                        ORTE_ACTIVATE_TCP_MSG_ERROR(NULL, peer->recv_msg, &hop, mca_oob_tcp_component_no_route);
                        /* cleanup */
                        OBJ_RELEASE(peer->recv_msg);
                        return;
                    } else {
                        /* does we know how to reach the next hop? */
                        memcpy(&ui64, (char*)&hop, sizeof(uint64_t));
                        if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_tcp_module.peers, ui64, (void**)&relay)) {
                            opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                                "%s ADDRESS OF NEXT HOP %s TO %s IS UNKNOWN",
                                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                                ORTE_NAME_PRINT(&hop),
                                                ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst));
                            /* let the component know about the problem */
                            ORTE_ACTIVATE_TCP_MSG_ERROR(NULL, peer->recv_msg, &hop, mca_oob_tcp_component_hop_unknown);
                            /* cleanup */
                            OBJ_RELEASE(peer->recv_msg);
                            return;
                        }
                        opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                            "%s ROUTING TO %s FROM HERE",
                                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                            ORTE_NAME_PRINT(&relay->name));
                        /* if this came from a different job family, then ensure
                         * we know how to return
                         */
                        if (ORTE_JOB_FAMILY(peer->recv_msg->hdr.origin.jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
                            orte_routed.update_route(&(peer->recv_msg->hdr.origin), &peer->name);
                        }
                        /* post the message for retransmission */
                        MCA_OOB_TCP_QUEUE_RELAY(peer->recv_msg, relay);
                        OBJ_RELEASE(peer->recv_msg);
                    }
                }
                peer->recv_msg = NULL;
                return;
            } else if (ORTE_ERR_RESOURCE_BUSY == rc ||
                       ORTE_ERR_WOULD_BLOCK == rc) {
                /* exit this event and let the event lib progress */
                return;
            } else {
                // report the error
                opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: unable to recv message",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&(peer->name)));
                /* turn off the recv event */
                opal_event_del(&peer->recv_event);
                ORTE_FORCED_TERMINATE(1);
                return;
            }
        }
        break;
    default: 
        opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: invalid socket state(%d)", 
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->name)),
                    peer->state);
        // mca_oob_tcp_peer_close(peer);
        break;
    }
}
/*
 * Event callback invoked to read from a peer's socket.
 *
 * In CONNECT_ACK state it completes the connection handshake and starts the
 * send/recv events. In CONNECTED state it reads (possibly across several
 * invocations) a header followed by its payload into peer->recv_msg. A
 * complete message is delivered to the RML when this process is the
 * destination; otherwise it is handed to the OOB framework through
 * ORTE_OOB_SEND so that whichever transport is best can forward it.
 *
 * @param sd      not used by this function (peer->sd is used instead)
 * @param flags   not used by this function
 * @param cbdata  the mca_oob_tcp_peer_t the event was registered for
 */
void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata)
{
    mca_oob_tcp_peer_t* peer = (mca_oob_tcp_peer_t*)cbdata;
    int rc;
    orte_rml_send_t *snd;

    if (orte_abnormal_term_ordered) {
        return;
    }

    opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                        "%s:tcp:recv:handler called for peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&peer->name));

    switch (peer->state) {
    case MCA_OOB_TCP_CONNECT_ACK:
        if (ORTE_SUCCESS == (rc = mca_oob_tcp_peer_recv_connect_ack(peer, peer->sd, NULL))) {
            opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                "%s:tcp:recv:handler starting send/recv events",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            /* we connected! Start the send/recv events */
            if (!peer->recv_ev_active) {
                opal_event_add(&peer->recv_event, 0);
                peer->recv_ev_active = true;
            }
            if (peer->timer_ev_active) {
                opal_event_del(&peer->timer_event);
                peer->timer_ev_active = false;
            }
            /* if there is a message waiting to be sent, queue it */
            if (NULL == peer->send_msg) {
                peer->send_msg = (mca_oob_tcp_send_t*)opal_list_remove_first(&peer->send_queue);
            }
            if (NULL != peer->send_msg && !peer->send_ev_active) {
                opal_event_add(&peer->send_event, 0);
                peer->send_ev_active = true;
            }
            /* update our state */
            peer->state = MCA_OOB_TCP_CONNECTED;
        } else {
            opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                "%s UNABLE TO COMPLETE CONNECT ACK WITH %s",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&peer->name));
            opal_event_del(&peer->recv_event);
            ORTE_FORCED_TERMINATE(1);
            return;
        }
        break;
    case MCA_OOB_TCP_CONNECTED:
        opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                            "%s:tcp:recv:handler CONNECTED",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        /* allocate a new message and setup for recv */
        if (NULL == peer->recv_msg) {
            opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                "%s:tcp:recv:handler allocate new recv msg",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            peer->recv_msg = OBJ_NEW(mca_oob_tcp_recv_t);
            if (NULL == peer->recv_msg) {
                opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: unable to allocate recv message\n",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&(peer->name)));
                return;
            }
            /* start by reading the header */
            peer->recv_msg->rdptr = (char*)&peer->recv_msg->hdr;
            peer->recv_msg->rdbytes = sizeof(mca_oob_tcp_hdr_t);
        }
        /* if the header hasn't been completely read, read it */
        if (!peer->recv_msg->hdr_recvd) {
            opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                "%s:tcp:recv:handler read hdr",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            if (ORTE_SUCCESS == (rc = read_bytes(peer))) {
                /* completed reading the header */
                peer->recv_msg->hdr_recvd = true;
                /* convert the header */
                MCA_OOB_TCP_HDR_NTOH(&peer->recv_msg->hdr);
                /* if this is a zero-byte message, then we are done */
                if (0 == peer->recv_msg->hdr.nbytes) {
                    opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                        "%s RECVD ZERO-BYTE MESSAGE FROM %s for tag %d",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                        ORTE_NAME_PRINT(&peer->name), peer->recv_msg->hdr.tag);
                    peer->recv_msg->data = NULL;  // make sure
                } else {
                    opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                        "%s:tcp:recv:handler allocate data region of size %lu",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)peer->recv_msg->hdr.nbytes);
                    /* allocate the data region */
                    peer->recv_msg->data = (char*)malloc(peer->recv_msg->hdr.nbytes);
                    if (NULL == peer->recv_msg->data) {
                        /* without a buffer the next read_bytes() would be
                         * handed a NULL read pointer; drop the half-built
                         * message and close the connection instead */
                        opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: unable to allocate %lu bytes for message data",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                    ORTE_NAME_PRINT(&(peer->name)),
                                    (unsigned long)peer->recv_msg->hdr.nbytes);
                        OBJ_RELEASE(peer->recv_msg);
                        peer->recv_msg = NULL;
                        mca_oob_tcp_peer_close(peer);
                        return;
                    }
                    /* point to it */
                    peer->recv_msg->rdptr = peer->recv_msg->data;
                    peer->recv_msg->rdbytes = peer->recv_msg->hdr.nbytes;
                }
                /* fall thru and attempt to read the data */
            } else if (ORTE_ERR_RESOURCE_BUSY == rc ||
                       ORTE_ERR_WOULD_BLOCK == rc) {
                /* exit this event and let the event lib progress */
                return;
            } else {
                /* close the connection */
                opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                    "%s:tcp:recv:handler error reading bytes - closing connection",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
                mca_oob_tcp_peer_close(peer);
                return;
            }
        }

        if (peer->recv_msg->hdr_recvd) {
            /* continue to read the data block - we start from
             * wherever we left off, which could be at the
             * beginning or somewhere in the message
             */
            if (ORTE_SUCCESS == (rc = read_bytes(peer))) {
                /* we recvd all of the message */
                opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                    "%s RECVD COMPLETE MESSAGE FROM %s (ORIGIN %s) OF %d BYTES FOR DEST %s TAG %d",
                                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                    ORTE_NAME_PRINT(&peer->name),
                                    ORTE_NAME_PRINT(&peer->recv_msg->hdr.origin),
                                    (int)peer->recv_msg->hdr.nbytes,
                                    ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst),
                                    peer->recv_msg->hdr.tag);
                /* am I the intended recipient (header was already converted back to host order)? */
                if (peer->recv_msg->hdr.dst.jobid == ORTE_PROC_MY_NAME->jobid &&
                    peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) {
                    /* yes - post it to the RML for delivery */
                    opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                        "%s DELIVERING TO RML",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
                    ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin, peer->recv_msg->hdr.tag,
                                          peer->recv_msg->data,
                                          peer->recv_msg->hdr.nbytes);
                    OBJ_RELEASE(peer->recv_msg);
                } else {
                    /* promote this to the OOB as some other transport might
                     * be the next best hop */
                    opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                                        "%s TCP PROMOTING ROUTED MESSAGE FOR %s TO OOB",
                                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                        ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst));
                    snd = OBJ_NEW(orte_rml_send_t);
                    snd->dst = peer->recv_msg->hdr.dst;
                    snd->origin = peer->recv_msg->hdr.origin;
                    snd->tag = peer->recv_msg->hdr.tag;
                    snd->data = peer->recv_msg->data;
                    snd->count = peer->recv_msg->hdr.nbytes;
                    snd->cbfunc.iov = NULL;
                    snd->cbdata = NULL;
                    /* activate the OOB send state */
                    ORTE_OOB_SEND(snd);
                    /* protect the data - the payload pointer was handed to
                     * snd, so detach it before the recv message is released */
                    peer->recv_msg->data = NULL;
                    /* cleanup */
                    OBJ_RELEASE(peer->recv_msg);
                }
                peer->recv_msg = NULL;
                return;
            } else if (ORTE_ERR_RESOURCE_BUSY == rc ||
                       ORTE_ERR_WOULD_BLOCK == rc) {
                /* exit this event and let the event lib progress */
                return;
            } else {
                // report the error
                opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: unable to recv message",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&(peer->name)));
                /* turn off the recv event */
                opal_event_del(&peer->recv_event);
                ORTE_FORCED_TERMINATE(1);
                return;
            }
        }
        break;
    default: 
        opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: invalid socket state(%d)", 
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->name)),
                    peer->state);
        // mca_oob_tcp_peer_close(peer);
        break;
    }
}
Esempio n. 6
0
/*
 * Probe a process over TCP: connect to the address in its URI, send a
 * MCA_OOB_TCP_PROBE header and wait for a PROBE header in reply.
 *
 * @param name     name of the process being probed (placed in the header's
 *                 destination field)
 * @param uri      contact URI, parsed by mca_oob_tcp_parse_uri()
 * @param timeout  upper bound applied separately to the connect wait and to
 *                 the wait for the reply
 * @retval ORTE_SUCCESS      a PROBE reply was received
 * @retval ORTE_ERR_UNREACH  socket creation, connect, write, either wait or
 *                           the reply check failed
 * @return the error from mca_oob_tcp_parse_uri() if the URI is invalid
 */
int mca_oob_tcp_ping(
    const orte_process_name_t* name,
    const char* uri,
    const struct timeval *timeout)
{
    int sd, flags, rc;
    struct sockaddr_in inaddr;
    fd_set fdset;
    mca_oob_tcp_hdr_t hdr;
    struct timeval tv;
    struct iovec iov;
#ifndef __WINDOWS__
    struct opal_event sigpipe_handler;
#endif

    /* parse uri string */
    if(ORTE_SUCCESS != (rc = mca_oob_tcp_parse_uri(uri, &inaddr))) {
       opal_output(0,
            "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: invalid uri: %s\n",
            ORTE_NAME_ARGS(orte_process_info.my_name),
            ORTE_NAME_ARGS(name),
            uri);
        return rc;
    }

    /* create socket */
    sd = socket(AF_INET, SOCK_STREAM, 0);
    if (sd < 0) {
       opal_output(0,
            "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: socket() failed: %s (%d)\n",
            ORTE_NAME_ARGS(orte_process_info.my_name),
            ORTE_NAME_ARGS(name),
            strerror(opal_socket_errno),
            opal_socket_errno);
        return ORTE_ERR_UNREACH;
    }

    /* setup the socket as non-blocking; on failure the socket simply stays
     * blocking and flags is left negative to record that */
    if((flags = fcntl(sd, F_GETFL, 0)) < 0) {
        opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: fcntl(F_GETFL) failed: %s (%d)\n", 
            ORTE_NAME_ARGS(orte_process_info.my_name),
            ORTE_NAME_ARGS(name),
            strerror(opal_socket_errno),
            opal_socket_errno);
    } else {
        flags |= O_NONBLOCK;
        if(fcntl(sd, F_SETFL, flags) < 0) {
            opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: fcntl(F_SETFL) failed: %s (%d)\n",
                ORTE_NAME_ARGS(orte_process_info.my_name),
                ORTE_NAME_ARGS(name),
                strerror(opal_socket_errno),
                opal_socket_errno);
        }
    }

    /* start the connect - will likely fail with EINPROGRESS */
    FD_ZERO(&fdset);
    if(connect(sd, (struct sockaddr*)&inaddr, sizeof(inaddr)) < 0) {
        /* connect failed? */
        if(opal_socket_errno != EINPROGRESS && opal_socket_errno != EWOULDBLOCK) {
            CLOSE_THE_SOCKET(sd);
            return ORTE_ERR_UNREACH;
        }

        /* select with timeout to wait for connect to complete */
        FD_SET(sd, &fdset);
        tv = *timeout;
        rc = select(sd+1, NULL, &fdset, NULL, &tv);
        if(rc <= 0) {
             CLOSE_THE_SOCKET(sd);
             return ORTE_ERR_UNREACH;
        }
    }

    /* set socket back to blocking - but only when F_GETFL succeeded above;
     * otherwise flags holds the negative error return and masking it would
     * hand a garbage flag set to F_SETFL */
    if (flags >= 0) {
        flags &= ~O_NONBLOCK;
        if(fcntl(sd, F_SETFL, flags) < 0) {
             opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: fcntl(F_SETFL) failed: %s (%d)\n",
                 ORTE_NAME_ARGS(orte_process_info.my_name),
                 ORTE_NAME_ARGS(name),
                 strerror(opal_socket_errno),
                 opal_socket_errno);
        }
    }

    /* send a probe message */
    memset(&hdr, 0, sizeof(hdr));
    if(orte_process_info.my_name != NULL) {
        hdr.msg_src = *orte_process_info.my_name;
    } else {
        hdr.msg_src = *ORTE_NAME_INVALID;
    }
    hdr.msg_dst = *name;
    hdr.msg_type = MCA_OOB_TCP_PROBE;
    MCA_OOB_TCP_HDR_HTON(&hdr);

#ifndef __WINDOWS__
    /* Ignore SIGPIPE in the write -- determine success or failure in
       the ping by looking at the return code from write() */
    opal_signal_set(&sigpipe_handler, SIGPIPE,
                    noop, &sigpipe_handler);
    opal_signal_add(&sigpipe_handler, NULL);
#endif
    /* Do the write and see what happens. Use the writev version just to
     * make Windows happy as there the write function is limited to
     * file operations.
     */
    iov.iov_base = (IOVBASE_TYPE*)&hdr;
    iov.iov_len  = sizeof(hdr);
    rc = writev(sd, &iov, 1 );
#ifndef __WINDOWS__
    /* Now de-register the handler */
    opal_signal_del(&sigpipe_handler);
#endif
    if (rc != sizeof(hdr)) {
        CLOSE_THE_SOCKET(sd);
        return ORTE_ERR_UNREACH;
    }

    /* select with timeout to wait for response; start from an empty set so
     * it holds exactly sd regardless of what the connect wait left behind */
    FD_ZERO(&fdset);
    FD_SET(sd, &fdset);
    tv = *timeout;
    rc = select(sd+1, &fdset, NULL, NULL, &tv);
    if(rc <= 0) {
        CLOSE_THE_SOCKET(sd);
        return ORTE_ERR_UNREACH;
    }
    /* a short read is treated the same as no reply */
    if((rc = read(sd, &hdr, sizeof(hdr))) != sizeof(hdr)) {
        CLOSE_THE_SOCKET(sd);
        return ORTE_ERR_UNREACH;
    }
    MCA_OOB_TCP_HDR_NTOH(&hdr);
    if(hdr.msg_type != MCA_OOB_TCP_PROBE) {
        CLOSE_THE_SOCKET(sd);
        return ORTE_ERR_UNREACH;
    }
    CLOSE_THE_SOCKET(sd);
    return ORTE_SUCCESS;
}