Exemple #1
0
/*
 * Shut down the connection to a peer.
 *
 * Every call bumps peer->peer_retries.  If the count (before the bump)
 * already exceeds mca_oob_tcp_component.tcp_peer_retries, the in-flight
 * send and every message on the send queue are completed with
 * ORTE_ERR_UNREACH and the peer is left in MCA_OOB_TCP_FAILED so that
 * later sends are rejected instead of triggering yet another connection
 * attempt.  Otherwise the peer ends up in MCA_OOB_TCP_CLOSED.
 *
 * In both cases the socket (if open) is closed and the recv, send and
 * timer events are removed.
 *
 * @param peer  peer whose connection is being torn down
 */
void mca_oob_tcp_peer_shutdown(mca_oob_tcp_peer_t* peer)
{
    /* set once we decide to give up on this peer for good */
    bool gave_up = false;

    /* giving up and cleanup any pending messages */
    if(peer->peer_retries++ > mca_oob_tcp_component.tcp_peer_retries) {
        mca_oob_tcp_msg_t *msg;

        opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_shutdown: retries exceeded",
                    ORTE_NAME_ARGS(orte_process_info.my_name),
                    ORTE_NAME_ARGS(&(peer->peer_name)));

        /* There are cases during the initial connection setup where
           the peer_send_msg is NULL but there are things in the queue
           -- handle that case */
        if (NULL != (msg = peer->peer_send_msg)) {
            msg->msg_complete = true;
            msg->msg_rc = ORTE_ERR_UNREACH;
            mca_oob_tcp_msg_complete(msg, &peer->peer_name);
        }
        peer->peer_send_msg = NULL;
        while (NULL != 
               (msg = (mca_oob_tcp_msg_t*)opal_list_remove_first(&peer->peer_send_queue))) {
            msg->msg_complete = true;
            msg->msg_rc = ORTE_ERR_UNREACH;
            mca_oob_tcp_msg_complete(msg, &peer->peer_name);
        }

        /* We were unsuccessful in establishing a connection, and are
           not likely to suddenly become successful, so abort the
           whole thing */
        peer->peer_state = MCA_OOB_TCP_FAILED;
        gave_up = true;
    }

    if (peer->peer_sd >= 0) {
        opal_event_del(&peer->peer_recv_event);
        opal_event_del(&peer->peer_send_event);
        CLOSE_THE_SOCKET(peer->peer_sd);
        peer->peer_sd = -1;
    } 
      
    opal_event_del(&peer->peer_timer_event);

    /* Do not clobber the FAILED state chosen above: marking the peer
       CLOSED here would make the next send start resolving/connecting
       again instead of reporting the peer as unreachable. */
    if (!gave_up) {
        peer->peer_state = MCA_OOB_TCP_CLOSED;
    }
}
Exemple #2
0
/*
 * Hand a message to a peer; the action taken depends on the peer's
 * current connection state (examined while holding peer_lock):
 *
 *   CLOSED                         - queue the message, move the peer to
 *                                    RESOLVE, drop the lock and return
 *                                    whatever mca_oob_tcp_resolve() returns
 *   CONNECTING/CONNECT_ACK/RESOLVE - queue the message
 *   FAILED                         - return ORTE_ERR_UNREACH, nothing queued
 *   CONNECTED                      - queue behind an in-progress send, or
 *                                    try to send right away
 *
 * Any other state leaves the message untouched and returns ORTE_SUCCESS.
 */
int mca_oob_tcp_peer_send(mca_oob_tcp_peer_t* peer, mca_oob_tcp_msg_t* msg)
{
    int status = ORTE_SUCCESS;

    OPAL_THREAD_LOCK(&peer->peer_lock);

    /* A closed peer is the only case that leaves early: the message is
       queued and address resolution is kicked off without the lock. */
    if (MCA_OOB_TCP_CLOSED == peer->peer_state) {
        opal_list_append(&peer->peer_send_queue, (opal_list_item_t*)msg);
        peer->peer_state = MCA_OOB_TCP_RESOLVE;
        OPAL_THREAD_UNLOCK(&peer->peer_lock);
        return mca_oob_tcp_resolve(peer);
    }

    if (MCA_OOB_TCP_CONNECTING == peer->peer_state ||
        MCA_OOB_TCP_CONNECT_ACK == peer->peer_state ||
        MCA_OOB_TCP_RESOLVE == peer->peer_state) {
        /* connection setup already under way - just park the message */
        opal_list_append(&peer->peer_send_queue, (opal_list_item_t*)msg);
    } else if (MCA_OOB_TCP_FAILED == peer->peer_state) {
        status = ORTE_ERR_UNREACH;
    } else if (MCA_OOB_TCP_CONNECTED == peer->peer_state) {
        if (NULL != peer->peer_send_msg) {
            /* another send is in progress - wait our turn */
            opal_list_append(&peer->peer_send_queue, (opal_list_item_t*)msg);
        } else if (mca_oob_tcp_msg_send_handler(msg, peer)) {
            /* went out in one shot */
            mca_oob_tcp_msg_complete(msg, &peer->peer_name);
        } else {
            /* partial send: remember it and ask for send-ready events */
            peer->peer_send_msg = msg;
            opal_event_add(&peer->peer_send_event, 0);
        }
    }

    OPAL_THREAD_UNLOCK(&peer->peer_lock);
    return status;
}
Exemple #3
0
/*
 * Event callback: the peer's socket is ready for sending.
 *
 * While CONNECTING this finishes the connect.  While CONNECTED it
 * pushes out the current message and then as many queued messages as
 * complete, and removes the send event once nothing is left to send.
 * In any other state the event is unexpected: it is logged and the send
 * event is removed.
 *
 * sd and flags are not used; user is the mca_oob_tcp_peer_t the event
 * was registered for.
 */
static void mca_oob_tcp_peer_send_handler(int sd, short flags, void* user)
{
    mca_oob_tcp_peer_t* peer = (mca_oob_tcp_peer_t *)user;
    mca_oob_tcp_msg_t* current;

    OPAL_THREAD_LOCK(&peer->peer_lock);

    if (MCA_OOB_TCP_CONNECTING == peer->peer_state) {
        mca_oob_tcp_peer_complete_connect(peer);
    } else if (MCA_OOB_TCP_CONNECTED == peer->peer_state) {
        for (current = peer->peer_send_msg;
             NULL != current;
             current = peer->peer_send_msg) {

            /* stop as soon as a message cannot be fully written */
            if (!mca_oob_tcp_msg_send_handler(current, peer)) {
                break;
            }
            mca_oob_tcp_msg_complete(current, &peer->peer_name);

            /* that one is done - pull the next pending send, if any */
            peer->peer_send_msg = (mca_oob_tcp_msg_t*)
                opal_list_remove_first(&peer->peer_send_queue);
        }

        /* queue drained: no need for further send-ready notifications */
        if (NULL == peer->peer_send_msg) {
            opal_event_del(&peer->peer_send_event);
        }
    } else {
        opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_send_handler: invalid connection state (%d)",
            ORTE_NAME_ARGS(orte_process_info.my_name),
            ORTE_NAME_ARGS(&(peer->peer_name)),
            peer->peer_state);
        opal_event_del(&peer->peer_send_event);
    }

    OPAL_THREAD_UNLOCK(&peer->peer_lock);
}
Exemple #4
0
/*
 * Progress a completed recv:
 * (1) signal a posted recv as complete
 * (2) queue an unexpected message in the recv list
 *
 * msg  - the fully received message
 * peer - the peer it arrived from; its peer_name is what persistent
 *        receive callbacks are given as the sender
 *
 * Matching and list manipulation happen under tcp_match_lock.  The lock
 * is released while the user callback / completion runs; tcp_match_count
 * tracks callbacks in flight and tcp_match_cond is signalled when it
 * drops back to zero.
 */
static void mca_oob_tcp_msg_data(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* peer)
{
    /* attempt to match unexpected message to a posted recv */
    mca_oob_tcp_msg_t* post;
    int rc;
    OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock);

    /* if I'm not a proc, check if this message came from
     * another job family - procs don't need to do this because
     * they always route through their daemons anyway
     */
    if (!ORTE_PROC_IS_MPI) {
        if ((ORTE_JOB_FAMILY(msg->msg_hdr.msg_origin.jobid) !=
             ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) &&
            (0 != ORTE_JOB_FAMILY(msg->msg_hdr.msg_origin.jobid))) {
            /* this message came from a different job family that is not
             * a local slave, so we may
             * not know how to route any reply back to the originator. Update
             * our route so we can dynamically build the routing table
             */
            if (ORTE_SUCCESS != (rc = orte_routed.update_route(&(msg->msg_hdr.msg_origin),
                                                               &(msg->msg_hdr.msg_src)))) {
                /* Nothing we can do about errors here as we definitely want
                 * the receive to complete, but at least bark loudly
                 */
                ORTE_ERROR_LOG(rc);
            }
        }
    }
    
    /* match msg against posted receives */
    post = mca_oob_tcp_msg_match_post(&msg->msg_hdr.msg_origin, msg->msg_hdr.msg_tag);
    if(NULL != post) {

        /* a posted recv without user buffers cannot take the data */
        if(NULL == post->msg_uiov || 0 == post->msg_ucnt) {
            opal_output(0, "msg_data returning bad param");
            post->msg_rc = ORTE_ERR_BAD_PARAM;
        } else {
            /* copy msg data into posted recv */
            if (post->msg_flags & ORTE_RML_ALLOC) msg->msg_flags |= ORTE_RML_ALLOC;
            post->msg_rc = mca_oob_tcp_msg_copy(msg, post->msg_uiov, post->msg_ucnt);
            /* with ORTE_RML_TRUNC the result of the copy is replaced by
             * the summed length of msg_rwiov[1..msg_rwcnt]; entry 0 is
             * skipped (presumably the header - TODO confirm)
             */
            if(post->msg_flags & ORTE_RML_TRUNC) {
                 int i, size = 0;
                 for(i=1; i<msg->msg_rwcnt+1; i++)
                     size += msg->msg_rwiov[i].iov_len;
                 post->msg_rc = size;
            }
        }

        if(post->msg_flags & ORTE_RML_PEEK) {
            /* will need message for actual receive */
            opal_list_append(&mca_oob_tcp_component.tcp_msg_recv, &msg->super.super);
        } else {
            MCA_OOB_TCP_MSG_RETURN(msg);
        }
        /* count this callback as in flight, then run it without the lock */
        mca_oob_tcp_component.tcp_match_count++;
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);

        if(post->msg_flags & ORTE_RML_PERSISTENT) {
            /* persistent recvs invoke the callback directly rather than
             * going through mca_oob_tcp_msg_complete()
             */
            post->msg_cbfunc(
                post->msg_rc, 
                &peer->peer_name, 
                post->msg_uiov, 
                post->msg_ucnt, 
                post->msg_hdr.msg_tag, 
                post->msg_cbdata);
        } else {
            /* NOTE(review): msg is dereferenced here after it was either
             * handed to MCA_OOB_TCP_MSG_RETURN or put on tcp_msg_recv, and
             * after the match lock was dropped - confirm msg cannot be
             * reused or released by another thread at this point
             */
            mca_oob_tcp_msg_complete(post, &msg->msg_hdr.msg_origin);
        }

        /* callback finished: drop the in-flight count and signal
         * tcp_match_cond once no callbacks remain
         */
        OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock);
        if(--mca_oob_tcp_component.tcp_match_count == 0)
            opal_condition_signal(&mca_oob_tcp_component.tcp_match_cond);
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);

    } else {
        /* no posted recv matches: keep the message on the recv list */
        opal_list_append(&mca_oob_tcp_component.tcp_msg_recv, (opal_list_item_t*)msg);
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
    }
}