Ejemplo n.º 1
0
/*
 * Exception handler for a peer (presumably registered with the RML as its
 * exception callback -- the registration is not visible here; confirm).
 *
 * As compiled, the body between #if 0 / #endif is disabled, so the only
 * effect of this function is a single verbose log line; neither `peer`
 * nor `reason` is used.
 *
 * The disabled code would have deleted all subscriptions and publications
 * of the peer and then closed and deleted every endpoint matching it.
 * Note that `reason` is not consulted even by the disabled code.
 */
static void
orte_iof_hnp_exception_handler(const orte_process_name_t* peer, orte_rml_exception_t reason)
{
#if 0
    orte_iof_base_endpoint_t *endpoint;
    opal_output_verbose(1, orte_iof_base.iof_output, 
                        "iof svc exception handler! %s\n",
                        ORTE_NAME_PRINT((orte_process_name_t*)peer));
    
    /* If we detect an exception on the RML connection to a peer,
     delete all of its subscriptions and publications.  Note that
     exceptions can be detected during a normal RML shutdown; they
     are recoverable events (no need to abort). */
    orte_iof_hnp_sub_delete_all(peer);
    orte_iof_hnp_pub_delete_all(peer);
    opal_output_verbose(1, orte_iof_base.iof_output, "deleted all pubs and subs\n");
    
    /* Find any streams on any endpoints for this peer and close them.
       The loop ends once no endpoint matches the peer any more, so it
       relies on the delete below removing the endpoint just matched. */
    while (NULL != 
           (endpoint = orte_iof_base_endpoint_match(peer, ORTE_NS_CMP_ALL,
                                                    ORTE_IOF_ANY))) {
        orte_iof_base_endpoint_closed(endpoint);
        
        /* Delete the endpoint that we just matched */
        orte_iof_base_endpoint_delete(peer, ORTE_NS_CMP_ALL, ORTE_IOF_ANY);
    }
#endif
    /* The only statement that is actually compiled in. */
    opal_output_verbose(1, orte_iof_base.iof_output, "done with exception handler\n");
}
/**
 * Deliver a received message to a sink endpoint.
 *
 * Every callback registered on the endpoint is invoked with the payload.
 * If the endpoint has a descriptor (ep_fd >= 0) and no earlier fragment is
 * still queued, the payload is written to it directly.  Whatever could not
 * be written -- or the entire payload when earlier fragments are pending --
 * is copied into a fragment and appended to ep_sink_frags, and the
 * endpoint's event is enabled when that fragment is the first one queued.
 * A fragment that was written completely, or that belongs to an endpoint
 * without a descriptor, is acknowledged right away.
 *
 * A zero-length message with nothing queued closes the endpoint at once;
 * with fragments pending it is queued so the close happens after them.
 *
 * @param endpoint  Destination endpoint; its ep_mode must be ORTE_IOF_SINK.
 * @param origin    Process name copied into the fragment as its source.
 * @param hdr       Message header; msg_len is taken as the payload length.
 * @param data      Payload bytes.
 *
 * @return ORTE_ERR_BAD_PARAM if the endpoint is not a sink,
 *         ORTE_ERR_OUT_OF_RESOURCE if no fragment could be allocated,
 *         ORTE_SUCCESS otherwise (including when a write error caused the
 *         endpoint to be closed).
 */
int orte_iof_base_endpoint_forward(
    orte_iof_base_endpoint_t* endpoint,
    const orte_process_name_t* origin,
    orte_iof_base_msg_header_t* hdr,
    const unsigned char* data)
{
    opal_list_item_t* item;
    orte_iof_base_frag_t* frag;
    size_t len = hdr->msg_len;
    /* Number of payload bytes already written directly to the descriptor.
       Kept separate from rc so the copy offset below never depends on
       whatever status the allocation macro left in rc. */
    size_t written = 0;
    int rc = 0;

    if(endpoint->ep_mode != ORTE_IOF_SINK) {
        return ORTE_ERR_BAD_PARAM;
    }

    /* allocate and initialize a fragment */
    ORTE_IOF_BASE_FRAG_ALLOC(frag, rc);
    if(NULL == frag) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
    frag->frag_owner = endpoint;
    frag->frag_src = *origin;
    frag->frag_hdr.hdr_msg = *hdr;
    frag->frag_len = len;

    /* call any registered callbacks */
    for(item =  opal_list_get_first(&endpoint->ep_callbacks);
            item != opal_list_get_end(&endpoint->ep_callbacks);
            item =  opal_list_get_next(item)) {
        orte_iof_base_callback_t* cb = (orte_iof_base_callback_t*)item;
        cb->cb_func(
            &hdr->msg_origin,
            hdr->msg_tag,
            cb->cb_data,
            data,
            hdr->msg_len);
    }

    if(endpoint->ep_fd >= 0) {

        /* try to write w/out copying data; only possible when nothing is
           queued ahead of us, otherwise output would be reordered */

        if(opal_list_get_size(&endpoint->ep_sink_frags) == 0) {
            if(len == 0) {
                /* No ACK required because the frag is of 0 length
                   (ACKs are based on fragment length; an ACK of 0
                   bytes would do nothing) */
                ORTE_IOF_BASE_FRAG_RETURN(frag);
                orte_iof_base_endpoint_closed(endpoint);
                OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
                return ORTE_SUCCESS;
            }
            rc = write(endpoint->ep_fd,data,len);
            if(rc < 0) {
                if (errno != EAGAIN && errno != EINTR) {
                    orte_iof_base_endpoint_closed(endpoint);

                    /* Send a ACK-AND-CLOSE back to the service so
                       that it knows not to wait for any further
                       ACKs */
                    orte_iof_base_frag_ack(frag, true);

                    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
                    return ORTE_SUCCESS;
                }
                /* EAGAIN / EINTR: nothing was written, so the whole
                   payload is still outstanding (written stays 0) */
            } else {
                written = (size_t)rc;
                frag->frag_len -= written;
            }
        }

        /* Ensure to handle both cases:
           1. When ep_sink_frags was not empty (regardless of frag_len)
           2. When ep_sink_frags was empty, but we fell through from above */
        if(frag->frag_len > 0 || 0 == len) {
            /* handle incomplete write - also queue up 0 byte message
             * and recognize this as a request to close the descriptor
             * when all pending operations complete
             */
            /* NOTE(review): frag_len comes from hdr->msg_len and is not
               checked against the capacity of frag_data here -- confirm
               that senders can never exceed it. */
            frag->frag_ptr = frag->frag_data;
            memcpy(frag->frag_ptr, data + written, frag->frag_len);
            opal_list_append(&endpoint->ep_sink_frags, &frag->super.super);
            /* If we're the first frag to be put on the sink_frags
               list, then enable the event that will tell us when the
               fd becomes writeable (the log text below says "reading";
               it is kept unchanged) */
            if(opal_list_get_size(&endpoint->ep_sink_frags) == 1) {
                opal_output(orte_iof_base.iof_output, "iof_base_endpoint forwarding frag; re-enabled reading for endpoint");
                opal_event_add(&endpoint->ep_event,0);
            }
            OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
        } else {
            OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
            /* everything was written: acknowledge fragment */
            orte_iof_base_frag_ack(frag, false);
        }
    } else {
        OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
        /* no descriptor to write to: acknowledge fragment */
        orte_iof_base_frag_ack(frag, false);
    }
    return ORTE_SUCCESS;
}
/*
 * Event callback that drains the fragments queued on a sink endpoint.
 *
 * Fragments are taken from the head of ep_sink_frags and written to ep_fd
 * until the list is empty, a write completes only partially, or the
 * descriptor reports EAGAIN.  A zero-length fragment at the head, or any
 * write error other than EAGAIN/EINTR, closes the endpoint and returns
 * immediately.  Once the list is empty the event is removed and, if
 * iof_waiting is set, iof_condition is signalled.
 *
 * sd and flags are not used; user is the endpoint.
 */
static void orte_iof_base_endpoint_write_handler(int sd, short flags, void *user)
{
    orte_iof_base_endpoint_t* endpoint = (orte_iof_base_endpoint_t*)user;

    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);

    while(0 != opal_list_get_size(&endpoint->ep_sink_frags)) {
        orte_iof_base_frag_t* head =
            (orte_iof_base_frag_t*)opal_list_get_first(&endpoint->ep_sink_frags);
        int nwritten;
        int errno_save;

        /* a zero byte message is the request to close the connection */
        if(0 == head->frag_len) {
            orte_iof_base_endpoint_closed(endpoint);
            OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
            return;
        }

        /* push out as much of the head fragment as the fd accepts;
           capture errno right away, before any other call can change it */
        nwritten = write(endpoint->ep_fd, head->frag_ptr, head->frag_len);
        errno_save = errno;

        if(nwritten >= 0) {
            head->frag_len -= nwritten;
            head->frag_ptr += nwritten;
            if(head->frag_len > 0) {
                /* partial write: wait for the next writeable event */
                break;
            }

            /* fragment fully written: dequeue it and acknowledge it with
               the lock released, then take the lock again to go on */
            opal_list_remove_item(&endpoint->ep_sink_frags, &head->super.super);
            OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
            orte_iof_base_frag_ack(head, false);
            OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
            continue;
        }

        if(EAGAIN == errno_save) {
            /* descriptor would block */
            break;
        }
        if(EINTR == errno_save) {
            /* interrupted: retry the same fragment */
            continue;
        }

        /* Every other error -- sigpipe included -- is fatal for this
           endpoint: close it, and send an ACK-AND-CLOSE back to the
           service so that it does not wait for any further ACKs. */
        orte_iof_base_endpoint_closed(endpoint);
        orte_iof_base_frag_ack(head, true);
        OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
        return;
    }

    /* nothing left to write: stop listening and wake up any waiter */
    if(0 == opal_list_get_size(&endpoint->ep_sink_frags)) {
        opal_event_del(&endpoint->ep_event);
        if(orte_iof_base.iof_waiting) {
            opal_condition_signal(&orte_iof_base.iof_condition);
        }
    }
    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
}
/*
 * Event callback that reads from a source endpoint's descriptor and
 * forwards what was read to the IOF service.
 *
 * Up to sizeof(frag_data) bytes are read into a freshly allocated
 * fragment.  EAGAIN/EINTR return the fragment and leave without sending
 * anything.  Any other read error, or a read of 0 bytes, closes the
 * endpoint and is reported to the other side as a 0 byte message.  The
 * fragment is appended to ep_source_frags, its message header is filled
 * in, and the event is removed when the unacknowledged window exceeds
 * iof_window_size.  Finally the fragment is handed to a non-blocking RML
 * send with orte_iof_base_endpoint_send_cb as completion callback.
 *
 * fd is the descriptor to read from, flags is not used, cbdata is the
 * endpoint.
 */
static void orte_iof_base_endpoint_read_handler(int fd, short flags, void *cbdata)
{
    orte_iof_base_endpoint_t* endpoint = (orte_iof_base_endpoint_t*)cbdata;
    orte_iof_base_frag_t* frag;
    orte_iof_base_header_t* hdr;
    int rc;

    /* allocate a fragment */
    ORTE_IOF_BASE_FRAG_ALLOC(frag,rc);
    if(NULL == frag) {
        /* JMS shouldn't we do something here? */
        return;
    }

    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);

    /* read up to the fragment size */
#if !defined(__WINDOWS__)
    rc = read(fd, frag->frag_data, sizeof(frag->frag_data));
#else
    {
        DWORD readed;
        HANDLE handle = (HANDLE)_get_osfhandle(fd);
        ReadFile(handle, frag->frag_data, sizeof(frag->frag_data), &readed, NULL);
        rc = (int)readed;
    }
#endif  /* !defined(__WINDOWS__) */
    if (rc < 0) {
        /* non-blocking, retry */
        if (EAGAIN == errno || EINTR == errno) {
            ORTE_IOF_BASE_FRAG_RETURN(frag);
            OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
            return;
        }

        /* Error on the connection */
        orte_iof_base_endpoint_closed(endpoint);
        /* Fall through to send 0 byte message to other side
           indicating that the endpoint is now closed. */
        rc = 0;
    } else if (rc == 0) {
        /* peer has closed connection (will fall through to send a 0
           byte message, therefore telling the RML side that the fd
           side has closed its connection) */
        orte_iof_base_endpoint_closed(endpoint);
    }

    /* Do not append the fragment before we know that we have some
       data (even a 0 byte message is OK -- that indicates that the
       file descriptor has closed) */
    frag->frag_owner = endpoint;
    opal_list_append(&endpoint->ep_source_frags, &frag->super.super);
    opal_output(orte_iof_base.iof_output, "iof_base_endpoint: read handler, source_frags list len: %d",
                (int) opal_list_get_size(&endpoint->ep_source_frags));
    frag->frag_iov[1].iov_len = frag->frag_len = rc;

    /* fill in the header */
    hdr = &frag->frag_hdr;
    hdr->hdr_common.hdr_type = ORTE_IOF_BASE_HDR_MSG;
    hdr->hdr_msg.msg_origin = endpoint->ep_origin;
    hdr->hdr_msg.msg_proxy = *ORTE_PROC_MY_NAME;
    hdr->hdr_msg.msg_tag = endpoint->ep_tag;
    hdr->hdr_msg.msg_seq = endpoint->ep_seq;
    hdr->hdr_msg.msg_len = frag->frag_len;
    ORTE_IOF_BASE_HDR_MSG_HTON(hdr->hdr_msg);

    /* if window size has been exceeded - disable forwarding */
    endpoint->ep_seq += frag->frag_len;
    if(ORTE_IOF_BASE_SEQDIFF(endpoint->ep_seq,endpoint->ep_ack) > orte_iof_base.iof_window_size) {
        opal_output(orte_iof_base.iof_output, "iof_base_endpoint read handler: window exceeded -- reading disabled");
        opal_event_del(&endpoint->ep_event);
    }
    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);

    /* Increment the refcount on the endpoint so that it doesn't get
       deleted before the frag */
    OBJ_RETAIN(endpoint);

    /* start non-blocking RML call to forward received data */
    rc = orte_rml.send_nb(
             orte_iof_base.iof_service,
             frag->frag_iov,
             2,
             ORTE_RML_TAG_IOF_SVC,
             0,
             orte_iof_base_endpoint_send_cb,
             frag);
    if (rc < 0) {
        /* Do not let a failed forward go unnoticed.  Assumes send_nb
           reports errors as negative status values -- TODO confirm.
           NOTE(review): the fragment stays on ep_source_frags and the
           reference taken above is kept; presumably both are normally
           released by the send callback, so a failure here may leak
           them -- verify and add cleanup if so. */
        opal_output(orte_iof_base.iof_output,
                    "iof_base_endpoint: read handler, send_nb failed with status=%d", rc);
    }
}