Example #1
static int do_readv(MPID_Request *rreq, int pipe_fd, MPL_IOV iov[],
                    int *iov_offset, int *iov_count, int *complete)
{
    int mpi_errno = MPI_SUCCESS;
    ssize_t nread;

    /* read into the caller-supplied iov so that the adjustment below operates
       on the same offsets the readv consumed */
    nread = readv(pipe_fd, &iov[*iov_offset], *iov_count);
    MPIR_ERR_CHKANDJUMP2(nread < 0 && errno != EAGAIN, mpi_errno, MPI_ERR_OTHER, "**read",
                         "**readv %d %s", errno, MPIU_Strerror(errno));

    /* a negative return that reaches this point can only be EAGAIN (any other
       errno already jumped above): no data is available yet, so try again on
       the next progress call */
    if (nread < 0)
        goto fn_exit;

    *complete = adjust_partially_xferred_iov(iov, iov_offset, iov_count, nread);
    if (*complete) {
        /* look for additional data to send and reload IOV if there is more */
        mpi_errno = check_req_complete(rreq->ch.vc, rreq, complete);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        if (*complete) {
            nread = close(pipe_fd);
            MPIR_ERR_CHKANDJUMP(nread < 0, mpi_errno, MPI_ERR_OTHER, "**close");
            MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, ".... complete");
        }
    }

fn_fail:
fn_exit:
    return mpi_errno;
}
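
Both do_readv above and do_vmsplice (example #2) advance through the current flight of iovs via adjust_partially_xferred_iov, which the excerpts do not show. A minimal sketch of what it presumably does follows; the MPL_IOV_BUF/MPL_IOV_LEN member names are an assumption here, since the excerpts never touch the fields directly.

/* Hypothetical reconstruction of the helper used by do_readv/do_vmsplice:
   consume the nbytes that were just transferred and report whether the
   current flight of iovs has been fully drained.  Member names are assumed. */
static int adjust_partially_xferred_iov(MPL_IOV iov[], int *iov_offset,
                                        int *iov_count, ssize_t nbytes)
{
    int i;

    for (i = *iov_offset; i < *iov_offset + *iov_count; ++i) {
        if (nbytes < (ssize_t)iov[i].MPL_IOV_LEN) {
            /* this iov was only partially transferred; bump its base pointer,
               shrink its length, and resume here on the next call */
            iov[i].MPL_IOV_BUF = (char *)iov[i].MPL_IOV_BUF + nbytes;
            iov[i].MPL_IOV_LEN -= nbytes;
            *iov_count -= (i - *iov_offset);
            *iov_offset = i;
            return 0; /* not complete */
        }
        nbytes -= iov[i].MPL_IOV_LEN;
    }
    return 1; /* every iov in this flight has been consumed */
}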
Example #2
static int do_vmsplice(MPID_Request *sreq, int pipe_fd, MPL_IOV iov[],
                       int *iov_offset, int *iov_count, int *complete)
{
    int mpi_errno = MPI_SUCCESS;
    ssize_t err;

    /* vmsplice() moves the user pages into the pipe without copying; flip
       this conditional to fall back to a plain (copying) writev() when
       debugging */
#if 1
    err = vmsplice(pipe_fd, &iov[*iov_offset], *iov_count, SPLICE_F_NONBLOCK);
#else
    err = writev(pipe_fd, &iov[*iov_offset], *iov_count);
#endif

    if (err < 0) {
        if (errno == EAGAIN) goto fn_exit;
        MPIR_ERR_SETANDJUMP2(mpi_errno, MPI_ERR_OTHER, "**vmsplice",
                             "**vmsplice %d %s", errno, MPIU_Strerror(errno));
    }

    *complete = adjust_partially_xferred_iov(iov, iov_offset, iov_count, err);
    if (*complete) {
        /* look for additional data to send and reload IOV if there is more */
        mpi_errno = check_req_complete(sreq->ch.vc, sreq, complete);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);

        if (*complete) {
            err = close(pipe_fd);
            MPIR_ERR_CHKANDJUMP(err < 0, mpi_errno, MPI_ERR_OTHER, "**close");
            MPL_DBG_MSG(MPIDI_CH3_DBG_CHANNEL, VERBOSE, ".... complete");
        }
    }

fn_fail:
fn_exit:
    return mpi_errno;
}
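
Every excerpt here funnels through check_req_complete, which is also not shown. The comments in examples #3-#5 say it invokes the request's OnDataAvail handler to load more data, so a plausible reconstruction looks like the following; the exact handler signature is an assumption.

/* Hypothetical reconstruction of check_req_complete(): if the request has an
   OnDataAvail handler, let it decide whether the request is complete (and
   reload the iov for the next flight if not); otherwise all data has arrived
   and the request can be completed directly. */
static int check_req_complete(MPIDI_VC_t *vc, MPID_Request *req, int *complete)
{
    int mpi_errno = MPI_SUCCESS;
    int (*reqFn)(MPIDI_VC_t *, MPID_Request *, int *) = req->dev.OnDataAvail;

    if (reqFn) {
        *complete = 0;
        mpi_errno = reqFn(vc, req, complete);
    }
    else {
        *complete = 1;
        MPIDI_CH3U_Request_complete(req);
    }
    return mpi_errno;
}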
Example #3
int MPID_nem_lmt_dma_handle_cookie(MPIDI_VC_t *vc, MPID_Request *req, MPID_IOV cookie)
{
    int mpi_errno = MPI_SUCCESS;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_HANDLE_COOKIE);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_HANDLE_COOKIE);

    if (cookie.MPID_IOV_LEN == 0 && cookie.MPID_IOV_BUF == NULL) {
        /* req is a send request, we need to initiate another knem request and
           send a COOKIE message back to the receiver indicating the lid
           returned from the ioctl. */
        int complete;
        knem_cookie_t s_cookie;

        /* This function will invoke the OnDataAvail function to load more data. */
        mpi_errno = check_req_complete(vc, req, &complete);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        /* If we were complete we should have received a DONE message instead
           of a COOKIE message. */
        MPIU_Assert(!complete);

        mpi_errno = do_dma_send(vc, req, req->dev.iov_count, &req->dev.iov[0], &s_cookie);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        MPID_nem_lmt_send_COOKIE(vc, req, &s_cookie, sizeof(knem_cookie_t));
    }
    else {
        /* req is a receive request and we need to continue receiving using the
           lid provided in the cookie iov. */
        mpi_errno = MPID_nem_lmt_dma_start_recv(vc, req, cookie);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_HANDLE_COOKIE);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
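
Examples #4 and #5 below queue and walk a singly linked list of outstanding transfers. The node type is not part of the excerpts, but its shape can be read directly off the field accesses (cur->vc, cur->req, cur->status_p, cur->next):

/* Reconstructed from usage in examples #4 and #5: one queued knem transfer
   awaiting completion. */
struct lmt_dma_node {
    struct lmt_dma_node *next;        /* next outstanding transfer */
    MPIDI_VC_t *vc;                   /* connection the transfer belongs to */
    MPID_Request *req;                /* receive request being satisfied */
    volatile knem_status_t *status_p; /* slot in the knem status array */
};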
Example #4
int MPID_nem_lmt_dma_progress(void)
{
    int mpi_errno = MPI_SUCCESS;
    struct lmt_dma_node *prev = NULL;
    struct lmt_dma_node *free_me = NULL;
    struct lmt_dma_node *cur = outstanding_head;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_PROGRESS);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_PROGRESS);
    
    /* Iterate over a linked-list of (req,status_idx)-tuples looking for
       completed/failed requests.  Currently knem only provides status to the
       receiver, so all of these requests are recv requests. */
    while (cur) {
        switch (*cur->status_p) {
            case KNEM_STATUS_SUCCESS:
                {
                    /* complete the request if all data has been sent, remove it from the list */
                    int complete = 0;
                    mpi_errno = check_req_complete(cur->vc, cur->req, &complete);
                    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

                    free_status_index(cur->status_p - knem_status);

                    if (complete) {
                        /* request was completed by the OnDataAvail fn */
                        MPID_nem_lmt_send_DONE(cur->vc, cur->req); /* tell the other side to complete its request */
                        MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");

                    }
                    else {
                        /* There is more data to send.  We must inform the sender that we have
                           completely received the current batch and that the next batch should
                           be sent. */
                        MPID_nem_lmt_send_COOKIE(cur->vc, cur->req, NULL, 0);
                    }

                    /* Right now we always free the cur element, even if the
                       request is incomplete, because it simplifies the logic. */
                    if (cur == outstanding_head) {
                        outstanding_head = cur->next;
                        prev = NULL;
                    }
                    else {
                        prev->next = cur->next;
                    }
                    free_me = cur;
                    cur = cur->next;
                    MPIU_Free(free_me);
                    --MPID_nem_local_lmt_pending;
                    continue;
                }
            case KNEM_STATUS_FAILED:
                /* set the error status for the request, complete it, then
                   dequeue the entry; start from MPI_SUCCESS so MPIU_ERR_SET1
                   creates a fresh error code rather than chaining onto stale
                   request state */
                cur->req->status.MPI_ERROR = MPI_SUCCESS;
                MPIU_ERR_SET1(cur->req->status.MPI_ERROR, MPI_ERR_OTHER, "**recv_status", "**recv_status %d", *cur->status_p);

                MPIDI_CH3U_Request_complete(cur->req);

                if (cur == outstanding_head) {
                    outstanding_head = cur->next;
                    prev = NULL;
                }
                else {
                    prev->next = cur->next;
                }
                free_me = cur;
                cur = cur->next;

                MPIU_Free(free_me);
                --MPID_nem_local_lmt_pending;
                continue;
            case KNEM_STATUS_PENDING:
                /* nothing to do here */
                break;
            default:
                MPIU_ERR_SETANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**invalid_knem_status",
                                     "**invalid_knem_status %d", *cur->status_p);
                break;
        }

        prev = cur;
        cur = cur->next;
    }

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_PROGRESS);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
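
The SUCCESS and FAILED arms above duplicate the unlink-and-free sequence verbatim. One way to factor it out, sketched against the same list head and counter the example already uses (the helper name is invented):

/* Hypothetical helper: unlink *curp from the list headed by outstanding_head,
   free it, and advance the cursor past it.  prev is the node before *curp,
   or NULL when *curp is the head. */
static void dequeue_and_free(struct lmt_dma_node **curp, struct lmt_dma_node *prev)
{
    struct lmt_dma_node *free_me = *curp;

    if (prev == NULL)
        outstanding_head = free_me->next;
    else
        prev->next = free_me->next;

    *curp = free_me->next;
    MPIU_Free(free_me);
    --MPID_nem_local_lmt_pending;
}

With such a helper, both arms reduce to dequeue_and_free(&cur, prev); continue;.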
Example #5
int MPID_nem_lmt_dma_start_recv(MPIDI_VC_t *vc, MPID_Request *rreq, MPID_IOV s_cookie)
{
    int mpi_errno = MPI_SUCCESS;
    int nodma;
    int dt_contig;
    MPI_Aint dt_true_lb;
    MPIDI_msg_sz_t data_sz;
    MPID_Datatype * dt_ptr;
    volatile knem_status_t *status;
    knem_status_t current_status;
    struct lmt_dma_node *node = NULL;
    MPIDI_STATE_DECL(MPID_STATE_MPID_NEM_LMT_DMA_START_RECV);

    MPIDI_FUNC_ENTER(MPID_STATE_MPID_NEM_LMT_DMA_START_RECV);

    /* MT: this code assumes only one thread can be at this point at a time */
    if (knem_fd < 0) {
        mpi_errno = open_knem_dev();
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);
    }

    /* find out contig/noncontig, size, and lb for the datatype */
    MPIDI_Datatype_get_info(rreq->dev.user_count, rreq->dev.datatype,
                            dt_contig, data_sz, dt_ptr, dt_true_lb);

    nodma = !knem_has_dma || data_sz < MPIR_CVAR_NEMESIS_LMT_DMA_THRESHOLD;

    if (dt_contig) {
        /* handle the iov creation ourselves */
        rreq->dev.iov[0].MPID_IOV_BUF = (char *)rreq->dev.user_buf + dt_true_lb;
        rreq->dev.iov[0].MPID_IOV_LEN = data_sz;
        rreq->dev.iov_count = 1;
    }
    else {
        /* segment_ptr may be non-null when this is a continuation of a
           many-part message that we couldn't fit in one single flight of
           iovs. */
        if (rreq->dev.segment_ptr == NULL) {
            rreq->dev.segment_ptr = MPID_Segment_alloc();
            MPIU_ERR_CHKANDJUMP1((rreq->dev.segment_ptr == NULL), mpi_errno,
                                 MPI_ERR_OTHER, "**nomem",
                                 "**nomem %s", "MPID_Segment_alloc");
            MPID_Segment_init(rreq->dev.user_buf, rreq->dev.user_count,
                              rreq->dev.datatype, rreq->dev.segment_ptr, 0);
            rreq->dev.segment_first = 0;
            rreq->dev.segment_size = data_sz;

            /* see load_send_iov FIXME above */
            mpi_errno = MPIDI_CH3U_Request_load_recv_iov(rreq);
            if (mpi_errno) MPIU_ERR_POP(mpi_errno);
        }
    }

    MPIU_Assert(s_cookie.MPID_IOV_LEN == sizeof(knem_cookie_t));
    MPIU_Assert(s_cookie.MPID_IOV_BUF != NULL);
    mpi_errno = do_dma_recv(rreq->dev.iov_count, rreq->dev.iov,
                            *((knem_cookie_t *)s_cookie.MPID_IOV_BUF), nodma,
                            &status, &current_status);
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    /* TODO refactor this block and MPID_nem_lmt_dma_progress (and anywhere
     * else) to share a common function.  This advancement/completion code is
     * duplication. */
    if (current_status != KNEM_STATUS_PENDING) {
        /* complete the request if all data has been sent, remove it from the list */
        int complete = 0;

        MPIU_ERR_CHKANDJUMP1(current_status == KNEM_STATUS_FAILED, mpi_errno, MPI_ERR_OTHER,
                             "**recv_status", "**recv_status %d", current_status);

        mpi_errno = check_req_complete(vc, rreq, &complete);
        if (mpi_errno) MPIU_ERR_POP(mpi_errno);

        free_status_index(status - knem_status);

        if (complete) {
            /* request was completed by the OnDataAvail fn */
            MPID_nem_lmt_send_DONE(vc, rreq); /* tell the other side to complete its request */
            MPIU_DBG_MSG(CH3_CHANNEL, VERBOSE, ".... complete");

        }
        else {
            /* There is more data to send.  We must inform the sender that we have
               completely received the current batch and that the next batch should
               be sent. */
            MPID_nem_lmt_send_COOKIE(vc, rreq, NULL, 0);
        }
    }

    /* Queue the transfer for later progress polling only if it is still
       pending; a transfer that finished immediately was fully handled above
       (and its status slot already freed). */
    if (current_status == KNEM_STATUS_PENDING) {
        node = MPIU_Malloc(sizeof(struct lmt_dma_node));
        MPIU_ERR_CHKANDJUMP1(node == NULL, mpi_errno, MPI_ERR_OTHER,
                             "**nomem", "**nomem %s", "lmt_dma_node");
        node->vc = vc;
        node->req = rreq;
        node->status_p = status;
        node->next = outstanding_head;
        outstanding_head = node;
        ++MPID_nem_local_lmt_pending;
    }

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPID_NEM_LMT_DMA_START_RECV);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
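
Both example #4 (free_status_index(cur->status_p - knem_status)) and this function index a flat knem_status array by pointer arithmetic, which implies a small slot allocator around it. A minimal sketch of the implied interface; the array size and the in-use flags are invented for illustration:

/* Hypothetical slot allocator implied by the status_p - knem_status
   arithmetic: knem writes completion status into a fixed array, and a
   parallel flag array tracks which slots are currently handed out. */
#define MAX_KNEM_STATUS 64            /* size chosen arbitrarily for the sketch */
static knem_status_t knem_status[MAX_KNEM_STATUS];
static int status_in_use[MAX_KNEM_STATUS];

static int alloc_status_index(void)
{
    int i;
    for (i = 0; i < MAX_KNEM_STATUS; ++i) {
        if (!status_in_use[i]) {
            status_in_use[i] = 1;
            return i;
        }
    }
    return -1; /* all slots busy; caller must fall back or wait */
}

static void free_status_index(int index)
{
    status_in_use[index] = 0;
}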