Пример #1
0
/*
 * Retire a request: take it off its list, drop the peer reference it holds,
 * free the work-request and scatter/gather arrays, and finally release the
 * request object itself.
 *
 * req: request to retire; it must not be used by the caller afterwards,
 *      since the last statement releases it.
 */
static void mca_oob_ud_req_return (mca_oob_ud_req_t *req)
{
    opal_output_verbose(15, orte_oob_base_framework.framework_output,
                         "%s oob:ud:req_return returning req %p",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (void *) req);

    /* presumably a NULL target list just detaches the request from the list
     * it is currently on -- confirm against mca_oob_ud_req_append_to_list */
    mca_oob_ud_req_append_to_list (req, NULL);

    if (req->req_peer != NULL) {
        mca_oob_ud_peer_release (req->req_peer);
        req->req_peer = NULL;
    }

    /* free(NULL) is a no-op, so the arrays need no explicit NULL guards */
    free (req->req_wr.send);
    req->req_wr.send = NULL;

    free (req->req_sge);
    req->req_sge = NULL;

    OBJ_RELEASE(req);
}
Пример #2
0
/*
 * Event callback that drains ud_event_queued_reqs.
 *
 * Each queued request is removed from the queue under ud_match_lock and then
 * processed with the lock dropped:
 *   - RECV / UNEX requests: completed if their state is COMPLETE, otherwise
 *     put on ud_active_recvs and retried via mca_oob_ud_recv_try();
 *   - SEND requests: completed with ORTE_SUCCESS if their state is COMPLETE,
 *     otherwise put on ud_active_sends and retried via mca_oob_ud_send_try();
 *   - any other type is ignored.
 *
 * fd, flags, context: unused by this function.
 *
 * Returns NULL always.
 */
static void *mca_oob_ud_complete_dispatch(int fd, int flags, void *context)
{
    mca_oob_ud_req_t *req;

    OPAL_THREAD_LOCK(&mca_oob_ud_component.ud_match_lock);
    while (NULL !=
           (req = (mca_oob_ud_req_t *) opal_list_remove_first (&mca_oob_ud_component.ud_event_queued_reqs))) {
        /* process the request without holding the match lock */
        OPAL_THREAD_UNLOCK(&mca_oob_ud_component.ud_match_lock);

        OPAL_OUTPUT_VERBOSE((10, mca_oob_base_output, "%s oob:ud:event_process processing request %p",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (void *) req));

        /* the request was just removed from the queue above */
        req->req_list = NULL;

        switch (req->type) {
        case MCA_OOB_UD_REQ_RECV:
        case MCA_OOB_UD_REQ_UNEX:
            if (req->state == MCA_OOB_UD_REQ_COMPLETE) {
                mca_oob_ud_recv_complete (req);
            } else {
                mca_oob_ud_req_append_to_list (req, &mca_oob_ud_component.ud_active_recvs);
                mca_oob_ud_recv_try (req);
            }
            break;
        case MCA_OOB_UD_REQ_SEND:
            if (req->state == MCA_OOB_UD_REQ_COMPLETE) {
                mca_oob_ud_send_complete (req, ORTE_SUCCESS);
            } else {
                mca_oob_ud_req_append_to_list (req, &mca_oob_ud_component.ud_active_sends);
                mca_oob_ud_send_try (req);
            }
            break;
        default:
            break;
        }

        /* re-take the lock before touching the queue again */
        OPAL_THREAD_LOCK(&mca_oob_ud_component.ud_match_lock);
    }

    /* The loop is always left with the lock held (taken either before the
     * loop or at the end of the last iteration); release it so the lock is
     * not leaked to the caller. */
    OPAL_THREAD_UNLOCK(&mca_oob_ud_component.ud_match_lock);

    return NULL;
}
Пример #3
0
/*
 * Queue a request on ud_event_queued_reqs and make sure the completion
 * event (handled by mca_oob_ud_complete_dispatch) is scheduled.
 *
 * req: request to hand over to the dispatch callback.
 */
void mca_oob_ud_event_queue_completed (mca_oob_ud_req_t *req)
{
    struct timeval tv = {0, 0};

    mca_oob_ud_req_append_to_list (req, &mca_oob_ud_component.ud_event_queued_reqs);

    /* nothing more to do when the completion event is already pending */
    if (opal_event_evtimer_pending (&mca_oob_ud_component.ud_complete_event, &tv)) {
        return;
    }

    /* (re)arm the completion event with the zero timeout */
    opal_event_evtimer_set (orte_event_base, &mca_oob_ud_component.ud_complete_event,
                            mca_oob_ud_complete_dispatch, NULL);
    opal_event_add (&mca_oob_ud_component.ud_complete_event, &tv);
}
Пример #4
0
/*
 * Finish an unexpected receive.
 *
 * Looks for a pending receive with the same origin and tag. If one is found
 * the data is handed over via mca_oob_ud_recv_copy() and its result is
 * returned; otherwise the request is parked on ud_unexpected_recvs and
 * ORTE_SUCCESS is returned.
 */
static int mca_oob_ud_recv_unex_complete (mca_oob_ud_req_t *req)
{
    mca_oob_ud_req_t *pending;

    if (ORTE_SUCCESS != mca_oob_ud_find_pending_recv (req->req_origin, req->req_tag, &pending)) {
        /* no receive is waiting for this message yet: keep it for later */
        mca_oob_ud_req_append_to_list (req, &mca_oob_ud_component.ud_unexpected_recvs);
        return ORTE_SUCCESS;
    }

    return mca_oob_ud_recv_copy (pending, req);
}
Пример #5
0
/*
 * Post a receive request.
 *
 * Looks for an already-arrived unexpected receive with the same origin and
 * tag. If one is found the request is marked COMPLETE and the result of
 * mca_oob_ud_recv_copy() is returned; otherwise the request is marked
 * PENDING, placed on ud_pending_recvs, and ORTE_SUCCESS is returned.
 */
int mca_oob_ud_recv_match (mca_oob_ud_req_t *recv_req) {
    mca_oob_ud_req_t *unexpected;
    int lookup_rc;

    OPAL_OUTPUT_VERBOSE((15, mca_oob_base_output, "%s oob:ud:recv_match posting receive. req = %p ",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (void *) recv_req));

    lookup_rc = mca_oob_ud_find_unexpected_recv (recv_req->req_origin, recv_req->req_tag, &unexpected);

    OPAL_OUTPUT_VERBOSE((15, mca_oob_base_output, "%s oob:ud:recv_match posting receive. found = %p ",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (void *) unexpected));

    if (ORTE_SUCCESS != lookup_rc) {
        /* nothing has arrived yet: wait on the pending list */
        recv_req->state = MCA_OOB_UD_REQ_PENDING;
        mca_oob_ud_req_append_to_list (recv_req, &mca_oob_ud_component.ud_pending_recvs);
        return ORTE_SUCCESS;
    }

    /* the matching message is already here */
    recv_req->state = MCA_OOB_UD_REQ_COMPLETE;
    return mca_oob_ud_recv_copy (recv_req, unexpected);
}
Пример #6
0
/*
 * Search a request list for the first entry whose origin equals `name` and
 * whose tag equals `tag`.
 *
 * list: list of mca_oob_ud_req_t items to search.
 * name: process name to match against each request's req_origin.
 * tag:  tag to match against each request's req_tag.
 * req:  out parameter; set to the matching request, or to NULL if none.
 *
 * Returns ORTE_SUCCESS when a match was found, ORTE_ERR_NOT_FOUND otherwise.
 * On success the matching request is also passed to
 * mca_oob_ud_req_append_to_list() with a NULL list (presumably detaching it
 * from `list` -- confirm against that helper).
 *
 * Locking: this function acquires and releases ud_match_lock itself around
 * the list walk.
 * NOTE(review): an earlier comment here said the caller must already hold
 * the matching lock; that does not match the code -- confirm the intended
 * contract before calling this with the lock held.
 */
static inline int mca_oob_ud_find_recv (opal_list_t *list, const orte_process_name_t name,
                                        const int tag, mca_oob_ud_req_t **req)
{
    opal_list_item_t *cursor;
    int rc = ORTE_ERR_NOT_FOUND;

    *req = NULL;

    OPAL_THREAD_LOCK(&mca_oob_ud_component.ud_match_lock);

    cursor = opal_list_get_first (list);
    while (cursor != opal_list_get_end (list)) {
        mca_oob_ud_req_t *candidate = (mca_oob_ud_req_t *) cursor;

        OPAL_OUTPUT_VERBOSE((15, mca_oob_base_output, "%s oob:ud:find_recv matching against "
                             "peer: %s, tag: %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&candidate->req_origin), candidate->req_tag));

        if (OPAL_EQUAL != opal_dss.compare (&name, &candidate->req_origin, ORTE_NAME) ||
            tag != candidate->req_tag) {
            /* not this one: move on */
            cursor = opal_list_get_next (cursor);
            continue;
        }

        *req = candidate;
        rc = ORTE_SUCCESS;
        break;
    }

    OPAL_OUTPUT_VERBOSE((15, mca_oob_base_output, "%s oob:ud:find_recv %sfound",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_SUCCESS != rc ? "not " : ""));

    OPAL_THREAD_UNLOCK(&mca_oob_ud_component.ud_match_lock);

    if (ORTE_SUCCESS == rc) {
        mca_oob_ud_req_append_to_list (*req, NULL);
    }

    return rc;
}
Пример #7
0
/*
 * Event callback that starts a non-blocking send of an RML message.
 *
 * cbdata: a mca_oob_ud_msg_op_t whose msg describes the message to send.
 * fd, args: unused by this function.
 *
 * Steps:
 *   1. A message addressed to this process is handed to mca_oob_ud_send_self().
 *   2. The next hop is obtained from the routed framework and the matching
 *      peer is looked up.
 *   3. A send request is created and filled from the message. The payload
 *      comes from one of three sources: a raw data pointer (copied into a
 *      fresh allocation), an iovec array (referenced, not copied), or an
 *      opal_buffer_t (payload copied and unloaded into the request).
 *   4. A REQUEST message is obtained for the port's listen QP and its header
 *      is filled in.
 *   5. If payload plus header fits within the port MTU, the payload is
 *      copied directly behind the header and posted as an eager send.
 *      Otherwise the request is put on ud_active_sends and only the
 *      request message is posted.
 *
 * Returns ORTE_SUCCESS or the error code of the first failing step
 * (ORTE_ERR_UNREACH when there is no route/peer, ORTE_ERR_OUT_OF_RESOURCE
 * when an allocation fails, otherwise whatever the failing helper returned).
 */
int mca_oob_ud_process_send_nb(int fd, short args, void *cbdata)
{
    mca_oob_ud_msg_op_t *op = (mca_oob_ud_msg_op_t*)cbdata;

    orte_process_name_t hop;
    mca_oob_ud_peer_t *peer;
    mca_oob_ud_port_t *port;
    mca_oob_ud_msg_t  *req_msg;
    mca_oob_ud_req_t  *send_req;
    bool send_eager = false;
    char *pack_ptr;
    int rc, size, i;

    if (OPAL_EQUAL == orte_util_compare_name_fields
        (ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME, &op->msg->dst)) {
        return mca_oob_ud_send_self (op->msg);
    }

    /* if we have a route to this peer, then we can reach it */
    hop = orte_routed.get_route(&op->msg->dst);
    if (ORTE_JOBID_INVALID == hop.jobid ||
        ORTE_VPID_INVALID == hop.vpid) {
        ORTE_ERROR_LOG(ORTE_ERR_UNREACH);
        return ORTE_ERR_UNREACH;
    }

    rc = mca_oob_ud_peer_lookup (&hop, &peer);
    if(ORTE_SUCCESS != rc || NULL == peer) {
        ORTE_ERROR_LOG((NULL == peer) ? ORTE_ERR_UNREACH : rc);
        return (NULL == peer) ? ORTE_ERR_UNREACH : rc;
    }

    opal_output_verbose(2, orte_oob_base_framework.framework_output,
                        "%s oob:ud:send_nb to pear %s via hop %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&op->msg->dst), ORTE_NAME_PRINT(&hop));

    /* NTH: TODO -- get a random port? */
    port = (mca_oob_ud_port_t *) opal_list_get_first (&((mca_oob_ud_device_t *)peer->peer_context)->ports);

    send_req = OBJ_NEW(mca_oob_ud_req_t);
    if (!send_req) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* fill in request */
    send_req->req_target = op->msg->dst;
    send_req->req_origin = op->msg->origin;
    send_req->req_tag    = op->msg->tag;

    /* NOTE(review): the early-error paths below release send_req before all
     * of its fields are set; this relies on the request constructor leaving
     * unset pointer fields NULL -- confirm. */
    if (op->msg->data != NULL) {
        size = op->msg->count;

        send_req->req_data_type = MCA_OOB_UD_REQ_TR;

        send_req->req_data.buf.p = (char *)calloc(size, sizeof(char));
        if (NULL != send_req->req_data.buf.p) {
            memcpy(send_req->req_data.buf.p, op->msg->data, op->msg->count);
        } else if (0 != size) {
            /* a NULL result is only acceptable for a zero-byte request */
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            OBJ_RELEASE(send_req);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        send_req->req_data.buf.size = op->msg->count;
    } else {
        MCA_OOB_UD_IOV_SIZE(op->msg, size);

        if (op->msg->iov != NULL) {
            send_req->req_data_type = MCA_OOB_UD_REQ_IOV;
            send_req->req_data.iov.uiov   = op->msg->iov;
            send_req->req_data.iov.count  = op->msg->count;
        } else {
            opal_buffer_t *buffer;

            send_req->req_data_type = MCA_OOB_UD_REQ_BUF;
            /* keep the failure-path free() below well defined even when
             * unload fails without writing the payload pointer */
            send_req->req_data.buf.p = NULL;

            buffer = OBJ_NEW(opal_buffer_t);
            if (NULL == buffer) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                OBJ_RELEASE(send_req);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }

            if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(buffer, op->msg->buffer))) {
                ORTE_ERROR_LOG(rc);
                OBJ_RELEASE(buffer);
                /* nobody else references the request yet: do not leak it */
                OBJ_RELEASE(send_req);
                return rc;
            }

            if (OPAL_SUCCESS != (rc = opal_dss.unload(buffer, (void **)&send_req->req_data.buf.p, &send_req->req_data.buf.size)))
            {
                ORTE_ERROR_LOG(rc);
                OBJ_RELEASE(buffer);
                free(send_req->req_data.buf.p);
                send_req->req_data.buf.p = NULL;
                OBJ_RELEASE(send_req);
                return rc;
            }
            OBJ_RELEASE(buffer);
        }
    }
    send_req->rml_msg = op->msg;
    send_req->req_cbdata = op->msg->cbdata;
    send_req->req_peer   = peer;
    send_req->req_mtu    = port->mtu;
    send_req->req_port   = port;
    send_req->req_rc     = 0;

    send_req->state      = MCA_OOB_UD_REQ_PENDING;
    send_req->type       = MCA_OOB_UD_REQ_SEND;

    /* the request now holds its own reference to the peer */
    OBJ_RETAIN(peer);

    /* payload and header fit in a single MTU: send the data with the request */
    if (size + sizeof (mca_oob_ud_msg_hdr_t) <= (unsigned int)port->mtu) {
        send_eager = true;
    }

    rc = mca_oob_ud_msg_get (port, send_req, &port->listen_qp, peer, false, &req_msg);
    if (ORTE_SUCCESS != rc) {
        OBJ_RELEASE (send_req);
        return rc;
    }

    /* fill in message header */
    req_msg->hdr->msg_type     = MCA_OOB_UD_MSG_REQUEST;
    req_msg->hdr->msg_rem_ctx  = send_req;

    req_msg->hdr->msg_origin   = op->msg->origin;
    req_msg->hdr->msg_target   = op->msg->dst;

    req_msg->hdr->msg_data.req.data_len = size;
    req_msg->hdr->msg_data.req.mtu      = port->mtu;
    req_msg->hdr->msg_data.req.tag      = op->msg->tag;

    if (MCA_OOB_UD_REQ_IOV == send_req->req_data_type) {
        opal_output_verbose(10, orte_oob_base_framework.framework_output,
                             "%s-%s send_nb: tag %d size %lu. msg: %p. peer = %p. req = %p."
                             "count = %d. uiov = %p.\n",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&op->msg->dst),
                             op->msg->tag, (unsigned long)size,
                             (void *) req_msg,
                             (void *) peer, (void *) send_req,
                              send_req->req_data.iov.count, (void *) send_req->req_data.iov.uiov);
    } else {
        opal_output_verbose(10, orte_oob_base_framework.framework_output,
                             "%s-%s send_nb: tag %d size %lu. msg: %p. peer = %p. req = %p."
                             "buffer = %p.\n",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&op->msg->dst),
                             op->msg->tag, (unsigned long)size,
                             (void *) req_msg,
                             (void *) peer, (void *) send_req, (void *) send_req->req_data.buf.p);
    }

    if (!send_eager) {
        mca_oob_ud_req_append_to_list (send_req, &mca_oob_ud_component.ud_active_sends);

        /* send request */
        return mca_oob_ud_msg_post_send (req_msg);
    }

    /* eager path: the payload is packed immediately after the header */
    pack_ptr = (char *)(req_msg->hdr + 1);

    if (op->msg->iov != NULL) {
        for (i = 0 ; i < op->msg->count ; ++i) {
            memcpy (pack_ptr, op->msg->iov[i].iov_base, op->msg->iov[i].iov_len);
            pack_ptr += op->msg->iov[i].iov_len;
        }
    } else {
        memcpy(pack_ptr, send_req->req_data.buf.p, send_req->req_data.buf.size);
    }

    send_req->req_list = NULL;

    req_msg->hdr->msg_data.req.data_follows = true;

    req_msg->cbfunc = mca_oob_ud_send_cb;
    req_msg->req    = send_req;

    /* send request */
    rc = mca_oob_ud_msg_post_send (req_msg);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}