Esempio n. 1
0
/*
 * Give a descriptor back to the fragment pool matching its type.
 *
 * btl_base  module owning the descriptor; used as a portals4 module
 * des       descriptor to release; used as a portals4 fragment
 *
 * Returns OPAL_SUCCESS once the fragment has been handed back, or
 * OPAL_ERR_BAD_PARAM when the fragment type is none of EAGER, MAX or USER.
 */
int
mca_btl_portals4_free(struct mca_btl_base_module_t* btl_base,
                      mca_btl_base_descriptor_t* des)
{
    struct mca_btl_portals4_module_t *module = (struct mca_btl_portals4_module_t *) btl_base;
    mca_btl_portals4_frag_t *fragment = (mca_btl_portals4_frag_t *) des;

    /* Eager frags go straight back: their handle is never touched here. */
    if (BTL_PORTALS4_FRAG_TYPE_EAGER == fragment->type) {
        OPAL_BTL_PORTALS4_FRAG_RETURN_EAGER(module, fragment);
        return OPAL_SUCCESS;
    }

    /* Anything that is neither MAX nor USER is rejected untouched. */
    if (BTL_PORTALS4_FRAG_TYPE_MAX != fragment->type &&
        BTL_PORTALS4_FRAG_TYPE_USER != fragment->type) {
        return OPAL_ERR_BAD_PARAM;
    }

    /* MAX and USER frags both forget any ME handle they still carry.
     * The handle is only overwritten; nothing is unlinked here. */
    if (PTL_INVALID_HANDLE != fragment->me_h) {
        fragment->me_h = PTL_INVALID_HANDLE;
    }

    if (BTL_PORTALS4_FRAG_TYPE_MAX == fragment->type) {
        OPAL_BTL_PORTALS4_FRAG_RETURN_MAX(module, fragment);
        return OPAL_SUCCESS;
    }

    /* USER frag: releasing it also retires one outstanding operation. */
    OPAL_THREAD_ADD_FETCH32(&module->portals_outstanding_ops, -1);
    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
        "mca_btl_portals4_free: Decrementing portals_outstanding_ops=%d\n", module->portals_outstanding_ops));
    OPAL_BTL_PORTALS4_FRAG_RETURN_USER(module, fragment);

    return OPAL_SUCCESS;
}
Esempio n. 2
0
int
mca_btl_portals4_component_progress(void)
{
    mca_btl_portals4_module_t *portals4_btl;
    int num_progressed = 0;
    int ret, btl_ownership;
    mca_btl_portals4_frag_t *frag = NULL;
    mca_btl_base_tag_t tag;
    static ptl_event_t ev;
    unsigned int which;
    mca_btl_active_message_callback_t* reg;
    mca_btl_base_segment_t seg[2];
    mca_btl_base_descriptor_t btl_base_descriptor;

    while (true) {
        ret = PtlEQPoll(mca_btl_portals4_component.eqs_h, mca_btl_portals4_component.num_btls, 0, &ev, &which);

        if (PTL_OK == ret) {
            OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlEQPoll Event received: %d (fail=%d) on NI %d\n",
                ev.type, ev.ni_fail_type, which));
            num_progressed++;
            portals4_btl = mca_btl_portals4_component.btls[which];

            switch (ev.type) {

            case PTL_EVENT_SEND:   /* generated on source (origin) when put stops sending */

                frag = ev.user_ptr;
                if (NULL == frag) {
                    opal_output(opal_btl_base_framework.framework_output, "btl/portals4: PTL_EVENT_SEND event with NULL user_ptr");
                    break;
                }
                btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);

                if (!mca_btl_portals4_component.portals_need_ack) {
                    /* my part's done, in portals we trust! */
                    if( MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags ){
                        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                            "PTL_EVENT_SEND: Direct call to des_cbfunc: %lx\n", (uint64_t)frag->base.des_cbfunc));
                        frag->base.des_cbfunc(&portals4_btl->super,
                                              frag->endpoint,
                                              &frag->base,
                                              OPAL_SUCCESS);
                    }
                    if (btl_ownership) {
                        mca_btl_portals4_free(&portals4_btl->super, &frag->base);
                    }
                    if (0 != frag->size) {
                        OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
                        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                            "PTL_EVENT_SEND: Decrementing portals_outstanding_ops=%d (1)\n",
                            portals4_btl->portals_outstanding_ops));
                    }
                }

                goto done;
                break;

            case PTL_EVENT_ACK:   /* Ack that a put as completed on other side. We just call the callback function */

                frag = ev.user_ptr;
                if (NULL == frag) {
                    opal_output(opal_btl_base_framework.framework_output, "btl/portals4: PTL_EVENT_ACK event with NULL user_ptr");
                    break;
                }
                OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                    "PTL_EVENT_ACK received rlength=%ld mlength=%ld des_flags=%d\n", ev.rlength, ev.mlength, frag->base.des_flags));
                btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);

                /* other side received the message.  should have
                   received entire thing */
                /* let the PML know we're done */
                if (MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags ) {
                    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                        "PTL_EVENT_ACK: Call to des_cbfunc %lx\n", (uint64_t)frag->base.des_cbfunc));
                    frag->base.des_cbfunc(&portals4_btl->super,
                                          frag->endpoint,
                                          &frag->base,
                                          OPAL_SUCCESS);
                }
                if (btl_ownership) {
                    mca_btl_portals4_free(&portals4_btl->super, &frag->base);
                }

                if (0 != frag->size) {
                    OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
                    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                        "PTL_EVENT_ACK: Decrementing portals_outstanding_ops=%d (2)\n", portals4_btl->portals_outstanding_ops));
                }

                goto done;
                break;

            case PTL_EVENT_PUT:   /* Generated on destination (target) when a put into memory ends */

                tag = (unsigned char) (ev.hdr_data);

                btl_base_descriptor.des_segments = seg;
                btl_base_descriptor.des_segment_count = 1;
                seg[0].seg_addr.pval = ev.start;
                seg[0].seg_len = ev.mlength;

                reg = mca_btl_base_active_message_trigger + tag;
                OPAL_OUTPUT_VERBOSE((50, opal_btl_base_framework.framework_output,
                    "PTL_EVENT_PUT: tag=%x base_descriptor=%p cbfunc: %lx\n", tag, (void*)&btl_base_descriptor, (uint64_t)reg->cbfunc));
                reg->cbfunc(&portals4_btl->super, tag, &btl_base_descriptor, reg->cbdata);

                goto done;
                break;

            case PTL_EVENT_PUT_OVERFLOW:
                /* */
                OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                    "PTL_EVENT_OVERFLOW received\n"));
                goto done;
                break;

            case PTL_EVENT_LINK:
                /* */
                frag = ev.user_ptr;
                if (NULL == frag) {
                    opal_output(opal_btl_base_framework.framework_output, "btl/portals4: PTL_EVENT_LINK event with NULL user_ptr");
                    break;
                }
                goto done;
                break;

            case PTL_EVENT_AUTO_UNLINK:
                /* */
                /* The Priority List is used, so PTL_EVENT_AUTO_FREE will never be received. So, we have to reactivate the block here */
                mca_btl_portals4_activate_block(ev.user_ptr);
                goto done;
                break;

            case PTL_EVENT_AUTO_FREE:
                /* */
                goto done;
                break;

            case PTL_EVENT_GET:   /* Generated on source (target) when a get from memory ends */
                /* */
                OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                    "PTL_EVENT_GET received at target rlength=%ld mlength=%ld\n", ev.rlength, ev.mlength));
                goto done;
                break;

            case PTL_EVENT_REPLY:
                /* */
                frag = ev.user_ptr;

                if (PTL_NI_PERM_VIOLATION == ev.ni_fail_type) {
                        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "Warning : PTL_EVENT_REPLY with PTL_NI_PERM_VIOLATION received, try to re-issue a PtlGet");

                    /* The distant PtlMEAppend is not finished (distant PTL_EVENT_LINK not received) */
                    /* Re-issue the PtlGet (see btl_portals4_rdma.c) */
                    ret = PtlGet(frag->md_h,
                                 0,
                                 frag->length,
                                 frag->peer_proc,
                                 portals4_btl->recv_idx,
                                 frag->match_bits, /* match bits */
                                 0,
                                 frag);
                    if (OPAL_UNLIKELY(PTL_OK != ret)) {
                        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                            "%s:%d: Re-issued PtlGet failed: %d",
                                            __FILE__, __LINE__, ret);
                        PtlMDRelease(frag->md_h);
                        frag->md_h = PTL_INVALID_HANDLE;
                        return OPAL_ERROR;
                    }

                    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                        "Re-issued PtlGet length=%ld recv_idx=%d rank=%x pid=%x nid=%x match_bits=%lx\n",
                        frag->length, portals4_btl->recv_idx,
                        frag->peer_proc.rank, frag->peer_proc.phys.pid, frag->peer_proc.phys.nid, frag->match_bits));
                }
                else {
                    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                        "PTL_EVENT_REPLY: Call to rdma_cbfunc=%lx\n", (uint64_t)frag->rdma_cb.func));
                    frag->rdma_cb.func(&portals4_btl->super,
                                 frag->endpoint,
                                 ev.start,
                                 frag->rdma_cb.local_handle,
                                 frag->rdma_cb.context,
                                 frag->rdma_cb.data,
                                 OPAL_SUCCESS);
                    PtlMDRelease(frag->md_h);
                    frag->md_h = PTL_INVALID_HANDLE;

                    OPAL_BTL_PORTALS4_FRAG_RETURN_USER(&portals4_btl->super, frag);
                    OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); 
                    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                        "PTL_EVENT_REPLY: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));
                    goto done;
                }
                break;

            default:
                /* */
                goto done;
                break;
            }
        } else if (PTL_EQ_EMPTY == ret) {
            /* there's nothing in the queue.  This is actually the
               common case, so the easiest way to make the compiler
               emit something that doesn't completely blow here is to
               just go back to a good old goto */
            goto done;
            break;

        } else if (PTL_EQ_DROPPED == ret) {
            opal_output(opal_btl_base_framework.framework_output,
                        "Flow control situation without recovery (EQ_DROPPED)");
            break;
        } else {
            opal_output(opal_btl_base_framework.framework_output,
                        "Error returned from PtlEQPoll: %d", ret);
            break;
        }
    }
 done:
    return num_progressed;
}